diff --git a/.github/workflows/win-builds.yaml b/.github/workflows/win-builds.yaml
index ce25d4eb41..4d2b246a59 100644
--- a/.github/workflows/win-builds.yaml
+++ b/.github/workflows/win-builds.yaml
@@ -205,6 +205,7 @@ jobs:
             fftw:p
             ffmpeg:p
             SDL2:p
+            zstd:p
 
       - name: Build
         shell: msys2 {0}
diff --git a/.gitmodules b/.gitmodules
index 58b0c0c488..50ce2e7799 100755
--- a/.gitmodules
+++ b/.gitmodules
@@ -115,3 +115,18 @@
 [submodule "3rdparty/opengametools"]
 	path = 3rdparty/opengametools
 	url = https://github.com/jpaver/opengametools
+[submodule "3rdparty/ufbx"]
+	path = 3rdparty/ufbx
+	url = https://github.com/ufbx/ufbx
+[submodule "3rdparty/fastgltf"]
+	path = 3rdparty/fastgltf
+	url = https://github.com/spnda/fastgltf
+[submodule "3rdparty/OffsetAllocator"]
+	path = 3rdparty/OffsetAllocator
+	url = https://github.com/sebbbi/OffsetAllocator
+[submodule "3rdparty/spz"]
+	path = 3rdparty/spz
+	url = https://github.com/nianticlabs/spz
+[submodule "3rdparty/zstd"]
+	path = 3rdparty/zstd
+	url = https://github.com/facebook/zstd
diff --git a/3rdparty/3rdparty.cmake b/3rdparty/3rdparty.cmake
index 53694ce380..c94fe718cb 100644
--- a/3rdparty/3rdparty.cmake
+++ b/3rdparty/3rdparty.cmake
@@ -29,3 +29,4 @@ include(3rdparty/shmdata.cmake)
 include(3rdparty/snappy.cmake)
 include(3rdparty/sndfile.cmake)
 include(3rdparty/xtensor.cmake)
+include(3rdparty/zstd.cmake)
diff --git a/3rdparty/OffsetAllocator b/3rdparty/OffsetAllocator
new file mode 160000
index 0000000000..3610a73770
--- /dev/null
+++ b/3rdparty/OffsetAllocator
@@ -0,0 +1 @@
+Subproject commit 3610a7377088b1e8c8f1525f458c96038a4e6fc0
diff --git a/3rdparty/avendish b/3rdparty/avendish
index 7eafe1735a..77be36e03b 160000
--- a/3rdparty/avendish
+++ b/3rdparty/avendish
@@ -1 +1 @@
-Subproject commit 7eafe1735a2c6c20891ead7404333884a6e15971
+Subproject commit 77be36e03b7d327f6f2bee38c63b4abf63f41a2d
diff --git a/3rdparty/fastgltf b/3rdparty/fastgltf
new file mode 160000
index 0000000000..ce52187411
--- /dev/null
+++ b/3rdparty/fastgltf
@@ -0,0 +1 @@
+Subproject commit ce521874115d66679cbb33c6b2811469b04c1066
diff --git a/3rdparty/libossia b/3rdparty/libossia
index 476e6e50d2..b335062f52 160000
--- a/3rdparty/libossia
+++ b/3rdparty/libossia
@@ -1 +1 @@
-Subproject commit 476e6e50d2ac11298b9ea2f6e4d9372973a52db0
+Subproject commit b335062f524775ad9a5ef094eec5bdcb8fd20e8d
diff --git a/3rdparty/spz b/3rdparty/spz
new file mode 160000
index 0000000000..7ae1621e54
--- /dev/null
+++ b/3rdparty/spz
@@ -0,0 +1 @@
+Subproject commit 7ae1621e54e4b42c3c9c192b366d09116e558e19
diff --git a/3rdparty/ufbx b/3rdparty/ufbx
new file mode 160000
index 0000000000..83bc7cf44f
--- /dev/null
+++ b/3rdparty/ufbx
@@ -0,0 +1 @@
+Subproject commit 83bc7cf44f76bc8622de63b809a42b5d557cd733
diff --git a/3rdparty/zstd b/3rdparty/zstd
new file mode 160000
index 0000000000..885c79ba4a
--- /dev/null
+++ b/3rdparty/zstd
@@ -0,0 +1 @@
+Subproject commit 885c79ba4ae8345e006f61bc97b270d4cf7ff076
diff --git a/3rdparty/zstd.cmake b/3rdparty/zstd.cmake
new file mode 100644
index 0000000000..42e4256d67
--- /dev/null
+++ b/3rdparty/zstd.cmake
@@ -0,0 +1,49 @@
+# zstd: use the system package when SCORE_USE_SYSTEM_LIBRARIES is set and a
+# CMake config is found; otherwise build the vendored 3rdparty/zstd statically.
+if(SCORE_USE_SYSTEM_LIBRARIES)
+  find_package(zstd GLOBAL CONFIG)
+endif()
+
+if(NOT TARGET zstd::libzstd_static AND NOT TARGET zstd::libzstd_shared
+   AND NOT TARGET libzstd_static AND NOT TARGET zstd)
+  set(ZSTD_BUILD_PROGRAMS OFF CACHE INTERNAL "" FORCE)
+  set(ZSTD_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
+  set(ZSTD_BUILD_SHARED OFF CACHE INTERNAL "" FORCE)
+  set(ZSTD_BUILD_STATIC ON CACHE INTERNAL "" FORCE)
+  set(ZSTD_BUILD_DICTBUILDER OFF CACHE INTERNAL "" FORCE)
+
+  set(old_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
+  set(BUILD_SHARED_LIBS OFF)
+
+  # NOTE(review): ".*Deb.*" also matches RelWithDebInfo, not only Debug.
+  if(NOT MSVC AND NOT CMAKE_CROSSCOMPILING AND CMAKE_BUILD_TYPE MATCHES ".*Deb.*")
+    set(old_CFLAGS "${CMAKE_C_FLAGS}")
+    set(old_CXXFLAGS "${CMAKE_CXX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -march=native")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native")
+  endif()
+
+  add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/zstd/build/cmake" EXCLUDE_FROM_ALL)
+
+  if(NOT MSVC AND NOT CMAKE_CROSSCOMPILING AND CMAKE_BUILD_TYPE MATCHES ".*Deb.*")
+    set(CMAKE_C_FLAGS "${old_CFLAGS}")
+    set(CMAKE_CXX_FLAGS "${old_CXXFLAGS}")
+  endif()
+
+  set(BUILD_SHARED_LIBS ${old_BUILD_SHARED_LIBS})
+endif()
+
+# Make later find_package(zstd) calls (e.g. 3rdparty/spz) resolve to the
+# targets configured above: some prebuilt SDKs ship zstd configs pointing to
+# files that do not exist, and a not-found result would trigger FetchContent
+# fallbacks that clash with the vendored targets.
+file(WRITE "${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/zstd-config.cmake" [=[
+if(TARGET libzstd_static AND NOT TARGET zstd::libzstd_static)
+  add_library(zstd::libzstd_static INTERFACE IMPORTED GLOBAL)
+  target_link_libraries(zstd::libzstd_static INTERFACE libzstd_static)
+endif()
+if(TARGET libzstd_shared AND NOT TARGET zstd::libzstd_shared)
+  add_library(zstd::libzstd_shared INTERFACE IMPORTED GLOBAL)
+  target_link_libraries(zstd::libzstd_shared INTERFACE libzstd_shared)
+endif()
+]=])
diff --git a/ci/debian.bookworm.deps.sh b/ci/debian.bookworm.deps.sh
index 1e9af4c07f..861b03d01c 100755
--- a/ci/debian.bookworm.deps.sh
+++ b/ci/debian.bookworm.deps.sh
@@ -40,6 +40,7 @@ $SUDO apt-get install -qq --force-yes \
      libavahi-compat-libdnssd-dev libsamplerate0-dev \
      portaudio19-dev \
      libpipewire-0.3-dev \
-     libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev
+     libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/ci/debian.bullseye.deps.sh b/ci/debian.bullseye.deps.sh
index 74a8b15db6..88fcc2b1d4 100755
--- a/ci/debian.bullseye.deps.sh
+++ b/ci/debian.bullseye.deps.sh
@@ -32,6 +32,7 @@ $SUDO apt-get install -qq --force-yes -t bullseye-backports \
      libavahi-compat-libdnssd-dev libsamplerate0-dev \
      portaudio19-dev \
      libpipewire-0.3-dev \
-     libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev
+     libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/ci/debian.trixie-system.deps.sh b/ci/debian.trixie-system.deps.sh
index 9649c052ff..b9ffee0af3 100755
--- a/ci/debian.trixie-system.deps.sh
+++ b/ci/debian.trixie-system.deps.sh
@@ -55,7 +55,8 @@ $SUDO apt-get install -qq --force-yes \
      libzita-alsa-pcmi-dev \
      libvst3sdk-dev \
      puredata-dev \
-     libpd-dev
+     libpd-dev \
+     libzstd-dev
 
 
 source ci/common.deps.sh LINUX
diff --git a/ci/debian.trixie.deps.sh b/ci/debian.trixie.deps.sh
index 9aa4fb56a2..70ff36f408 100755
--- a/ci/debian.trixie.deps.sh
+++ b/ci/debian.trixie.deps.sh
@@ -34,6 +34,7 @@ $SUDO apt-get install -qq --force-yes \
      libavahi-compat-libdnssd-dev libsamplerate0-dev \
      portaudio19-dev \
      libpipewire-0.3-dev \
-     libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev
+     libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/ci/fedora.deps.sh b/ci/fedora.deps.sh
index 77eac40f3b..f8d8117547 100755
--- a/ci/fedora.deps.sh
+++ b/ci/fedora.deps.sh
@@ -34,6 +34,7 @@ dnf -y install --allowerasing \
      qt6-qtsvg-devel \
      qt6-qtconnectivity-devel \
      pipewire-devel \
-     zlib-ng-compat-static zlib-ng-compat-devel
+     zlib-ng-compat-static zlib-ng-compat-devel \
+     libzstd-devel
 
 source ci/common.deps.sh LINUX
diff --git a/ci/freebsd.deps.sh b/ci/freebsd.deps.sh
index bcdb633dbb..ea21bbc65a 100755
--- a/ci/freebsd.deps.sh
+++ b/ci/freebsd.deps.sh
@@ -18,6 +18,7 @@ pkg install -y \
   libcoap \
   freetype2 harfbuzz fontconfig \
   alsa-lib \
-  jackit
+  jackit \
+  zstd
 
 source ci/common.deps.sh FREEBSD
diff --git a/ci/nix.build.nix b/ci/nix.build.nix
index fe17e697af..e174fbdf37 100644
--- a/ci/nix.build.nix
+++ b/ci/nix.build.nix
@@ -33,12 +33,14 @@
 , rapidfuzz-cpp
 , re2
 , rubberband
+, simdjson
 , snappy
 , SDL2
 , spdlog
 , suil
 , udev
 , xorg
+, zstd
 }:
 
 # TODO: figure out LLVM jit
@@ -88,6 +90,7 @@ clangStdenv.mkDerivation (finalAttrs: {
     rapidfuzz-cpp
     re2
     rubberband
+    simdjson
     snappy
     SDL2
     spdlog
diff --git a/ci/osx.brew.deps.sh b/ci/osx.brew.deps.sh
index a7e14917ad..c971f3b5b3 100755
--- a/ci/osx.brew.deps.sh
+++ b/ci/osx.brew.deps.sh
@@ -4,9 +4,9 @@ set +e
 
 export HOMEBREW_NO_AUTO_UPDATE=1
 brew update && (brew list cmake || brew install cmake)
-brew install ninja qt boost ffmpeg@7 fftw portaudio jack sdl lv2 lilv suil freetype
+brew install ninja qt boost ffmpeg@7 fftw portaudio jack sdl lv2 lilv suil freetype zstd
 brew uninstall --ignore-dependencies qt@5 || true
 
 source ci/common.deps.sh MACOS
 
-echo PKG_CONFIG_PATH="/opt/homebrew/opt/ffmpeg@7/lib/pkgconfig" >> "$GITHUB_ENV"
\ No newline at end of file
+echo PKG_CONFIG_PATH="/opt/homebrew/opt/ffmpeg@7/lib/pkgconfig" >> "$GITHUB_ENV"
diff --git a/ci/suse.leap.deps.sh b/ci/suse.leap.deps.sh
index 21e24e7562..650ac271e0 100755
--- a/ci/suse.leap.deps.sh
+++ b/ci/suse.leap.deps.sh
@@ -25,7 +25,8 @@ $SUDO zypper -n install \
    qt6-qml-devel qt6-qml-private-devel \
    qt6-svg-devel \
    ffmpeg-4-libavcodec-devel ffmpeg-4-libavdevice-devel ffmpeg-4-libavfilter-devel ffmpeg-4-libavformat-devel ffmpeg-4-libswresample-devel \
-   curl gzip
+   curl gzip \
+   libzstd-devel
 
 curl -L -0 https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz --output cmake.tgz
 tar xaf cmake.tgz
diff --git a/ci/suse.tumbleweed.deps.sh b/ci/suse.tumbleweed.deps.sh
index d8f36b9c13..153172fc89 100755
--- a/ci/suse.tumbleweed.deps.sh
+++ b/ci/suse.tumbleweed.deps.sh
@@ -31,5 +31,6 @@ $SUDO zypper -n install \
    qt6-qml-devel qt6-qml-private-devel \
    qt6-svg-devel \
    ffmpeg-7-libavcodec-devel ffmpeg-7-libavdevice-devel ffmpeg-7-libavfilter-devel ffmpeg-7-libavformat-devel ffmpeg-7-libswresample-devel \
-   zlib-devel zlib-devel-static
+   zlib-devel zlib-devel-static \
+   libzstd-devel
 
diff --git a/ci/ubuntu.2604.deps.sh b/ci/ubuntu.2604.deps.sh
index b5488d2fa8..b25b76f918 100755
--- a/ci/ubuntu.2604.deps.sh
+++ b/ci/ubuntu.2604.deps.sh
@@ -42,7 +42,8 @@ $SUDO apt install -y \
      libvulkan-dev \
      libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
      file \
-     dpkg-dev
+     dpkg-dev \
+     libzstd-dev
 
 
 source ci/common.deps.sh LINUX
diff --git a/ci/ubuntu.jammy.deps.sh b/ci/ubuntu.jammy.deps.sh
index 43c9f7fe65..2c61376768 100755
--- a/ci/ubuntu.jammy.deps.sh
+++ b/ci/ubuntu.jammy.deps.sh
@@ -51,6 +51,7 @@ $SUDO apt-get install -y \
      libvulkan-dev \
      libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
      file \
-     dpkg-dev
+     dpkg-dev \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/ci/ubuntu.lunar.deps.sh b/ci/ubuntu.lunar.deps.sh
index bb49e14254..752123c08e 100755
--- a/ci/ubuntu.lunar.deps.sh
+++ b/ci/ubuntu.lunar.deps.sh
@@ -48,6 +48,7 @@ $SUDO apt-get install -y \
      libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
      file \
      dpkg-dev \
-     lsb-release
+     lsb-release \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/ci/ubuntu.noble.deps.sh b/ci/ubuntu.noble.deps.sh
index 4a64660239..6db2b97c53 100755
--- a/ci/ubuntu.noble.deps.sh
+++ b/ci/ubuntu.noble.deps.sh
@@ -43,7 +43,8 @@ $SUDO apt-get install -y \
      libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
      file \
      dpkg-dev \
-     lsb-release
+     lsb-release \
+     libzstd-dev
 
 # needed because GCC does not support -fuse-ld=lld-19
 $SUDO rm -rf /usr/bin/lld /usr/bin/ld.lld
diff --git a/ci/ubuntu.oracular.deps.sh b/ci/ubuntu.oracular.deps.sh
index ff8630b9b6..7501e695b1 100755
--- a/ci/ubuntu.oracular.deps.sh
+++ b/ci/ubuntu.oracular.deps.sh
@@ -43,6 +43,7 @@ $SUDO apt-get install -y \
      libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
      file \
      dpkg-dev \
-     lsb-release
+     lsb-release \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/ci/ubuntu.plucky.deps.sh b/ci/ubuntu.plucky.deps.sh
index bf53bd6fdd..b030b6796f 100755
--- a/ci/ubuntu.plucky.deps.sh
+++ b/ci/ubuntu.plucky.deps.sh
@@ -44,6 +44,7 @@ $SUDO apt-get install -y \
      libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \
      file \
      dpkg-dev \
-     lsb-release
+     lsb-release \
+     libzstd-dev
 
 source ci/common.deps.sh LINUX
diff --git a/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml b/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml
index 1e9c6d07cc..0d6fa0e466 100644
--- a/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml
+++ b/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml
@@ -120,6 +120,7 @@ modules:
   - modules/re2.yaml
   - modules/libcoap.yaml
   - modules/boost.yaml
+  - modules/simdjson.yaml
   - modules/snappy.yaml
   - modules/avahi.yaml
   - modules/suil.yaml
diff --git a/cmake/Deployment/Linux/Flatpak/modules/simdjson.yaml b/cmake/Deployment/Linux/Flatpak/modules/simdjson.yaml
new file mode 100644
index 0000000000..befaf599bc
--- /dev/null
+++ b/cmake/Deployment/Linux/Flatpak/modules/simdjson.yaml
@@ -0,0 +1,16 @@
+# JSON parser used by fastgltf (glTF support in score-plugin-threedim).
+# Provided here so that fastgltf does not try to download it at configure
+# time, which is impossible in the sandboxed flatpak build.
+name: simdjson
+buildsystem: cmake-ninja
+builddir: true
+config-opts:
+  - -Wno-dev
+  - -DCMAKE_BUILD_TYPE=RelWithDebInfo
+  - -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+  - -DBUILD_SHARED_LIBS=ON
+  - -DSIMDJSON_DEVELOPER_MODE=OFF
+sources:
+  - type: archive
+    url: https://github.com/simdjson/simdjson/archive/refs/tags/v3.12.3.tar.gz
+    sha256: d0af071f2f4187d8b26b556e83ef832b634bd5feb4e2f537b9dabbd334d4e334
diff --git a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp
index c7dd2177d3..14f947b01c 100644
--- a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp
+++ b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp
@@ -4,6 +4,7 @@
 
 #include <ossia/detail/algorithms.hpp>
 
+#include <QDebug>
 #include <QFile>
 #include <QFileInfo>
 #include <QSet>
@@ -19,8 +20,22 @@ void ProcessDropHandler::getCustomDrops(
     std::vector<ProcessDropHandler::ProcessDrop>& drops, const QMimeData& mime,
     const score::DocumentContext& ctx) const noexcept
 {
-  // Check for special mime handling code
-  return dropCustom(drops, mime, ctx);
+  // dropCustom is not noexcept: some overrides invoke parsers that can
+  // throw on malformed input (see ProcessDropHandler.hpp). Catch here so
+  // no exception escapes this noexcept entry point, which would call
+  // std::terminate. Drops appended before a throw stay in `drops`.
+  try
+  {
+    dropCustom(drops, mime, ctx);
+  }
+  catch(const std::exception& e)
+  {
+    qWarning() << "ProcessDropHandler::dropCustom threw:" << e.what();
+  }
+  catch(...)
+  {
+    qWarning() << "ProcessDropHandler::dropCustom threw an unknown exception";
+  }
 }
 
 void ProcessDropHandler::getMimeDrops(
@@ -61,7 +76,7 @@ QSet<QString> ProcessDropHandler::fileExtensions() const noexcept
 
 void ProcessDropHandler::dropCustom(
     std::vector<ProcessDropHandler::ProcessDrop>&, const QMimeData& data,
-    const score::DocumentContext& ctx) const noexcept
+    const score::DocumentContext& ctx) const
 {
 }
 
diff --git a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp
index 4f568657a1..a712f06016 100644
--- a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp
+++ b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp
@@ -59,7 +59,7 @@ class SCORE_LIB_PROCESS_EXPORT ProcessDropHandler : public score::InterfaceBase
 protected:
   virtual void dropCustom(
       std::vector<ProcessDrop>& drops, const QMimeData& mime,
-      const score::DocumentContext& ctx) const noexcept;
+      const score::DocumentContext& ctx) const;
 
   virtual void dropPath(
       std::vector<ProcessDrop>& drops, const score::FilePath& path,
diff --git a/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp b/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp
index 06905031fb..2b17f5ad2b 100644
--- a/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp
@@ -448,7 +448,35 @@ make_control_in(avnd::field_index<N>, Id<Process::Port>&& id, QObject* parent)
       auto [Mx, My, Mz] = c.max;
       auto [ix, iy, iz] = c.init;
       return new Process::XYZSpinboxes{{mx, my, mz}, {Mx, My, Mz}, {ix, iy, iz},
-                                       qname,        id,           parent};
+                                       false,        qname,        id,           parent};
+    }
+  }
+  else if constexpr(widg.widget == avnd::widget_type::xyzw_spinbox)
+  {
+    static constexpr auto c = avnd::get_range<T>();
+    if constexpr(requires {
+                   c.min == 0.f;
+                   c.max == 0.f;
+                   c.init == 0.f;
+                 })
+    {
+      return new Process::XYZSpinboxes{
+          {c.min, c.min, c.min},
+          {c.max, c.max, c.max},
+          {c.init, c.init, c.init},
+          false,
+          qname,
+          id,
+          parent};
+    }
+    else
+    {
+      auto [mx, my, mz, mw] = c.min;
+      auto [Mx, My, Mz, Mw] = c.max;
+      auto [ix, iy, iz, iw] = c.init;
+      // FIXME we don't have a good 4-way widget
+      return new Process::XYZSpinboxes{{mx, my, mz}, {Mx, My, Mz}, {ix, iy, iz},
+                                       false,        qname,        id,           parent};
     }
   }
   else if constexpr(widg.widget == avnd::widget_type::color)
diff --git a/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp b/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp
index 3f049ab18a..e0e1035b29 100644
--- a/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp
@@ -5,10 +5,10 @@
 
 namespace oscr
 {
-
 template <typename Node_T>
   requires(
-      (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size) == 0
+      (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size + scene_output_introspection<Node_T>::size) == 0
+      && (avnd::gpu_render_target_output_port_output_introspection<Node_T>::size == 0)
   )
 struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
 {
@@ -19,6 +19,7 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
   AVND_NO_UNIQUE_ADDRESS texture_inputs_storage<Node_T> texture_ins;
   AVND_NO_UNIQUE_ADDRESS buffer_inputs_storage<Node_T> buffer_ins;
   AVND_NO_UNIQUE_ADDRESS geometry_inputs_storage<Node_T> geometry_ins;
+  AVND_NO_UNIQUE_ADDRESS scene_inputs_storage<Node_T> scene_ins;
 
   const GfxNode<Node_T>& node() const noexcept
   {
@@ -44,9 +45,19 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
     return {};
   }
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  // See CpuFilterNode.hpp for the reasoning: init must live in initState
+  // so the incremental edge-rewire path also runs it.
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
-    auto& parent = node();
+    if(m_initialized)
+      return;
+
+    // See CpuFilterNode for the reasoning: optional renderlist
+    // backchannel populated via SFINAE so nodes can reach the
+    // RenderList's GpuResourceRegistry / AssetTable without plumbing.
+    if constexpr(requires { state->renderlist = &renderer; })
+      state->renderlist = &renderer;
+
     if constexpr(requires { state->prepare(); })
     {
       this->node().processControlIn(
@@ -59,6 +70,13 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
       texture_ins.init(*this, renderer);
 
     if_possible(state->init(renderer, res));
+
+    m_initialized = true;
+  }
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    initState(renderer, res);
   }
 
   void update(
@@ -82,32 +100,69 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
     }
   }
 
-  void release(score::gfx::RenderList& r) override
+  void releaseState(score::gfx::RenderList& r) override
   {
+    if(!m_initialized)
+      return;
+
     if constexpr(avnd::texture_input_introspection<Node_T>::size > 0)
       texture_ins.release();
 
     if constexpr(avnd::geometry_input_introspection<Node_T>::size > 0)
       geometry_ins.release(r);
 
+    if constexpr(scene_input_introspection<Node_T>::size > 0)
+      scene_ins.release(r);
+
     if constexpr(
         avnd::texture_input_introspection<Node_T>::size > 0
         || avnd::texture_output_introspection<Node_T>::size > 0)
     {
-      // FIXME this->defaultRelease(r);
+      // Intentionally empty: no call-through to
+      // GenericNodeRenderer::defaultRelease here. This GfxRenderer derives
+      // from OutputNodeRenderer, not GenericNodeRenderer, and (per the
+      // original author's note) OutputNodeRenderer has no defaultRelease
+      // equivalent: it is a sink that owns no pipeline / passes.
+      // The matching call in CpuFilterNode.hpp is valid because that
+      // GfxRenderer derives from GenericNodeRenderer.
+      //
+      // If a future CpuAnalysisNode uses textures via OutputNodeRenderer
+      // surfaces, they'll need their own per-storage release path
+      // (texture_ins.release above already handles texture INPUTS).
     }
 
     if_possible(state->release(r));
+
+    // Clear the optional renderlist backchannel set in initState; the
+    // requires-expression only fires for states declaring `renderlist`.
+    if constexpr(requires { state->renderlist = nullptr; })
+      state->renderlist = nullptr;
+
+    m_initialized = false;
+  }
+
+  void release(score::gfx::RenderList& r) override
+  {
+    releaseState(r);
   }
 
   void inputAboutToFinish(
       score::gfx::RenderList& renderer, const score::gfx::Port& p,
       QRhiResourceUpdateBatch*& res) override
   {
+    // Outer guard includes scene_input_introspection so a node with ONLY
+    // scene inputs (no texture / buffer / geometry) still allocates `res`
+    // — necessary if scene_inputs_storage ever grows an inputAboutToFinish
+    // method (today it's read-only via readInputScenes, but the storage's
+    // lifecycle is part of the new scene_port concept and may evolve).
+    // Without the include, a scene-only sink would silently skip the
+    // res allocation and any future scene-side write would have nowhere
+    // to land.
     if constexpr(
         avnd::texture_input_introspection<Node_T>::size > 0
         || avnd::buffer_input_introspection<Node_T>::size > 0
-        || avnd::geometry_input_introspection<Node_T>::size > 0)
+        || avnd::geometry_input_introspection<Node_T>::size > 0
+        || scene_input_introspection<Node_T>::size > 0)
     {
       res = renderer.state.rhi->nextResourceUpdateBatch();
 
@@ -118,6 +173,8 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
       if constexpr(avnd::geometry_input_introspection<Node_T>::size > 0)
         geometry_ins.inputAboutToFinish(
             renderer, res, this->geometry, *state, this->node());
+      // No scene_ins.inputAboutToFinish today — the guard is forward-
+      // looking; add the call here when scene_inputs_storage grows one.
     }
 
     if_possible(state->inputAboutToFinish(renderer, p, res));
@@ -144,6 +201,8 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
       buffer_ins.readInputBuffers(renderer, parent, *state);
     if constexpr(avnd::geometry_input_introspection<Node_T>::size > 0)
       geometry_ins.readInputGeometries(renderer, this->geometry, parent, *state);
+    if constexpr(scene_input_introspection<Node_T>::size > 0)
+      scene_ins.readInputScenes(this->scene, *state);
 
     parent.processControlIn(
         *this, *state, m_last_message, parent.last_message, parent.m_ctx);
@@ -158,9 +217,13 @@ struct GfxRenderer<Node_T> final : score::gfx::OutputNodeRenderer
 };
 
 template <typename Node_T>
-  requires(
-    (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size) == 0
-  )
+  requires((avnd::texture_output_introspection<Node_T>::size
+            + avnd::buffer_output_introspection<Node_T>::size
+            + avnd::geometry_output_introspection<Node_T>::size
+            + scene_output_introspection<Node_T>::size)
+               == 0
+           && (avnd::gpu_render_target_output_port_output_introspection<Node_T>::size
+               == 0))
 struct GfxNode<Node_T> final
     : CustomGpuOutputNodeBase
     , GpuNodeElements<Node_T>
diff --git a/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp b/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp
index 159c98a9f4..8bed9246d5 100644
--- a/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp
@@ -3,18 +3,25 @@
 #if SCORE_PLUGIN_GFX
 #include <Crousti/GfxNode.hpp>
 
+#include <halp/texture.hpp>
+
 namespace oscr
 {
 
 template <typename Node_T>
   requires(
-    (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size) >= 1
+    (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size + scene_output_introspection<Node_T>::size + avnd::gpu_render_target_output_port_output_introspection<Node_T>::size) >= 1
   )
 struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
 {
   std::shared_ptr<Node_T> state;
   score::gfx::Message m_last_message{};
-  ossia::time_value m_last_time{-1};
+  // RenderList::frame id of the last frame on which we ran the expensive
+  // once-per-frame body of runInitialPasses (input readbacks, operator()(),
+  // output uploads). runInitialPasses is invoked once PER OUTGOING EDGE, so
+  // without this guard that whole body re-ran for every downstream edge,
+  // every frame. -1 = never run yet.
+  int64_t m_last_frame{-1};
 
   AVND_NO_UNIQUE_ADDRESS texture_inputs_storage<Node_T> texture_ins;
   AVND_NO_UNIQUE_ADDRESS texture_outputs_storage<Node_T> texture_outs;
@@ -24,6 +31,8 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
 
   AVND_NO_UNIQUE_ADDRESS geometry_inputs_storage<Node_T> geometry_ins;
   AVND_NO_UNIQUE_ADDRESS geometry_outputs_storage<Node_T> geometry_outs;
+  AVND_NO_UNIQUE_ADDRESS scene_inputs_storage<Node_T> scene_ins;
+  AVND_NO_UNIQUE_ADDRESS scene_outputs_storage<Node_T> scene_outs;
 
   const GfxNode<Node_T>& node() const noexcept
   {
@@ -42,8 +51,14 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
   {
     if constexpr(avnd::texture_input_introspection<Node_T>::size > 0)
     {
+      // Only texture-RT inputs live in m_rts. Geometry / buffer / scene
+      // inputs on the same node (e.g. PBRMesh: 4 gpu_texture_inputs + a
+      // dynamic_gpu_geometry mesh in) land here through the generic
+      // renderTargetForOutput path — return empty so the upstream's
+      // addOutputPass skips creating a graphics render pass for them.
       auto it = texture_ins.m_rts.find(&p);
-      SCORE_ASSERT(it != texture_ins.m_rts.end());
+      if(it == texture_ins.m_rts.end())
+        return {};
       return it->second;
     }
     return {};
@@ -60,6 +75,71 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
     return {};
   }
 
+  // Receives the upstream QRhiTexture for non-2D gpu_texture_input fields
+  // (cubemap / array / 3D). Those ports are flagged GrabsFromSource (see
+  // initGfxPorts + port_flags_for_field), so Graph::updateSinkSampler
+  // calls this hook; the handle is copied into the matching halp field so
+  // that the node's operator()() / runInitialPasses see it. 2D (classic
+  // RT-rendered) inputs ignore this path: texture_inputs_storage::init
+  // sets their handle up at init() time.
+  //
+  // depthTex: for ports that also opt in via halp_meta(samplable_depth,
+  // true), Graph passes the upstream's depth attachment too; it is stored
+  // in `texture.depth_handle` so the consumer can sample it with color.
+  void updateInputTexture(
+      const score::gfx::Port& input, QRhiTexture* tex,
+      QRhiTexture* depthTex = nullptr) override
+  {
+    if constexpr(avnd::texture_input_introspection<Node_T>::size > 0)
+    {
+      // Position of `input` among the node's input ports; -1 when the port
+      // does not belong to this node, in which case nothing is updated.
+      const auto& ports = this->node().input;
+      const int port_count = (int)ports.size();
+      int port_idx = -1;
+      for(int i = 0; i < port_count && port_idx < 0; ++i)
+      {
+        if(ports[i] == &input)
+          port_idx = i;
+      }
+      if(port_idx < 0)
+        return;
+
+      // Visit every texture input field; only the non-2D GPU texture field
+      // whose field index equals port_idx receives the new handle.
+      avnd::texture_input_introspection<Node_T>::for_all_n2(
+          avnd::get_inputs<Node_T>(*state),
+          [&]<typename F, std::size_t K, std::size_t N>(
+              F& field, avnd::predicate_index<K>, avnd::field_index<N>) {
+        if constexpr(avnd::gpu_texture_port<F>
+                     && halp::texture_kind_of<F>() != halp::texture_kind::texture_2d)
+        {
+          if(port_idx != (int)N)
+            return;
+
+          auto& target = field.texture;
+          target.handle = tex;
+          target.kind = halp::texture_kind_of<F>();
+
+          // A null upstream texture reports a 0x0 size.
+          const QSize extent = tex ? tex->pixelSize() : QSize(0, 0);
+          target.width = extent.width();
+          target.height = extent.height();
+
+          // Depth is forwarded only for fields that opted in; the stored
+          // format is left untouched when no depth texture is provided.
+          if constexpr(halp::samplable_depth_of<F>())
+          {
+            target.depth_handle = depthTex;
+            if(depthTex)
+              target.depth_format
+                  = qrhiToHalpDepthFormat(depthTex->format());
+          }
+        }
+      });
+    }
+  }
+
   QRhiTexture* textureForOutput(const score::gfx::Port& output) override
   {
     if constexpr(avnd::gpu_texture_output_introspection<Node_T>::size > 0)
@@ -95,9 +175,47 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
     return nullptr;
   }
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  // All of the setup lives in initState(), not init(). The incremental
+  // edge-rewire path (Graph::createPassForEdgeIfMissing) only calls
+  // initState() on newly-created renderers — so a halp scene-in/scene-out
+  // node inserted live would otherwise never allocate its storage, its
+  // operator()() would run against uninitialised state every frame, and
+  // nothing would flow downstream until a stop/start cycle forced a full
+  // rebuild through init().
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
+    if(m_initialized)
+      return;
+
     auto& parent = node();
+
+    // Optional renderlist backchannel for CPU halp nodes that need to
+    // reach their hosting RenderList's GpuResourceRegistry / AssetTable
+    // (e.g. Camera / Light / PBRMesh / MaterialOverride allocating arena
+    // slots). Populated by SFINAE so nodes that don't declare the member
+    // pay nothing. Lifetime: valid from initState until releaseState
+    // clears it back to nullptr.
+    if constexpr(requires { state->renderlist = &renderer; })
+      state->renderlist = &renderer;
+
+    // Ordering invariant: init → processControlIn → operator()()
+    //
+    // For nodes WITHOUT prepare(): processControlIn is NOT called here.
+    // state->init() therefore runs (line below) before any control-update
+    // callback can fire rebuild(). All five scene producers — Camera,
+    // CameraArray, Light, Transform3D, SceneGroup — rely on this: they
+    // populate m_*_ref arena handles in init(), and rebuild() reads those
+    // handles unconditionally. The invariant is also enforced at the two
+    // call-graph roots:
+    //   • Graph.cpp:865-893  (incremental edge update): initState() is
+    //     called before seedInitialOutputs() / operator()().
+    //   • RenderList.cpp:434-470 (full graph init): init() for all
+    //     renderers runs before the first render frame fires update().
+    //
+    // If you add prepare() to a scene producer, processControlIn becomes
+    // reachable BEFORE state->init() (the prepare() branch below precedes
+    // the state->init() call near the end of this function): any m_*_ref
+    // read in rebuild() then sees an empty handle. Re-audit those reads first.
     if constexpr(requires { state->prepare(); })
     {
       parent.processControlIn(
@@ -116,6 +234,70 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
       buffer_outs.init(renderer, *state, parent);
 
     if_possible(state->init(renderer, res));
+
+    m_initialized = true;
+  }
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    initState(renderer, res); // returns early if m_initialized is already set
+  }
+
+  // Called by Graph::reconcileAllRenderLists right after this renderer is
+  // spawned (in particular when the user live-inserts a scene-producing
+  // node — Camera, EnvironmentLoader, Light — into a running
+  // graph). Runs the node's operator()() once to populate its outputs and
+  // then pushes the result into every downstream sink's per-port scene
+  // cache immediately, rather than waiting for the first render-frame's
+  // upstream-input scan to find our new edge. Without this, the Camera
+  // live-insertion symptom is that the camera has no visible effect until
+  // the user stops and restarts transport (triggering a full render-list
+  // rebuild where every renderer's runInitialPasses runs from clean
+  // state).
+  void seedInitialOutputs(score::gfx::RenderList& renderer) override
+  {
+    constexpr bool has_scene_out = scene_output_introspection<Node_T>::size > 0;
+    constexpr bool has_geometry_out
+        = avnd::geometry_output_introspection<Node_T>::size > 0;
+
+    // Nothing to seed for nodes that publish neither scenes nor geometry.
+    if constexpr(has_scene_out || has_geometry_out)
+    {
+      auto& owner = node();
+
+      // The render loop only calls update() (and thus processControlIn) on
+      // the first frame after reconcile. Apply the control values that
+      // arrived before this renderer existed right away, so the defaults
+      // do not leak through for one frame.
+      owner.processControlIn(
+          *this, *state, m_last_message, owner.last_message, owner.m_ctx);
+
+      // Run the node once so that its outputs hold something to publish.
+      if_possible((*state)());
+
+      // Publish on every existing output edge. Scene and geometry ports are
+      // both stamped score::gfx::Types::Geometry at runtime, so the upload
+      // helper is picked from compile-time introspection rather than from
+      // port->type. For each connected port, every scene upload runs
+      // before the first geometry upload.
+      for(auto* port : owner.output)
+      {
+        const bool connected = port && !port->edges.empty();
+        if(!connected)
+          continue;
+
+        if constexpr(has_scene_out)
+        {
+          for(auto* edge : port->edges)
+            scene_outs.upload(renderer, *this->state, *edge);
+        }
+        if constexpr(has_geometry_out)
+        {
+          for(auto* edge : port->edges)
+            geometry_outs.upload(renderer, *this->state, *edge);
+        }
+      }
+    }
   }
 
   void update(
@@ -145,8 +327,11 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
     }
   }
 
-  void release(score::gfx::RenderList& r) override
+  void releaseState(score::gfx::RenderList& r) override
   {
+    if(!m_initialized)
+      return;
+
     if constexpr(avnd::texture_input_introspection<Node_T>::size > 0)
       texture_ins.release();
 
@@ -159,12 +344,38 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
     if constexpr(avnd::geometry_input_introspection<Node_T>::size > 0)
       geometry_ins.release(r);
 
+    if constexpr(scene_input_introspection<Node_T>::size > 0)
+      scene_ins.release(r);
+
+    // Symmetric with the other *_outs.release calls above. No-ops today
+    // (scene_outputs_storage / geometry_outputs_storage own no QRhi
+    // resources — scene_spec is value-semantics + a shared_ptr; geometry
+    // wraps non-owning pointers + transform values). Wired so future
+    // RHI handles on the storages release cleanly.
+    if constexpr(avnd::geometry_output_introspection<Node_T>::size > 0)
+      geometry_outs.release(r);
+    if constexpr(scene_output_introspection<Node_T>::size > 0)
+      scene_outs.release(r);
+
     if constexpr(avnd::texture_input_introspection<Node_T>::size > 0 || avnd::texture_output_introspection<Node_T>::size > 0)
     {
       this->defaultRelease(r);
     }
 
     if_possible(state->release(r));
+
+    // Clear the optional renderlist backchannel. Paired with the init
+    // assignment; same SFINAE guard so nodes without the member are
+    // unaffected.
+    if constexpr(requires { state->renderlist = nullptr; })
+      state->renderlist = nullptr;
+
+    m_initialized = false;
+  }
+
+  void release(score::gfx::RenderList& r) override
+  {
+    releaseState(r); // returns early unless m_initialized is set
   }
 
   void inputAboutToFinish(
@@ -197,59 +408,112 @@ struct GfxRenderer<Node_T> final : score::gfx::GenericNodeRenderer
     auto& parent = node();
     auto& rhi = *renderer.state.rhi;
 
-    if constexpr(
-        avnd::texture_input_introspection<Node_T>::size > 0
-        || avnd::buffer_input_introspection<Node_T>::size > 0
-        || avnd::geometry_input_introspection<Node_T>::size > 0)
+    // runInitialPasses is called once PER OUTGOING EDGE per frame. The
+    // expensive work below — rhi.finish() sync point, input readbacks,
+    // operator()(), and output buffer/texture uploads — only needs to run
+    // ONCE per frame: its result lives in `*this->state` and the storages,
+    // identical for every edge. We dedupe on RenderList::frame, which is
+    // bumped exactly once at the end of each RenderList::render() (see
+    // RenderList.cpp). This is NOT a transport-date gate: it does not
+    // freeze scene producers when the transport is paused (token.date
+    // frozen) — operator()() still re-runs every frame so live parameter
+    // edits take effect immediately. The per-edge geometry/scene uploads
+    // (which genuinely differ per edge — they target edge.sink) run for
+    // EVERY edge, below the guard.
+    const bool firstEdgeThisFrame = (renderer.frame != m_last_frame);
+    if(firstEdgeThisFrame)
     {
-      // FIXME: for geometry, here we should optimize if we know we aren't going to need them on the CPU, OR if it is a type ?
-      // Insert a synchronisation point to allow readbacks to complete
-      rhi.finish();
-    }
+      m_last_frame = renderer.frame;
 
-    // If we are paused, we don't run the processor implementation.
-    if(parent.last_message.token.date == m_last_time)
-      return;
-    m_last_time = parent.last_message.token.date;
+      if constexpr(
+          avnd::texture_input_introspection<Node_T>::size > 0
+          || avnd::buffer_input_introspection<Node_T>::size > 0
+          || avnd::geometry_input_introspection<Node_T>::size > 0)
+      {
+        // FIXME: for geometry, here we should optimize if we know we aren't going to need them on the CPU, OR if it is a type ?
+        // Insert a synchronisation point to allow readbacks to complete
+        rhi.finish();
+      }
 
-    if constexpr(avnd::texture_input_introspection<Node_T>::size > 0)
-      texture_ins.runInitialPasses(*this, rhi);
-    if constexpr(avnd::buffer_input_introspection<Node_T>::size > 0)
-      buffer_ins.readInputBuffers(renderer, parent, *state);
-    if constexpr(avnd::geometry_input_introspection<Node_T>::size > 0)
-      geometry_ins.readInputGeometries(renderer, this->geometry, parent, *state);
+      if constexpr(avnd::texture_input_introspection<Node_T>::size > 0)
+        texture_ins.runInitialPasses(*this, rhi);
+      if constexpr(avnd::buffer_input_introspection<Node_T>::size > 0)
+        buffer_ins.readInputBuffers(renderer, parent, *state);
+      if constexpr(avnd::geometry_input_introspection<Node_T>::size > 0)
+        geometry_ins.readInputGeometries(renderer, this->geometry, parent, *state);
+      if constexpr(scene_input_introspection<Node_T>::size > 0)
+        scene_ins.readInputScenes(this->scene, *state);
 
-    buffer_outs.prepareUpload(*res);
+      buffer_outs.prepareUpload(*res);
 
-    // Run the processor
-    if_possible(state->runInitialPasses(renderer, commands, res, edge));
-    if_possible((*state)());
+      // Run the processor
+      if_possible(state->runInitialPasses(renderer, commands, res, edge));
+      if_possible((*state)());
 
-    // Upload output buffers
-    if constexpr(avnd::buffer_output_introspection<Node_T>::size > 0)
-      buffer_outs.upload(renderer, *state, *res);
+      // Upload output buffers
+      if constexpr(avnd::buffer_output_introspection<Node_T>::size > 0)
+        buffer_outs.upload(renderer, *state, *res);
 
-    // Upload output textures
-    if constexpr(avnd::texture_output_introspection<Node_T>::size > 0)
-    {
-      texture_outs.runInitialPasses(*this, renderer, res);
+      // Upload output textures
+      if constexpr(avnd::texture_output_introspection<Node_T>::size > 0)
+      {
+        texture_outs.runInitialPasses(*this, renderer, res);
 
-      commands.resourceUpdate(res);
-      res = renderer.state.rhi->nextResourceUpdateBatch();
+        commands.resourceUpdate(res);
+        res = renderer.state.rhi->nextResourceUpdateBatch();
+      }
+
+      // Copy the data to the model node
+      parent.processControlOut(*this->state);
     }
 
+    // Per-edge uploads: these target the specific downstream sink
+    // (edge.sink) and must run for every outgoing edge, even on edges
+    // after the first this frame. The producer's output is already
+    // populated in *this->state by the once-per-frame body above.
+
     // Copy the geometry
     if constexpr(avnd::geometry_output_introspection<Node_T>::size > 0)
       geometry_outs.upload(renderer, *this->state, edge);
 
-    // Copy the data to the model node
-    parent.processControlOut(*this->state);
+    // Copy the scene (travels on the same Gfx::GeometryOutlet as geometry,
+    // published via NodeRenderer::process(scene_spec)).
+    if constexpr(scene_output_introspection<Node_T>::size > 0)
+      scene_outs.upload(renderer, *this->state, edge);
+  }
+
+  // Customization point for halp nodes that produce their output via
+  // their own GPU pipeline (post-process effects, custom rasterizers).
+  //
+  // Default GenericNodeRenderer::runRenderPass calls defaultRenderPass,
+  // which uses a pre-built fullscreen-quad pipeline that samples
+  // m_samplers[0] (the upstream input texture, set up by
+  // m_material.init) and writes to the consumer's per-edge RT via the
+  // generic_texgen_fs shader. That hard-codes "blit upstream input →
+  // downstream input RT" — which is fine for halp filter nodes whose
+  // output IS just a CPU-uploaded copy of their input, but is wrong for
+  // any node that did real work in runInitialPasses (writing to its own
+  // m_outputTex / a private RT): the framework's input-blit overwrites
+  // the result, so the consumer sees the unmodified upstream.
+  //
+  // When the halp class declares its own runRenderPass, we hand off to
+  // it. The method runs INSIDE the consumer's beginPass/endPass cycle —
+  // it is expected to record draw commands only (no beginPass/endPass
+  // on its own) targeting the currently-bound (per-edge) render target.
+  void runRenderPass(
+      score::gfx::RenderList& renderer, QRhiCommandBuffer& commands,
+      score::gfx::Edge& edge) override
+  {
+    if constexpr(!requires { state->runRenderPass(renderer, commands, edge); })
+      score::gfx::GenericNodeRenderer::runRenderPass(renderer, commands, edge);
+    else
+      state->runRenderPass(renderer, commands, edge); // node-provided pass
   }
 };
 
 template <typename Node_T>
   requires(
-    (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size) >= 1
+    (avnd::texture_output_introspection<Node_T>::size + avnd::buffer_output_introspection<Node_T>::size + avnd::geometry_output_introspection<Node_T>::size + scene_output_introspection<Node_T>::size + avnd::gpu_render_target_output_port_output_introspection<Node_T>::size) >= 1
   )
 struct GfxNode<Node_T> final
     : CustomGfxNodeBase
diff --git a/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp b/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp
index 676468cded..afebcbe7a7 100644
--- a/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp
@@ -266,6 +266,7 @@ struct handle_update
           requires { C::vertex; } || requires { C::index; })
       {
         auto buf = rhi.newBuffer(buffer_type<C>(), usage<C>(), command.size);
+        buf->setName("GppCoroutines::vbuf_or_ibuf");
         buf->create();
         return reinterpret_cast<typename C::return_type>(buf);
       }
@@ -279,6 +280,7 @@ struct handle_update
           requires { C::ubo; } || requires { C::storage; })
       {
         auto buf = rhi.newBuffer(buffer_type<C>(), usage<C>(), command.size);
+        buf->setName("GppCoroutines::ubo_or_ssbo");
         buf->create();
 
         // Replace it in our bindings
diff --git a/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp b/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp
index 9456a45446..c4d8951803 100644
--- a/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp
@@ -154,7 +154,7 @@ struct generate_shaders
             ,
             C::binding(), C::name());
 
-        boost::pfr::for_each_field(field, write_binding{shader});
+        avnd::pfr::for_each_field(field, write_binding{shader});
 
         shader += fmt::format("}};\n\n");
       }
@@ -166,7 +166,7 @@ struct generate_shaders
             ,
             C::binding(), C::name());
 
-        boost::pfr::for_each_field(field, write_binding{shader});
+        avnd::pfr::for_each_field(field, write_binding{shader});
 
         shader += fmt::format("}};\n\n");
       }
@@ -180,24 +180,24 @@ struct generate_shaders
     std::string shader = "#version 450\n\n";
 
     if constexpr(requires { lay.vertex_input; })
-      boost::pfr::for_each_field(lay.vertex_input, write_input{shader});
+      avnd::pfr::for_each_field(lay.vertex_input, write_input{shader});
     else if constexpr(requires { typename T::vertex_input; })
-      boost::pfr::for_each_field(typename T::vertex_input{}, write_input{shader});
+      avnd::pfr::for_each_field(typename T::vertex_input{}, write_input{shader});
     else
-      boost::pfr::for_each_field(
+      avnd::pfr::for_each_field(
           DefaultPipeline::layout::vertex_input{}, write_input{shader});
 
     if constexpr(requires { lay.vertex_output; })
-      boost::pfr::for_each_field(lay.vertex_output, write_output{shader});
+      avnd::pfr::for_each_field(lay.vertex_output, write_output{shader});
     else if constexpr(requires { typename T::vertex_output; })
-      boost::pfr::for_each_field(typename T::vertex_output{}, write_output{shader});
+      avnd::pfr::for_each_field(typename T::vertex_output{}, write_output{shader});
 
     shader += "\n";
 
     if constexpr(requires { lay.bindings; })
-      boost::pfr::for_each_field(lay.bindings, write_bindings{shader});
+      avnd::pfr::for_each_field(lay.bindings, write_bindings{shader});
     else if constexpr(requires { typename T::bindings; })
-      boost::pfr::for_each_field(typename T::bindings{}, write_bindings{shader});
+      avnd::pfr::for_each_field(typename T::bindings{}, write_bindings{shader});
 
     return shader;
   }
@@ -208,21 +208,21 @@ struct generate_shaders
     std::string shader = "#version 450\n\n";
 
     if constexpr(requires { lay.fragment_input; })
-      boost::pfr::for_each_field(lay.fragment_input, write_input{shader});
+      avnd::pfr::for_each_field(lay.fragment_input, write_input{shader});
     else if constexpr(requires { typename T::fragment_input; })
-      boost::pfr::for_each_field(typename T::fragment_input{}, write_input{shader});
+      avnd::pfr::for_each_field(typename T::fragment_input{}, write_input{shader});
 
     if constexpr(requires { lay.fragment_output; })
-      boost::pfr::for_each_field(lay.fragment_output, write_output{shader});
+      avnd::pfr::for_each_field(lay.fragment_output, write_output{shader});
     else if constexpr(requires { typename T::fragment_output; })
-      boost::pfr::for_each_field(typename T::fragment_output{}, write_output{shader});
+      avnd::pfr::for_each_field(typename T::fragment_output{}, write_output{shader});
 
     shader += "\n";
 
     if constexpr(requires { lay.bindings; })
-      boost::pfr::for_each_field(lay.bindings, write_bindings{shader});
+      avnd::pfr::for_each_field(lay.bindings, write_bindings{shader});
     else if constexpr(requires { typename T::bindings; })
-      boost::pfr::for_each_field(typename T::bindings{}, write_bindings{shader});
+      avnd::pfr::for_each_field(typename T::bindings{}, write_bindings{shader});
 
     return shader;
   }
@@ -250,7 +250,7 @@ struct generate_shaders
     fstr.resize(fstr.size() - 2);
     fstr += ") in;\n\n";
 
-    boost::pfr::for_each_field(lay.bindings, write_bindings{fstr});
+    avnd::pfr::for_each_field(lay.bindings, write_bindings{fstr});
 
     return fstr;
   }
diff --git a/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp b/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp
index 8f5d2b6308..7ff165505e 100644
--- a/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp
@@ -65,6 +65,7 @@ struct GpuComputeRenderer final : ComputeRendererBaseType<Node_T>
   QRhiComputePipeline* m_pipeline{};
 
   bool m_createdPipeline{};
+  bool m_initialized{};
 
   int sampler_k = 0;
   int ubo_k = 0;
@@ -175,7 +176,7 @@ struct GpuComputeRenderer final : ComputeRendererBaseType<Node_T>
     QVarLengthArray<QRhiShaderResourceBinding, 8> bindings;
 
     using bindings_type = decltype(Node_T::layout::bindings);
-    boost::pfr::for_each_field(
+    avnd::pfr::for_each_field(
         bindings_type{}, [&](auto f) { bindings.push_back(initBinding(renderer, f)); });
 
     srb->setBindings(bindings.begin(), bindings.end());
@@ -230,8 +231,28 @@ struct GpuComputeRenderer final : ComputeRendererBaseType<Node_T>
     createdUbos[ubo_type::binding()] = ubo;
   }
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  // Compute renderers own a single shared compute pipeline + SRB and
+  // allocate no per-output-edge state, so adding or removing an edge is a
+  // no-op. Both overrides must still exist: NodeRenderer::removeOutputPass
+  // is pure-virtual, and Graph.cpp's incremental path drives every
+  // renderer through addOutputPass (per-edge passes a compute node
+  // simply doesn't have).
+  void removeOutputPass(score::gfx::RenderList&, score::gfx::Edge&) override { }
+  void addOutputPass(
+      score::gfx::RenderList&, score::gfx::Edge&, QRhiResourceUpdateBatch&) override
   {
+  }
+
+  // All edge-independent setup lives in initState(), mirroring
+  // CustomGpuRenderer in GpuNode.hpp. The incremental edge-rewire path
+  // (Graph.cpp) only calls initState()/releaseState()/addOutputPass() on
+  // newly-spawned renderers; a compute node inserted live would otherwise
+  // never allocate its pipeline/SRB and run against uninitialised state.
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    if(m_initialized)
+      return;
+
     auto& parent = node();
     if constexpr(requires { state->prepare(); })
     {
@@ -255,6 +276,13 @@ struct GpuComputeRenderer final : ComputeRendererBaseType<Node_T>
       SCORE_ASSERT(m_pipeline->create());
       m_createdPipeline = true;
     }
+
+    m_initialized = true;
+  }
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    initState(renderer, res); // returns early if m_initialized is already set
   }
 
   std::vector<QRhiShaderResourceBinding> tmp;
@@ -337,8 +365,11 @@ struct GpuComputeRenderer final : ComputeRendererBaseType<Node_T>
     }
   }
 
-  void release(score::gfx::RenderList& r) override
+  void releaseState(score::gfx::RenderList& r) override
   {
+    if(!m_initialized)
+      return;
+
     m_createdPipeline = false;
 
     // Release the object's internal states
@@ -382,6 +413,13 @@ struct GpuComputeRenderer final : ComputeRendererBaseType<Node_T>
 
     sampler_k = 0;
     ubo_k = 0;
+
+    m_initialized = false;
+  }
+
+  void release(score::gfx::RenderList& r) override
+  {
+    releaseState(r); // returns early unless m_initialized is set
   }
 
   void runCompute(
diff --git a/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp b/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp
index c202ab2d27..83827b5f9b 100644
--- a/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp
@@ -6,6 +6,7 @@
 #include <Crousti/Metadatas.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <ossia/detail/algorithms.hpp>
 #include <Gfx/Graph/Uniforms.hpp>
 
 // #include <gpp/ports.hpp>
@@ -27,9 +28,17 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer
 
   score::gfx::PassMap m_p;
 
+  // Per-pass "pipeline + SRB created" flags, kept index-parallel with m_p
+  // and `states` (same push_back in addOutputPass / same erase in
+  // removeOutputPass). A single global m_createdPipeline could not handle
+  // a pass added live onto an update()-driven node: the first frame would
+  // (re)create already-live passes, or skip the new one entirely. Each
+  // pass now gates its own srb->create()/pipeline->create().
+  ossia::small_vector<bool, 2> m_passCreated;
+
   score::gfx::MeshBuffers m_meshBuffer{};
 
-  bool m_createdPipeline{};
+  QRhiShaderResourceBindings* m_srb{};
 
   int sampler_k = 0;
   ossia::flat_map<int, QRhiBuffer*> createdUbos;
@@ -113,14 +122,14 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer
     if constexpr(requires { decltype(Node_T::layout::bindings){}; })
     {
       using bindings_type = decltype(Node_T::layout::bindings);
-      boost::pfr::for_each_field(bindings_type{}, [&](auto f) {
+      avnd::pfr::for_each_field(bindings_type{}, [&](auto f) {
         bindings.push_back(initBinding(renderer, f));
       });
     }
     else if constexpr(requires { sizeof(typename Node_T::layout::bindings); })
     {
       using bindings_type = typename Node_T::layout::bindings;
-      boost::pfr::for_each_field(bindings_type{}, [&](auto f) {
+      avnd::pfr::for_each_field(bindings_type{}, [&](auto f) {
         bindings.push_back(initBinding(renderer, f));
       });
     }
@@ -201,18 +210,18 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer
     createdUbos[ubo_type::binding()] = ubo;
   }
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
-    auto& parent = node();
-    if constexpr(requires { states[0].prepare(); })
-    {
-      for(auto& state : states)
-      {
-        parent.processControlIn(
-            *this, *state, m_last_message, parent.last_message, parent.m_ctx);
-        state.prepare();
-      }
-    }
+    if(m_initialized)
+      return;
+
+    // NB: prepare()/processControlIn for graphics nodes is NOT invoked
+    // here — `states` is empty at initState time (states are constructed
+    // per-edge in addOutputPass), so there is nothing to prepare. The old
+    // `states[0].prepare()` detection was also doubly-wrong: `states[0]`
+    // is a shared_ptr, so the requires-expression never matched, and even
+    // if it had, indexing an empty vector is UB. The prepare/control-in
+    // call now happens in addOutputPass right after each state is built.
 
     if(m_meshBuffer.buffers.empty())
     {
@@ -224,34 +233,154 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer
     avnd::input_introspection<Node_T>::for_all(
         [this, &renderer](auto f) { init_input(renderer, f); });
 
-    // Create the initial srbs
-    // TODO when implementing multi-pass, we may have to
-    // move this back inside the loop below as they may depend on the pipelines...
-    auto srb = initBindings(renderer);
+    // Create the shared shader resource bindings
+    m_srb = initBindings(renderer);
 
-    // Create the states and pipelines
-    for(score::gfx::Edge* edge : parent.output[0]->edges)
+    m_initialized = true;
+  }
+
+  void addOutputPass(
+      score::gfx::RenderList& renderer, score::gfx::Edge& edge,
+      QRhiResourceUpdateBatch& res) override
+  {
+    auto& parent = node();
+    auto rt = renderer.renderTargetForOutput(edge);
+    if(rt.renderTarget)
     {
-      auto rt = renderer.renderTargetForOutput(*edge);
-      if(rt.renderTarget)
+      states.push_back(std::make_shared<Node_T>());
+      prepareNewState(states.back(), parent);
+
+      // Graphics nodes that declare prepare(): apply any pending control
+      // input and run prepare() on the freshly-constructed state, here —
+      // not in initState, where `states` is still empty. Detection uses
+      // operator-> because states.back() is a shared_ptr<Node_T>.
+      if constexpr(requires { states.back()->prepare(); })
       {
-        states.push_back(std::make_shared<Node_T>());
-        prepareNewState(states.back(), parent);
+        parent.processControlIn(
+            *this, *states.back(), m_last_message, parent.last_message, parent.m_ctx);
+        states.back()->prepare();
+      }
+
+      auto ps = createRenderPipeline(renderer, rt);
+      ps->setShaderResourceBindings(m_srb);
+
+      m_p.emplace_back(&edge, score::gfx::Pass{rt, score::gfx::Pipeline{ps, m_srb}, nullptr});
+      m_passCreated.push_back(false);
+
+      // No update step: we can directly create this pass's pipeline here.
+      // The SRB is shared across all passes (m_srb); creating it is
+      // idempotent for our purposes, and the per-pass flag tracks the
+      // pipeline that is genuinely per-edge.
+      if constexpr(!requires { &Node_T::update; })
+      {
+        SCORE_ASSERT(m_srb->create());
+        SCORE_ASSERT(ps->create());
+        m_passCreated.back() = true;
+      }
+    }
+  }
 
-        auto ps = createRenderPipeline(renderer, rt);
-        ps->setShaderResourceBindings(srb);
+  bool hasOutputPassForEdge(score::gfx::Edge& edge) const override
+  {
+    const auto is_edge = [&edge](const auto& entry) { return entry.first == &edge; };
+    return ossia::find_if(m_p, is_edge) != m_p.end();
+  }
+
+  void removeOutputPass(score::gfx::RenderList&, score::gfx::Edge& edge) override
+  {
+    // Undo addOutputPass for `edge`: drop its m_p entry plus the slots at
+    // the same index in `states` and m_passCreated. Unknown edges are ignored.
+    // NOTE(review): unlike releaseState, the erased state's release()
+    // promises are not run here — confirm nothing per-edge is leaked.
+    const auto is_edge = [&edge](const auto& entry) { return entry.first == &edge; };
+    const auto pos = ossia::find_if(m_p, is_edge);
+    if(pos == m_p.end())
+      return;
+    const auto slot = std::distance(m_p.begin(), pos);
+    // Detach the shared m_srb first so release() does not deleteLater it.
+    pos->second.p.srb = nullptr;
+    pos->second.release();
+    m_p.erase(pos);
+    if((std::size_t)slot < states.size())
+      states.erase(states.begin() + slot);
+    if((std::size_t)slot < m_passCreated.size())
+      m_passCreated.erase(m_passCreated.begin() + slot);
+  }
+
+  void releaseState(score::gfx::RenderList& r) override
+  {
+    if(!m_initialized)
+      return;
 
-        m_p.emplace_back(edge, score::gfx::Pipeline{ps, srb});
+    m_passCreated.clear();
 
-        // No update step: we can directly create the pipeline here
-        if constexpr(!requires { &Node_T::update; })
+    // Release the object's internal states
+    if constexpr(requires { &Node_T::release; })
+    {
+      for(auto& state : states)
+      {
+        for(auto& promise : state->release())
         {
-          SCORE_ASSERT(srb->create());
-          SCORE_ASSERT(ps->create());
-          m_createdPipeline = true;
+          gpp::qrhi::handle_release handler{*r.state.rhi};
+          visit(handler, promise.current_command);
         }
       }
     }
+    states.clear();
+
+    // Release the allocated mesh buffers
+    m_meshBuffer = {};
+
+    // Release the allocated textures
+    for(auto& [id, tex] : this->createdTexs)
+      tex->deleteLater();
+    this->createdTexs.clear();
+
+    // Release the allocated samplers
+    for(auto& [id, sampl] : this->createdSamplers)
+      sampl->deleteLater();
+    this->createdSamplers.clear();
+
+    // Release the allocated ubos
+    for(auto& [id, ubo] : this->createdUbos)
+      ubo->deleteLater();
+    this->createdUbos.clear();
+
+    // Release the allocated rts
+    for(auto [port, rt] : m_rts)
+      rt.release();
+    m_rts.clear();
+
+    // Release the allocated pipelines. Each Pass::p.srb refers to the
+    // SAME shared m_srb (see addOutputPass); null it out per-pass before
+    // Pipeline::release() so the shared SRB isn't deleteLater'd once per
+    // pass (it survived previously only via QRhi's QSet dedup), then
+    // delete it exactly once below — covering the m_p-empty case too,
+    // which formerly leaked m_srb. Mirrors removeOutputPass.
+    for(auto& pass : m_p)
+    {
+      pass.second.p.srb = nullptr; // shared — owned by initState
+      pass.second.release();
+    }
+    m_p.clear();
+    if(m_srb)
+      m_srb->deleteLater();
+    m_srb = nullptr;
+
+    m_meshBuffer = {};
+
+    sampler_k = 0;
+
+    m_initialized = false;
+  }
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    // Full build: shared state first, then addOutputPass for each edge of port 0.
+    initState(renderer, res);
+    const auto& outgoing = node().output[0]->edges;
+    for(score::gfx::Edge* out_edge : outgoing)
+      addOutputPass(renderer, *out_edge, res);
   }
 
   std::vector<QRhiShaderResourceBinding> tmp;
@@ -289,14 +418,22 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer
       // as we have to take into account that buffers could be allocated, freed, etc.
       // and thus updated in the shader resource bindings
       SCORE_ASSERT(states.size() == m_p.size());
+      SCORE_ASSERT(states.size() == m_passCreated.size());
       //SCORE_SOFT_ASSERT(state.size() == edges);
       for(int k = 0; k < states.size(); k++)
       {
         auto& state = *states[k];
         auto& pass = m_p[k].second;
 
+        // Per-pass creation flag: a pass added live (e.g. a new output
+        // edge onto an update()-driven node) starts at false and gets its
+        // srb/pipeline created on the next update; passes already live
+        // keep their pipeline. A single global flag would skip the new
+        // pass entirely (or needlessly destroy the live ones).
+        const bool created = m_passCreated[k];
+
         bool srb_touched{false};
-        tmp.assign(pass.srb->cbeginBindings(), pass.srb->cendBindings());
+        tmp.assign(pass.p.srb->cbeginBindings(), pass.p.srb->cendBindings());
         for(auto& promise : state.update())
         {
           using ret_type = decltype(promise.feedback_value);
@@ -307,75 +444,24 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer
 
         if(srb_touched)
         {
-          if(m_createdPipeline)
-            pass.srb->destroy();
+          if(created)
+            pass.p.srb->destroy();
 
-          pass.srb->setBindings(tmp.begin(), tmp.end());
+          pass.p.srb->setBindings(tmp.begin(), tmp.end());
         }
 
-        if(!m_createdPipeline)
+        if(!created)
         {
-          SCORE_ASSERT(pass.srb->create());
-          SCORE_ASSERT(pass.pipeline->create());
+          SCORE_ASSERT(pass.p.srb->create());
+          SCORE_ASSERT(pass.p.pipeline->create());
+          m_passCreated[k] = true;
         }
       }
-      m_createdPipeline = true;
       tmp.clear();
     }
   }
 
-  void release(score::gfx::RenderList& r) override
-  {
-    m_createdPipeline = false;
-
-    // Release the object's internal states
-    if constexpr(requires { &Node_T::release; })
-    {
-      for(auto& state : states)
-      {
-        for(auto& promise : state->release())
-        {
-          gpp::qrhi::handle_release handler{*r.state.rhi};
-          visit(handler, promise.current_command);
-        }
-      }
-    }
-    states.clear();
-
-    // Release the allocated mesh buffers
-    m_meshBuffer = {};
-
-    // Release the allocated textures
-    for(auto& [id, tex] : this->createdTexs)
-      tex->deleteLater();
-    this->createdTexs.clear();
-
-    // Release the allocated samplers
-    for(auto& [id, sampl] : this->createdSamplers)
-      sampl->deleteLater();
-    this->createdSamplers.clear();
-
-    // Release the allocated ubos
-    for(auto& [id, ubo] : this->createdUbos)
-      ubo->deleteLater();
-    this->createdUbos.clear();
-
-    // Release the allocated rts
-    // TODO investigate why reference does not work here:
-    for(auto [port, rt] : m_rts)
-      rt.release();
-    m_rts.clear();
-
-    // Release the allocated pipelines
-    for(auto& pass : m_p)
-      pass.second.release();
-    m_p.clear();
-
-    m_meshBuffer = {};
-    m_createdPipeline = false;
-
-    sampler_k = 0;
-  }
+  void release(score::gfx::RenderList& r) override { releaseState(r); }
 
   void runInitialPasses(
       score::gfx::RenderList& renderer, QRhiCommandBuffer& commands,
diff --git a/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp b/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp
index a18b24f0fa..99ce89117a 100644
--- a/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp
@@ -8,6 +8,7 @@
 #include <Crousti/GppCoroutines.hpp>
 #include <Crousti/GppShaders.hpp>
 #include <Crousti/MessageBus.hpp>
+#include <Crousti/SceneConcepts.hpp>
 #include <Crousti/TextureConversion.hpp>
 #include <Crousti/TextureFormat.hpp>
 #include <Gfx/GfxExecNode.hpp>
@@ -35,6 +36,7 @@
 #include <avnd/introspection/output.hpp>
 #include <fmt/format.h>
 #include <gpp/layout.hpp>
+#include <halp/texture.hpp>
 
 #include <score_plugin_avnd_export.h>
 
@@ -170,6 +172,8 @@ struct GpuProcessIns
   {
     using node_type = std::remove_cvref_t<decltype(gpu.node())>;
     auto& node = const_cast<node_type&>(gpu.node());
+    if(field_index >= mess.input.size())
+      return;
     auto val = ossia::get_if<ossia::render_target_spec>(&mess.input[field_index]);
     if(!val)
       return;
@@ -181,6 +185,8 @@ struct GpuProcessIns
   {
     using node_type = std::remove_cvref_t<decltype(gpu.node())>;
     auto& node = const_cast<node_type&>(gpu.node());
+    if(field_index >= mess.input.size())
+      return;
     auto val = ossia::get_if<ossia::render_target_spec>(&mess.input[field_index]);
     if(!val)
       return;
@@ -190,10 +196,24 @@ struct GpuProcessIns
   template <avnd::geometry_port Field, std::size_t NField>
   void operator()(Field& t, avnd::field_index<NField> field_index)
   {
-    using node_type = std::remove_cvref_t<decltype(gpu.node())>;
-    auto& node = const_cast<node_type&>(gpu.node());
+    // Intentional no-op. Geometry data flows through its own publish path
+    // (geometry_inputs_storage::readInputGeometries / etc.); the
+    // GpuProcessIns visitor only handles per-message control fields
+    // (texture/parameter) — geometry data is not in the control message.
+    // The empty body keeps GpuProcessIns instantiable for nodes whose
+    // input list contains geometry fields without forcing them to hit
+    // the `= delete` catch-all at the end of this struct.
+  }
 
-    // FIXME
+  template <scene_port Field, std::size_t NField>
+    requires(!avnd::geometry_port<Field>)
+  void operator()(Field& t, avnd::field_index<NField> field_index)
+  {
+    // Intentional no-op: scene data flows through scene_inputs_storage /
+    // scene_outputs_storage, while GpuProcessIns only handles per-message
+    // control fields. The empty body keeps scene_port inputs away from the
+    // `= delete` catch-all below; the requires-clause stops a field that is
+    // both a geometry_port and a scene_port from making the call ambiguous.
   }
 
   void operator()(auto& t, auto field_index) = delete;
@@ -423,12 +443,24 @@ struct port_to_type_enum
   {
     return score::gfx::Types::Image;
   }
+  template <std::size_t I, avnd::gpu_render_target_output_port F>
+  constexpr auto operator()(avnd::field_reflection<I, F> p)
+  {
+    return score::gfx::Types::Image;
+  }
 
   template <std::size_t I, avnd::geometry_port F>
   constexpr auto operator()(avnd::field_reflection<I, F> p)
   {
     return score::gfx::Types::Geometry;
   }
+  // Scene ports reuse Types::Geometry — a scene is a richer form of geometry.
+  template <std::size_t I, scene_port F>
+    requires(!avnd::geometry_port<F>)
+  constexpr auto operator()(avnd::field_reflection<I, F> p)
+  {
+    return score::gfx::Types::Geometry;
+  }
   template <std::size_t I, avnd::mono_audio_port F>
   constexpr auto operator()(avnd::field_reflection<I, F> p)
   {
@@ -459,9 +491,25 @@ struct port_to_type_enum
   {
     using value_type = std::remove_cvref_t<decltype(F::value)>;
 
-    if constexpr(std::is_aggregate_v<value_type>)
+    if constexpr(std::is_array_v<value_type>)
+    {
+      static constexpr int sz = sizeof(value_type) / sizeof(value_type{}[0]);
+      if constexpr(sz == 2)
+      {
+        return score::gfx::Types::Vec2;
+      }
+      else if constexpr(sz == 3)
+      {
+        return score::gfx::Types::Vec3;
+      }
+      else if constexpr(sz == 4)
+      {
+        return score::gfx::Types::Vec4;
+      }
+    }
+    else if constexpr(std::is_aggregate_v<value_type>)
     {
-      constexpr int sz = boost::pfr::tuple_size_v<value_type>;
+      static constexpr int sz = avnd::pfr::tuple_size_v<value_type>;
       if constexpr(sz == 2)
       {
         return score::gfx::Types::Vec2;
@@ -484,19 +532,71 @@ struct port_to_type_enum
   }
 };
 
+// Compile-time port flags derived from a field's declarative metadata.
+// Inspects:
+//   - `texture_target` (texture_kind_of) — non-2D textures bypass the
+//     local-RT allocation and grab the upstream texture directly.
+//   - `samplable_depth` (samplable_depth_of) — opt-in to having the
+//     framework allocate a sampleable depth attachment on the producing
+//     edge's RT and expose its handle through `texture.depth_handle`,
+//     mirroring the semantics CSF/ISF shaders get via "DEPTH": true.
+template <typename Field>
+constexpr score::gfx::Flag port_flags_for_field() noexcept
+{
+  if constexpr(avnd::gpu_texture_port<Field>)
+  {
+    constexpr auto kind = halp::texture_kind_of<Field>();
+    constexpr bool nonD2 = (kind != halp::texture_kind::texture_2d); // any kind other than plain 2D
+    constexpr bool depth = halp::samplable_depth_of<Field>(); // field opted in to samplable depth
+    if constexpr(nonD2 && depth)
+      return score::gfx::Flag::GrabsFromSource | score::gfx::Flag::SamplableDepth;
+    else if constexpr(nonD2)
+      return score::gfx::Flag::GrabsFromSource;
+    else if constexpr(depth)
+      return score::gfx::Flag::SamplableDepth;
+  }
+  return score::gfx::Flag{}; // no flags: not a GPU texture port, or 2D without samplable depth
+}
+
+// Map QRhi's depth-format taxonomy onto halp's depth_format_t.
+// The four-format subset matches every depth format score's createRenderTarget
+// can produce (today always D32F, but the API accepts the others).
+inline constexpr halp::gpu_texture::depth_format_t qrhiToHalpDepthFormat(
+    QRhiTexture::Format f) noexcept
+{
+  using D = halp::gpu_texture::depth_format_t;
+  switch(f)
+  {
+    case QRhiTexture::D16:   return D::D16;
+    case QRhiTexture::D24:   return D::D24;
+    case QRhiTexture::D24S8: return D::D24S8;
+    case QRhiTexture::D32F:  return D::D32F;
+    default: break; // every other format takes the fallback below
+  }
+  return D::D32F; // fallback for formats not listed in the switch
+}
+
 template <typename Node_T>
 inline void initGfxPorts(auto* self, auto& input, auto& output)
 {
   avnd::input_introspection<Node_T>::for_all(
       [self, &input]<typename Field, std::size_t I>(avnd::field_reflection<I, Field> f) {
     static constexpr auto type = port_to_type_enum{}(f);
-    input.push_back(new score::gfx::Port{self, {}, type, {}, {}});
+    static constexpr auto flags = port_flags_for_field<Field>();
+    input.push_back(new score::gfx::Port{self, {}, type, flags, {}});
   });
   avnd::output_introspection<Node_T>::for_all(
       [self,
        &output]<typename Field, std::size_t I>(avnd::field_reflection<I, Field> f) {
     static constexpr auto type = port_to_type_enum{}(f);
-    output.push_back(new score::gfx::Port{self, {}, type, {}, {}});
+    // port_flags_for_field encodes INPUT-side sink semantics
+    // (GrabsFromSource → "sample the upstream's texture directly";
+    // SamplableDepth → "ask the producer for a sampleable depth
+    // attachment"). Neither has any meaning on an OUTPUT port — emitting
+    // them here would make the graph treat this node's own output as if it
+    // grabbed from / sampled some upstream source. Outputs carry no such
+    // flags.
+    output.push_back(new score::gfx::Port{self, {}, type, score::gfx::Flag{}, {}});
   });
 }
 
@@ -690,6 +790,13 @@ struct geometry_inputs_storage<T>
           allocated.push_back(buf);
           meshes.buffers[buffer_index] = buf;
         }
+        else if(auto* existing = meshes.buffers[buffer_index];
+                existing && existing->size() < bytesize)
+        {
+          // Buffer exists but is too small — resize it.
+          existing->setSize(bytesize);
+          existing->create();
+        }
 
         res->uploadStaticBuffer(meshes.buffers[buffer_index], 0, bytesize, data);
       }, [&](auto& write_buf, int buffer_index, void* handle) {
@@ -727,9 +834,11 @@ template <typename T>
   requires(avnd::geometry_input_introspection<T>::size == 0)
 struct geometry_inputs_storage<T>
 {
-  static void readInputBuffers(auto&&...) { }
+  static void readInputGeometries(auto&&...) { }
 
   static void inputAboutToFinish(auto&&...) { }
+
+  static void release(auto&&...) { }
 };
 
 template<typename T>
@@ -1034,7 +1143,7 @@ struct texture_inputs_storage<T>
   template <typename Tex>
   QRhiTexture* createInput(
       score::gfx::RenderList& renderer, score::gfx::Port* port, Tex& texture_spec,
-      const score::gfx::RenderTargetSpecs& spec)
+      const score::gfx::RenderTargetSpecs& spec, bool wantsSamplableDepth = false)
   {
     static constexpr auto flags
         = QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource;
@@ -1054,8 +1163,14 @@ struct texture_inputs_storage<T>
         fmt, spec.size, 1, flags);
 
     SCORE_ASSERT(texture->create());
+    // wantsSamplableDepth implies wantsDepth: createRenderTarget allocates
+    // a sampleable single-sample depth texture (with MSAA-resolve when
+    // available) instead of a renderbuffer / non-resolve depth target.
+    // Same shape ISF/CSF inputs get when their port has SamplableDepth.
+    const bool wantsDepth = renderer.requiresDepth(*port) || wantsSamplableDepth;
     m_rts[port] = score::gfx::createRenderTarget(
-        renderer.state, texture, renderer.samples(), renderer.requiresDepth(*port));
+        renderer.state, texture, renderer.samples(),
+        wantsDepth, wantsSamplableDepth);
     return texture;
   }
 
@@ -1065,6 +1180,21 @@ struct texture_inputs_storage<T>
     avnd::texture_input_introspection<T>::for_all_n2(
         avnd::get_inputs<T>(*self.state),
         [&]<typename F, std::size_t K, std::size_t N>(F& t, avnd::predicate_index<K>, avnd::field_index<N>) {
+      // Non-2D GPU texture inputs (cube / array / 3D) don't get a local
+      // render target — the port carries Flag::GrabsFromSource (set by
+      // initGfxPorts via texture_kind_of<F>()), the graph will populate
+      // t.texture.handle through updateInputTexture when the edge
+      // resolves. Skipping the allocation here avoids wasting a 2D
+      // colour attachment that would never be rendered into anyway.
+      if constexpr(avnd::gpu_texture_port<F>
+                   && halp::texture_kind_of<F>() != halp::texture_kind::texture_2d)
+      {
+        t.texture.kind = halp::texture_kind_of<F>();
+        // Handle + size populated later by updateInputTexture once the
+        // upstream is resolved.
+        return;
+      }
+
       auto& parent = self.node();
       auto spec = parent.resolveRenderTargetSpecs(N, renderer);
       if constexpr(requires {
@@ -1076,7 +1206,10 @@ struct texture_inputs_storage<T>
         spec.size.rheight() = t.request_height;
       }
 
-      auto tex = createInput(renderer, parent.input[N], t.texture, spec);
+      constexpr bool wantsSamplableDepth
+          = avnd::gpu_texture_port<F> && halp::samplable_depth_of<F>();
+      auto tex = createInput(
+          renderer, parent.input[N], t.texture, spec, wantsSamplableDepth);
       if constexpr(avnd::cpu_texture_port<F>)
       {
         t.texture.width = spec.size.width();
@@ -1087,6 +1220,16 @@ struct texture_inputs_storage<T>
         t.texture.handle = tex;
         t.texture.width = spec.size.width();
         t.texture.height = spec.size.height();
+        if constexpr(wantsSamplableDepth)
+        {
+          // The local RT just allocated owns a sampleable depth texture
+          // that the upstream renders into when the edge runs — same
+          // pointer, stable for the RT's lifetime, no per-frame refresh.
+          const auto& rt = m_rts[parent.input[N]];
+          t.texture.depth_handle = rt.depthTexture;
+          if(rt.depthTexture)
+            t.texture.depth_format = qrhiToHalpDepthFormat(rt.depthTexture->format());
+        }
       }
     });
   }
@@ -1196,7 +1339,7 @@ struct texture_inputs_storage<T>
 template <avnd::cpu_texture Tex>
 static QRhiTexture* updateTexture(auto& self, score::gfx::RenderList& renderer, int k, const Tex& cpu_tex)
 {
-  auto& [sampler, texture] = self.m_samplers[k];
+  auto& [sampler, texture, fb_] = self.m_samplers[k];
   if(texture)
   {
     auto sz = texture->pixelSize();
@@ -1213,8 +1356,8 @@ static QRhiTexture* updateTexture(auto& self, score::gfx::RenderList& renderer,
         QRhiTexture::Flag{});
     newtex->create();
     for(auto& [edge, pass] : self.m_p)
-      if(pass.srb)
-        score::gfx::replaceTexture(*pass.srb, sampler, newtex);
+      if(pass.p.srb)
+        score::gfx::replaceTexture(*pass.p.srb, sampler, newtex);
     texture = newtex;
 
     if(oldtex && oldtex != &renderer.emptyTexture())
@@ -1227,8 +1370,8 @@ static QRhiTexture* updateTexture(auto& self, score::gfx::RenderList& renderer,
   else
   {
     for(auto& [edge, pass] : self.m_p)
-      if(pass.srb)
-        score::gfx::replaceTexture(*pass.srb, sampler, &renderer.emptyTexture());
+      if(pass.p.srb)
+        score::gfx::replaceTexture(*pass.p.srb, sampler, &renderer.emptyTexture());
 
     return &renderer.emptyTexture();
   }
@@ -1362,7 +1505,7 @@ struct texture_outputs_storage<T>
   void release(auto& self, score::gfx::RenderList& r)
   {
     // Free outputs
-    for(auto& [sampl, texture] : self.m_samplers)
+    for(auto& [sampl, texture, fb_] : self.m_samplers)
     {
       if(texture != &r.emptyTexture())
         texture->deleteLater();
@@ -1491,7 +1634,7 @@ struct geometry_outputs_storage<T>
       SCORE_ASSERT(it != edge_sink->node->input.end());
       int n = it - edge_sink->node->input.begin();
 
-      rendered_node->second->process(n, spc);
+      rendered_node->second->process(n, spc, edge.source);
 
       // 3. Same for transform3d
 
@@ -1518,6 +1661,12 @@ struct geometry_outputs_storage<T>
         avnd::get_outputs(state),
         [&](auto& field, auto pred) { this->upload(renderer, field, edge, pred); });
   }
+
+  // Lifecycle parity with the other *_outs storages. The geometry_spec
+  // wrapper carries non-owning pointers + transform values today, so
+  // release is a no-op — wired so future RHI handles on the storage
+  // release cleanly.
+  void release(score::gfx::RenderList&) noexcept { }
 };
 
 
@@ -1529,7 +1678,131 @@ struct geometry_outputs_storage<T>
   {
 
   }
+  static void release(auto&&...) noexcept { }
+};
+
+// Scene output support (Crousti-side pending promotion to avendish).
+// The `scene_port` concept and `scene_dirt_flags` live in SceneConcepts.hpp
+// so the port-creation visitor in ProcessModelPortInit.hpp can reuse them.
+
+template <typename Field>
+using is_scene_port_t = boost::mp11::mp_bool<scene_port<Field>>;
+
+template <typename T>
+using scene_output_introspection =
+    avnd::predicate_introspection<typename avnd::outputs_type<T>::type, is_scene_port_t>;
+
+template <typename T>
+using scene_input_introspection =
+    avnd::predicate_introspection<typename avnd::inputs_type<T>::type, is_scene_port_t>;
+
+// Scene input transport: NodeRenderer::process(port, scene_spec, source)
+// already merges multi-producer scenes into `this->scene`, so scene_inputs_storage
+// only needs to copy that merged scene_spec into each halp scene input field
+// before operator()() runs. Cheap (shared_ptr assignment), no decode.
+template <typename T>
+struct scene_inputs_storage;
+
+template <typename T>
+  requires(scene_input_introspection<T>::size > 0)
+struct scene_inputs_storage<T>
+{
+  void readInputScenes(const ossia::scene_spec& scene, auto& state)
+  {
+    scene_input_introspection<T>::for_all( // visit every scene_port input of T
+        avnd::get_inputs<T>(state), [&](auto& field) { field.scene = scene; });
+  }
+
+  static void release(score::gfx::RenderList&) { } // no-op: this storage has no members to free
+};
+
+template <typename T>
+  requires(scene_input_introspection<T>::size == 0)
+struct scene_inputs_storage<T>
+{
+  static void readInputScenes(auto&&...) { }
+  static void release(auto&&...) { }
+};
+
+template <typename T>
+struct scene_outputs_storage;
+
+template <typename T>
+  requires(scene_output_introspection<T>::size > 0)
+struct scene_outputs_storage<T>
+{
+  template <scene_port Field, std::size_t N>
+  void upload(
+      score::gfx::RenderList& renderer, Field& ctrl, score::gfx::Edge& edge,
+      avnd::predicate_index<N>)
+  {
+    // Publish the scene every frame. The old behaviour skipped the push
+    // when `ctrl.dirty == 0` — but that broke multi-producer graphs: any
+    // other producer on the same downstream inlet (e.g. a legacy Geometry
+    // outlet of the same loader, or a Light node) pushes every frame
+    // unconditionally, and the consumer's NodeRenderer::process(...) logic
+    // replaces `this->scene` on the first push of each frame when
+    // `sceneChanged` is false (i.e. at frame start). A once-only scene push
+    // then gets overwritten every subsequent frame and its transforms are
+    // lost. Downstream consumers already short-circuit via shared_ptr
+    // identity + version (ScenePreprocessor checks m_cachedSceneState), so
+    // pushing every frame is cheap — just a few atomic refcount bumps.
+    //
+    // Producers can still use `ctrl.dirty` to track what changed for their
+    // own purposes; the push is no longer gated on it, only reset below.
+    if(!ctrl.scene.state)
+      return; // no scene state to publish
+
+    auto* edge_sink = edge.sink;
+    if(!edge_sink || !edge_sink->node)
+      return;
+
+    auto rendered_node = edge_sink->node->renderedNodes.find(&renderer);
+    if(rendered_node == edge_sink->node->renderedNodes.end())
+      return; // the sink node has no renderer registered for this RenderList
+
+    auto it = std::find(
+        edge_sink->node->input.begin(), edge_sink->node->input.end(), edge_sink);
+    if(it == edge_sink->node->input.end())
+      return;
+    int n = it - edge_sink->node->input.begin(); // index of the sink port among the sink node's inputs
+
+    // NodeRenderer::process(port, scene_spec, source_key) handles additive
+    // merging across multiple producers converging on the same sink port
+    // (keyed on the source edge's producer Port pointer), extracts a legacy
+    // geometry_spec for downstream consumers that only understand geometry,
+    // and sets sceneChanged=true.
+    rendered_node->second->process(n, ctrl.scene, edge.source);
+
+    if constexpr(requires { ctrl.dirty; })
+      ctrl.dirty = 0; // reset only when the field actually declares a `dirty` member
+  }
+
+  void upload(score::gfx::RenderList& renderer, auto& state, score::gfx::Edge& edge)
+  {
+    scene_output_introspection<T>::for_all_n(
+        avnd::get_outputs(state),
+        [&](auto& field, auto pred) { this->upload(renderer, field, edge, pred); });
+  }
+
+  // Lifecycle parity with texture_outputs_storage / buffer_outputs_storage:
+  // the storage owns no QRhi resources today (the scene_spec is a value-
+  // semantics struct + a shared_ptr to scene_state, both managed by their
+  // own destructors), so release is a documented no-op. Mirror the call
+  // site naming so future RHI handles added to the storage have a release
+  // hook ready, and so CpuFilterNode / CpuAnalysisNode releaseState calls
+  // are symmetric across all storages.
+  void release(score::gfx::RenderList&) noexcept { }
+};
+
+template <typename T>
+  requires(scene_output_introspection<T>::size == 0)
+struct scene_outputs_storage<T>
+{
+  static void upload(auto&&...) { }
+  static void release(auto&&...) noexcept { }
 };
+
 }
 
 #endif
diff --git a/src/plugins/score-plugin-avnd/Crousti/Layer.hpp b/src/plugins/score-plugin-avnd/Crousti/Layer.hpp
index da0ee9ef5d..74f84bffd5 100644
--- a/src/plugins/score-plugin-avnd/Crousti/Layer.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/Layer.hpp
@@ -15,6 +15,8 @@
 
 #include <avnd/concepts/layout.hpp>
 
+#include <avnd/common/aggregates.hpp>
+
 namespace oscr
 {
 template <typename Item>
@@ -333,13 +335,11 @@ struct LayoutBuilder final : Process::LayoutBuilderBase
     createdLayouts.push_back(new_l);
 
     {
-      using namespace boost::pfr;
-      using namespace boost::pfr::detail;
-      static constexpr int N = boost::pfr::tuple_size_v<Item>;
-      auto t = boost::pfr::structure_tie(item);
+      static constexpr int N = avnd::pfr::tuple_size_v<Item>;
+      auto t = avnd::pfr::detail::tie_as_tuple(item);
       [&]<std::size_t... I>(std::index_sequence<I...>) {
         using namespace std;
-        using namespace boost::pfr;
+        using namespace avnd::pfr;
 
         (this->walkLayout(get<I>(t), recursive_members...), ...);
       }(std::make_index_sequence<N>{});
diff --git a/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp b/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp
index f358ccfbde..4bc8fb57a3 100644
--- a/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp
@@ -19,7 +19,7 @@ struct Serializer
     if constexpr(std::is_arithmetic_v<F>)
       r.stream().stream << f;
     else if constexpr(std::is_aggregate_v<F>)
-      boost::pfr::for_each_field(f, *this);
+      avnd::pfr::for_each_field(f, *this);
     else if constexpr(avnd::list_ish<F>)
     {
       r.stream().stream << (int64_t)std::ssize(f);
@@ -115,8 +115,11 @@ struct Deserializer
   DataStreamWriter& r;
 
   template <typename F>
-  requires std::is_aggregate_v<F>
-  void operator()(F& f) { boost::pfr::for_each_field(f, *this); }
+    requires std::is_aggregate_v<F>
+  void operator()(F& f)
+  {
+    avnd::pfr::for_each_field(f, *this);
+  }
 
   template <typename F>
     requires(std::is_arithmetic_v<F>)
diff --git a/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp b/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp
index e8336fa5fd..8b9a2a0762 100644
--- a/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp
@@ -2,6 +2,7 @@
 #include <Process/ProcessMetadata.hpp>
 
 #include <Crousti/Concepts.hpp>
+#include <Crousti/SceneConcepts.hpp>
 
 #include <QString>
 
@@ -154,12 +155,25 @@ struct ProcessPortVisitor
     this->texture();
   }
 
+  template <std::size_t N, avnd::gpu_render_target_output_port Port>
+  void operator()(const avnd::field_reflection<N, Port>)
+  {
+    this->texture();
+  }
   template <std::size_t N, avnd::geometry_port Port>
   void operator()(const avnd::field_reflection<N, Port>)
   {
     this->geometry();
   }
 
+  // Scene ports travel through the same Process::PortType::Geometry slot.
+  template <std::size_t N, oscr::scene_port Port>
+    requires(!avnd::geometry_port<Port>)
+  void operator()(const avnd::field_reflection<N, Port>)
+  {
+    this->geometry();
+  }
+
   template <std::size_t N, avnd::curve_port Port>
   void operator()(const avnd::field_reflection<N, Port>)
   {
diff --git a/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp b/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp
index f617a3ede6..50348b4f58 100644
--- a/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp
@@ -1,35 +1,51 @@
 #pragma once
 
 #include <Crousti/Concepts.hpp>
+#include <Crousti/SceneConcepts.hpp>
 
 #include <ossia/dataflow/safe_nodes/port.hpp>
 
 #include <boost/mp11/algorithm.hpp>
-#include <boost/pfr.hpp>
 
 #include <avnd/introspection/input.hpp>
 #include <avnd/introspection/output.hpp>
 namespace oscr
 {
 template <typename T>
-concept GpuNode = avnd::texture_input_introspection<T>::size > 0
-                  || avnd::texture_output_introspection<T>::size > 0
-                  || avnd::buffer_input_introspection<T>::size > 0
-                  || avnd::buffer_output_introspection<T>::size > 0
-                  || avnd::geometry_input_introspection<T>::size > 0
-                  || avnd::geometry_output_introspection<T>::size > 0;
-
+concept GpuNode
+    = avnd::texture_input_introspection<T>::size > 0
+      || avnd::texture_output_introspection<T>::size > 0
+      || avnd::buffer_input_introspection<T>::size > 0
+      || avnd::buffer_output_introspection<T>::size > 0
+      || avnd::geometry_input_introspection<T>::size > 0
+      || avnd::geometry_output_introspection<T>::size > 0
+      || scene_input_introspection<T>::size > 0
+      || scene_output_introspection<T>::size > 0
+      || avnd::gpu_render_target_output_port_output_introspection<T>::size > 0;
+
+// Halp shader nodes (vertex+fragment / compute) currently route through
+// CustomGpuRenderer / GpuComputeRenderer, neither of which carries
+// geometry_ / scene_ I/O storage today. Exclude nodes that declare those
+// ports from the GpuGraphicsNode2 / GpuComputeNode2 dispatch so they fall
+// through to GfxNode<> (which has the proper storage via CpuFilterNode /
+// CpuAnalysisNode). When CustomGpuRenderer / GpuComputeRenderer gain
+// dedicated scene_ / geometry_ storage, drop the requires-clause exclusion
+// here and add init_input + readInput / upload paths in those renderers.
 template <typename T>
-concept GpuGraphicsNode2 = requires
-{
-  T::layout::graphics;
-};
+concept GpuGraphicsNode2
+    = requires { T::layout::graphics; }
+      && (avnd::geometry_input_introspection<T>::size == 0)
+      && (avnd::geometry_output_introspection<T>::size == 0)
+      && (scene_input_introspection<T>::size == 0)
+      && (scene_output_introspection<T>::size == 0);
 
 template <typename T>
-concept GpuComputeNode2 = requires
-{
-  T::layout::compute;
-};
+concept GpuComputeNode2
+    = requires { T::layout::compute; }
+      && (avnd::geometry_input_introspection<T>::size == 0)
+      && (avnd::geometry_output_introspection<T>::size == 0)
+      && (scene_input_introspection<T>::size == 0)
+      && (scene_output_introspection<T>::size == 0);
 
 template <typename T>
 concept is_gpu = GpuNode<T> || GpuGraphicsNode2<T> || GpuComputeNode2<T>;
diff --git a/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp b/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp
index f019937881..5a1cd5be99 100644
--- a/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp
@@ -19,8 +19,6 @@
 #include <ossia/detail/type_if.hpp>
 #include <ossia/detail/typelist.hpp>
 
-#include <boost/pfr.hpp>
-
 #include <QTimer>
 
 #include <avnd/binding/ossia/data_node.hpp>
diff --git a/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp b/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp
index 39c3de7bc4..89e6bb2931 100644
--- a/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp
+++ b/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp
@@ -4,6 +4,7 @@
 #include <Process/Process.hpp>
 
 #include <Crousti/Concepts.hpp>
+#include <Crousti/SceneConcepts.hpp>
 #include <Media/Sound/Drop/SoundDrop.hpp>
 
 #include <avnd/binding/ossia/port_base.hpp>
@@ -276,6 +277,22 @@ struct InletInitFunc
 #endif
   }
 
+  // Scene inputs reuse Gfx::GeometryInlet — a scene is a richer form of
+  // geometry and travels through the same Process-layer port. Mirror of the
+  // outlet overload below. Needed so scene-modifying halp nodes (Transform,
+  // SceneFilter, ...) can declare `struct { ossia::scene_spec scene; } scene_in;`
+  // in their inputs{} struct and get wired up by the framework.
+  template <scene_port T>
+    requires(!avnd::geometry_port<T>)
+  void operator()(const T& in, auto idx)
+  {
+#if SCORE_PLUGIN_GFX
+    auto p = new Gfx::GeometryInlet(portName<T>(), Id<Process::Port>(inlet++), &self);
+    setupNewPort(in, p);
+    ins.push_back(p);
+#endif
+  }
+
   template <std::size_t Idx, avnd::message T>
   void operator()(const avnd::field_reflection<Idx, T>& in, auto dummy)
   {
@@ -407,6 +424,16 @@ struct OutletInitFunc
 #endif
   }
 
+  template <avnd::gpu_render_target_output_port T>
+  void operator()(const T& out, auto idx)
+  {
+#if SCORE_PLUGIN_GFX
+    auto p = new Gfx::TextureOutlet(portName<T>(), Id<Process::Port>(outlet++), &self);
+    setupNewPort(out, p);
+    outs.push_back(p);
+#endif
+  }
+
   template <avnd::geometry_port T>
   void operator()(const T& out, auto idx)
   {
@@ -417,6 +444,20 @@ struct OutletInitFunc
 #endif
   }
 
+  // Scene output port: a field with an `ossia::scene_spec scene` member that is
+  // not already an avnd::geometry_port becomes a Gfx::GeometryOutlet. The Crousti
+  // upload path publishes the scene via NodeRenderer::process(scene_spec).
+  template <scene_port T>
+    requires(!avnd::geometry_port<T>)
+  void operator()(const T& out, auto idx)
+  {
+#if SCORE_PLUGIN_GFX
+    auto* port = new Gfx::GeometryOutlet(portName<T>(), Id<Process::Port>(outlet++), &self);
+    setupNewPort(out, port);
+    outs.push_back(port);
+#endif
+  }
+
   template <avnd::curve_port T>
   void operator()(const T& out, auto idx)
   {
diff --git a/src/plugins/score-plugin-avnd/Crousti/SceneConcepts.hpp b/src/plugins/score-plugin-avnd/Crousti/SceneConcepts.hpp
new file mode 100644
index 0000000000..abe4e50fa0
--- /dev/null
+++ b/src/plugins/score-plugin-avnd/Crousti/SceneConcepts.hpp
@@ -0,0 +1,45 @@
+#pragma once
+
+// Scene port concept — shared between Crousti's port setup (type dispatch,
+// port factory) and the GPU upload path.
+//
+// A halp output struct field is a "scene port" when it carries an
+// `ossia::scene_spec scene` field. Scene output travels through the
+// existing Gfx::GeometryOutlet / Types::Geometry: a scene is a richer form
+// of geometry, same pattern as Process::TexturePort carrying any GPU
+// resource.
+//
+// Once the design proves out, this should be promoted to avendish itself
+// (3rdparty/avendish/include/avnd/concepts/gfx.hpp) under a corresponding
+// scene concept alongside `geometry_port`.
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <concepts>
+#include <cstdint>
+
+namespace oscr
+{
+// True for any T whose `scene` member converts to `const ossia::scene_spec&`.
+template <typename T>
+concept scene_port = requires(T t) {
+  { t.scene } -> std::convertible_to<const ossia::scene_spec&>;
+};
+
+// Dirty-flag lexicon mirroring ossia::scene_port::dirt_flags: lets shader
+// authors signal fine-grained changes without republishing the whole scene.
+// Users set bits on the halp field's `dirty` member; the upload path clears
+// them after publishing. `inline` gives one definition across all includers.
+namespace scene_dirt_flags
+{
+inline constexpr std::uint8_t transform   = 0x01;
+inline constexpr std::uint8_t geometry    = 0x02;
+inline constexpr std::uint8_t materials   = 0x04;
+inline constexpr std::uint8_t lights      = 0x08;
+inline constexpr std::uint8_t animation   = 0x10;
+inline constexpr std::uint8_t environment = 0x20;
+inline constexpr std::uint8_t structure   = 0x40;
+inline constexpr std::uint8_t all         = 0xFF;
+}
+
+}
diff --git a/src/plugins/score-plugin-fx/Fx/VelToNote.hpp b/src/plugins/score-plugin-fx/Fx/VelToNote.hpp
index 7e93b8ed3e..547fa06ebe 100644
--- a/src/plugins/score-plugin-fx/Fx/VelToNote.hpp
+++ b/src/plugins/score-plugin-fx/Fx/VelToNote.hpp
@@ -4,8 +4,6 @@
 #include <ossia/dataflow/value_port.hpp>
 #include <ossia/detail/math.hpp>
 
-#include <boost/pfr.hpp>
-
 #include <halp/controls.hpp>
 #include <halp/meta.hpp>
 #include <halp/midi.hpp>
diff --git a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp
index 66cb9114c9..c4500a5da0 100644
--- a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp
+++ b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp
@@ -41,10 +41,14 @@ layout(location = 0) out vec2 isf_FragNormCoord;
   static constexpr auto vertexInitFunc = R"_(
 void isf_vertShaderInit()
 {
-  gl_Position = clipSpaceCorrMatrix * vec4( position, 0.0, 1.0 );
+  gl_Position = clipSpaceCorrMatrix * vec4(position, 0.0, 1.0);
   isf_FragNormCoord = vec2((gl_Position.x+1.0)/2.0, (gl_Position.y+1.0)/2.0);
+}
+
+void isf_vertShaderFinish()
+{
 #if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL)
-  gl_Position.y = - gl_Position.y;
+  gl_Position.y = -gl_Position.y;
 #endif
 }
 )_";
@@ -53,6 +57,7 @@ void isf_vertShaderInit()
 void main()
 {
   isf_vertShaderInit();
+  isf_vertShaderFinish();
 }
 )_";
 
@@ -67,12 +72,18 @@ layout(std140, binding = 0) uniform renderer_t {
   mat4 clipSpaceCorrMatrix_;
 
   vec2 RENDERSIZE_;
+  // MSAA sample count of the active output target (1 when MSAA is off).
+  // Mirrors RenderList::samples(); needed because glslang strips
+  // gl_NumSamples under SPIR-V. _pad0 keeps the struct vec4-aligned.
+  int MSAA_SAMPLES_;
+  int _renderer_pad0_;
 } isf_renderer_uniforms;
 
 // This dance is needed because otherwise
 // spirv-cross may generate different struct names in the vertex & fragment, causing crashes..
 // but we have to keep compat with ISF
 #define clipSpaceCorrMatrix isf_renderer_uniforms.clipSpaceCorrMatrix_
+#define MSAA_SAMPLES isf_renderer_uniforms.MSAA_SAMPLES_
 
 // Time-dependent uniforms, only relevant during execution
 layout(std140, binding = 1) uniform process_t {
@@ -86,6 +97,15 @@ layout(std140, binding = 1) uniform process_t {
 
   vec2 RENDERSIZE_;
   vec4 DATE_;
+  // Mirrors gl_NumWorkGroups for compute shaders. SPIRV-Cross's HLSL
+  // backend refuses to emit code for the NumWorkgroups built-in unless
+  // remap_num_workgroups_builtin() is set up on both the cross-compiler
+  // and the QRhi side; QShaderBaker exposes neither, so any compute
+  // shader using gl_NumWorkGroups silently fails to bake to HLSL on
+  // D3D11/D3D12. We sidestep that by routing references through this
+  // uniform — populated host-side just before each dispatch — and
+  // textually shadowing the built-in via the #define below.
+  uvec3 NUMWORKGROUPS_;
 } isf_process_uniforms;
 
 #define TIME isf_process_uniforms.TIME_
@@ -95,12 +115,29 @@ layout(std140, binding = 1) uniform process_t {
 #define FRAMEINDEX isf_process_uniforms.FRAMEINDEX_
 #define RENDERSIZE isf_process_uniforms.RENDERSIZE_
 #define DATE isf_process_uniforms.DATE_
+#define SAMPLERATE isf_process_uniforms.SAMPLERATE_
+#define gl_NumWorkGroups isf_process_uniforms.NUMWORKGROUPS_
+#define isf_NumWorkGroups isf_process_uniforms.NUMWORKGROUPS_
 )_";
 
   static constexpr auto defaultFunctions =
       R"_(
+// GLSL's textureSize is overloaded by sampler dimensionality — sampler2D
+// returns ivec2, sampler3D returns ivec3. Authors typically reach for
+// TEX_DIMENSIONS regardless of 2D/3D; the *_2D / *_3D aliases below make
+// the intended dimensionality explicit in shader source.
 #define TEX_DIMENSIONS(tex) textureSize(tex, 0)
+#define TEX_DIMENSIONS_2D(tex) textureSize(tex, 0)
+#define TEX_DIMENSIONS_3D(tex) textureSize(tex, 0)
 #define IMG_SIZE(tex) textureSize(tex, 0)
+#define IMG_SIZE_3D(tex) textureSize(tex, 0)
+
+// IMG_CUBE(tex, dir) — canonical colour-cube read; same in both coord systems
+// since a direction vector has no Y-flip. IMG_CUBE_DEPTH(tex, dir) —
+// canonical depth-cube read for inputs declared DEPTH: true on a cubemap,
+// hides the internal `_depth` companion binding.
+#define IMG_CUBE(tex, dir) texture(tex, dir)
+#define IMG_CUBE_DEPTH(tex, dir) texture(tex##_depth, dir).r
 
 #if defined(QSHADER_SPIRV)
 #define isf_FragCoord vec4(gl_FragCoord.x, RENDERSIZE.y - gl_FragCoord.y, gl_FragCoord.z, gl_FragCoord.w)
@@ -384,6 +421,86 @@ static bool parse_input_impl(sajson::value& v, bool)
   return v.get_type() == sajson::TYPE_TRUE;
 }
 
+// Reads the sampler settings stored as flat keys on a JSON input object (no
+// nested "SAMPLER" object). A missing key, or a value of the wrong JSON type,
+// leaves the corresponding field at its current value.
+static void parse_sampler_config(sampler_config& s, const sajson::value& v)
+{
+  // Both helpers locate `key` with find_object_key_insensitive and bail out early.
+  const auto read_string = [&v](const char* key, std::string& dst) {
+    const auto idx = v.find_object_key_insensitive(sajson::literal(key));
+    if(idx == v.get_length())
+      return;
+    auto entry = v.get_object_value(idx);
+    if(entry.get_type() == sajson::TYPE_STRING)
+      dst = entry.as_string();
+  };
+  const auto read_float = [&v](const char* key, std::optional<float>& dst) {
+    const auto idx = v.find_object_key_insensitive(sajson::literal(key));
+    if(idx == v.get_length())
+      return;
+    auto entry = v.get_object_value(idx);
+    if(is_number(entry))
+      dst = static_cast<float>(entry.get_number_value());
+  };
+
+  read_string("WRAP", s.wrap);
+  read_string("WRAP_S", s.wrap_s);
+  read_string("WRAP_T", s.wrap_t);
+  read_string("WRAP_R", s.wrap_r);
+  read_string("FILTER", s.filter);
+  read_string("MIN_FILTER", s.min_filter);
+  read_string("MAG_FILTER", s.mag_filter);
+  read_string("MIPMAP_MODE", s.mipmap_mode);
+  read_string("BORDER_COLOR", s.border_color);
+  read_string("COMPARE", s.compare);
+  read_float("ANISOTROPY", s.anisotropy);
+  read_float("LOD_BIAS", s.lod_bias);
+  read_float("MIN_LOD", s.min_lod);
+  read_float("MAX_LOD", s.max_lod);
+}
+
+// Audio inputs only honour FILTER and WRAP: audio textures are 1-mip 2D
+// samplers, so the remaining sampler_config knobs (COMPARE / BORDER_COLOR /
+// LOD / anisotropy) would have no meaningful effect.
+static void parse_audio_sampler_config(audio_sampler_config& s, const sajson::value& v)
+{
+  const char* const keys[] = {"FILTER", "WRAP"};
+  std::string* const targets[] = {&s.filter, &s.wrap};
+  for(std::size_t i = 0; i < 2; i++)
+  {
+    const auto idx = v.find_object_key_insensitive(sajson::literal(keys[i]));
+    if(idx == v.get_length())
+      continue;
+    auto entry = v.get_object_value(idx);
+    if(entry.get_type() == sajson::TYPE_STRING)
+      *targets[i] = entry.as_string();
+  }
+}
+
+// Clears COMPARE (with a warning on stderr) on a sampler config attached to a
+// 3D texture. A non-"never" COMPARE makes the runtime call
+// QRhiSampler::setTextureCompareOp, which on Vulkan requires a shadow sampler
+// on the shader side (compareEnable=VK_TRUE is a validation error otherwise)
+// and produces undefined reads on the other backends. sampler3DShadow is not
+// a core GLSL type; 2D / 2D-array / cube / cube-array have shadow variants.
+// `where` names the input in the warning; empty or "never" (any case) is kept.
+static void drop_unsupported_compare_3d(sampler_config& s, const char* where)
+{
+  if(s.compare.empty()) return;
+  std::string c = s.compare;
+  // Cast first: tolower() on a negative char value is undefined behaviour.
+  for(auto& ch : c) ch = (char)tolower((unsigned char)ch);
+  if(c == "never") return;
+  fmt::print(
+      stderr,
+      "[isf] {}: COMPARE is set but sampler3DShadow is not a core GLSL "
+      "sampler type — ignoring. Use a 2D, 2D-array, cubemap or cubemap-array "
+      "shadow sampler instead.\n",
+      where);
+  s.compare.clear();
+}
+
 static void parse_input(image_input& inp, const sajson::value& v)
 {
   if(auto k = v.find_object_key_insensitive(sajson::literal("DIMENSIONS"));
@@ -391,15 +508,64 @@ static void parse_input(image_input& inp, const sajson::value& v)
   {
     auto val = v.get_object_value(k);
     if(val.get_type() == sajson::TYPE_INTEGER)
-      inp.dimensions = val.get_integer_value();
+    {
+      auto d = val.get_integer_value();
+      if(d != 2 && d != 3)
+        throw invalid_file{
+            "image_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d)
+            + "). 1D and 4D textures are not supported."};
+      inp.dimensions = d;
+    }
   }
   if(auto k = v.find_object_key_insensitive(sajson::literal("DEPTH"));
      k != v.get_length())
   {
     inp.depth = v.get_object_value(k).get_type() == sajson::TYPE_TRUE;
   }
+  if(auto k = v.find_object_key_insensitive(sajson::literal("IS_ARRAY"));
+     k != v.get_length())
+  {
+    inp.is_array = v.get_object_value(k).get_type() == sajson::TYPE_TRUE;
+  }
+  else if(auto k2 = v.find_object_key_insensitive(sajson::literal("ARRAY"));
+          k2 != v.get_length())
+  {
+    inp.is_array = v.get_object_value(k2).get_type() == sajson::TYPE_TRUE;
+  }
+  // STATIC: shader author opts into "upstream publishes a long-lived
+  // QRhiTexture, bind it directly". Engine path = same Flag::GrabsFromSource
+  // already used for cube / 3D / array inputs (those grab implicitly
+  // because they can't be 2D color attachments). For plain 2D texture
+  // inputs both modes are valid — RT-render (compositor pattern) is the
+  // safe default; STATIC: true opts into direct binding for static-LUT /
+  // IBL-bake / asset-cache producers (avnd gpu_texture_output, etc.).
+  if(auto k = v.find_object_key_insensitive(sajson::literal("STATIC"));
+     k != v.get_length())
+  {
+    inp.is_static = v.get_object_value(k).get_type() == sajson::TYPE_TRUE;
+  }
+  parse_sampler_config(inp.sampler, v);
+  if(inp.dimensions == 3)
+  {
+    drop_unsupported_compare_3d(inp.sampler, "image input (DIMENSIONS: 3)");
+    if(inp.is_array)
+    {
+      throw invalid_file{
+          "image input: DIMENSIONS: 3 with ARRAY: true is not supported — "
+          "sampler3DArray is not a core GLSL type. Use a 3D texture and drop "
+          "ARRAY, or a 2D-array texture and drop DIMENSIONS: 3."};
+    }
+  }
+}
+// Reads the optional DEPTH flag and the flat sampler fields of a cubemap input.
+static void parse_input(cubemap_input& inp, const sajson::value& v)
+{
+  const auto depth_idx = v.find_object_key_insensitive(sajson::literal("DEPTH"));
+  if(depth_idx != v.get_length())
+    inp.depth = v.get_object_value(depth_idx).get_type() == sajson::TYPE_TRUE;
+  // Sampler fields (WRAP, FILTER, ...) sit directly on the input object.
+  parse_sampler_config(inp.sampler, v);
 }
-static void parse_input(cubemap_input& inp, const sajson::value& v) { }
 
 static void parse_input(event_input& inp, const sajson::value& v) { }
 
@@ -419,6 +585,7 @@ static void parse_input(audio_input& inp, const sajson::value& v)
       }
     }
   }
+  parse_audio_sampler_config(inp.sampler, v);
 }
 
 static void parse_input(audioHist_input& inp, const sajson::value& v)
@@ -437,6 +604,7 @@ static void parse_input(audioHist_input& inp, const sajson::value& v)
       }
     }
   }
+  parse_audio_sampler_config(inp.sampler, v);
 }
 
 // CSF-specific parsing functions
@@ -497,6 +665,106 @@ static void parse_input(storage_input& inp, const sajson::value& v)
       if(val.get_type() == sajson::TYPE_STRING)
         inp.buffer_usage = val.as_string();
     }
+    else if(k == "PERSISTENT")
+    {
+      inp.persistent = v.get_object_value(i).get_type() == sajson::TYPE_TRUE;
+    }
+    else if(k == "VISIBILITY")
+    {
+      auto val = v.get_object_value(i);
+      if(val.get_type() == sajson::TYPE_STRING)
+        inp.visibility = val.as_string();
+    }
+  }
+
+  // Reject semantically-impossible combinations. PERSISTENT allocates a
+  // ping-pong pair and always emits `_prev` as a readonly buffer — if the
+  // primary is write_only, nothing ever writes the data that _prev is
+  // supposed to read back, so it's silently always zero.
+  if(inp.persistent && inp.access == "write_only")
+  {
+    throw invalid_file{
+        "storage input declared as PERSISTENT + ACCESS: write_only is "
+        "invalid — _prev would always read zero (no read path exists to "
+        "populate it). Use ACCESS: read_write or read_only with PERSISTENT, "
+        "or drop PERSISTENT if you don't need frame history."};
+  }
+
+  // Reject empty LAYOUT for non-indirect storage_inputs. The graphics
+  // emit at isf_emit_graphics_storage / isf_emit_ssbo_decl produces an
+  // empty `readonly buffer NAME_buf { };` block which is invalid GLSL
+  // (`buffer { };` requires at least one member declarator). shaderc
+  // then fails with a cryptic message pointing at the auto-emitted
+  // block. uniform_input has the symmetric check at parse_input(uniform).
+  // Indirect-draw SSBOs LEGITIMATELY have empty LAYOUT — they are
+  // skipped from graphics emit (isf.cpp:3361-3363) when buffer_usage is
+  // non-empty. Match that gate here so legitimate indirect-draw paths
+  // pass through unchallenged.
+  if(inp.layout.empty() && inp.buffer_usage.empty())
+  {
+    throw invalid_file{
+        "storage_input declares an empty LAYOUT and no BUFFER_USAGE — "
+        "the SSBO graphics emit would produce `readonly buffer NAME_buf "
+        "{ };` which is invalid GLSL (a buffer block must have at least "
+        "one member declarator). Empty LAYOUT only makes sense for "
+        "indirect-draw SSBOs which set BUFFER_USAGE: \"indirect_draw\" "
+        "or \"indirect_draw_indexed\". Either declare members in LAYOUT "
+        "or set BUFFER_USAGE."};
+  }
+}
+
+// Reads LAYOUT (an array of { NAME, TYPE } objects) and VISIBILITY from a
+// uniform input; keys are compared exactly. Throws invalid_file when no
+// layout field was collected.
+static void parse_input(uniform_input& inp, const sajson::value& v)
+{
+  // Builds one layout_field from a JSON object, keeping string-typed NAME / TYPE.
+  const auto read_field = [](const sajson::value& obj) {
+    uniform_input::layout_field result;
+    for(std::size_t m = 0; m < obj.get_length(); m++)
+    {
+      auto member_key = obj.get_object_key(m).as_string();
+      auto member = obj.get_object_value(m);
+      if(member.get_type() != sajson::TYPE_STRING)
+        continue;
+      if(member_key == "NAME")
+        result.name = member.as_string();
+      else if(member_key == "TYPE")
+        result.type = member.as_string();
+    }
+    return result;
+  };
+
+  // Every key of the input object is visited; unknown keys are skipped.
+  for(std::size_t i = 0, count = v.get_length(); i < count; i++)
+  {
+    auto key = v.get_object_key(i).as_string();
+    auto entry = v.get_object_value(i);
+    if(key == "VISIBILITY")
+    {
+      if(entry.get_type() == sajson::TYPE_STRING)
+        inp.visibility = entry.as_string();
+    }
+    else if(key == "LAYOUT" && entry.get_type() == sajson::TYPE_ARRAY)
+    {
+      std::size_t field_count = entry.get_length();
+      inp.layout.reserve(field_count);
+      for(std::size_t j = 0; j < field_count; j++)
+      {
+        auto element = entry.get_array_element(j);
+        if(element.get_type() == sajson::TYPE_OBJECT)
+          inp.layout.push_back(read_field(element));
+      }
+    }
+  }
+
+  // An absent LAYOUT and a LAYOUT without any object element both end up here.
+  if(inp.layout.empty())
+  {
+    throw invalid_file{
+        "uniform_input declares an empty LAYOUT — std140 interface blocks "
+        "must contain at least one field. Either declare its members in "
+        "LAYOUT: [{ NAME, TYPE }, ...] or remove the input."};
   }
 }
 
@@ -507,8 +775,18 @@ static void parse_input(texture_input& inp, const sajson::value& v)
   {
     auto val = v.get_object_value(k);
     if(val.get_type() == sajson::TYPE_INTEGER)
-      inp.dimensions = val.get_integer_value();
+    {
+      auto d = val.get_integer_value();
+      if(d != 2 && d != 3)
+        throw invalid_file{
+            "texture_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d)
+            + "). 1D and 4D textures are not supported."};
+      inp.dimensions = d;
+    }
   }
+  parse_sampler_config(inp.sampler, v);
+  if(inp.dimensions == 3)
+    drop_unsupported_compare_3d(inp.sampler, "texture input (DIMENSIONS: 3)");
 }
 
 // Parse a COPY_FROM JSON object.
@@ -540,17 +818,213 @@ parse_copy_from(const sajson::value& obj)
   return cf;
 }
 
-// Parse an AUXILIARY JSON array into a vector of auxiliary_request.
+// Three-way classification of an AUXILIARY JSON entry, driven by its TYPE
+// string (compared after lower-casing):
+//   Ssbo    — default: TYPE absent, not a string, or not listed below
+//             ("storage" / "buffer" / "ssbo", ...). Layout maps to an std430
+//             `buffer` block bound as bufferLoad / bufferStore / bufferLoadStore.
+//   Ubo     — TYPE: "uniform" / "ubo". Layout maps to an std140 `uniform`
+//             block bound as uniformBuffer.
+//   Texture — TYPE: "image" / "texture" / "cubemap" / "image_cube" /
+//             "storage_image" / "storage_cube" / "storage_image_array" /
+//             "storage_3d". Goes through the auxiliary_texture_request pool.
+enum class aux_kind { Ssbo, Ubo, Texture };
+
+static aux_kind aux_entry_kind(const sajson::value& aux_obj)
+{
+  auto k = aux_obj.find_object_key_insensitive(sajson::literal("TYPE"));
+  if(k == aux_obj.get_length())
+    return aux_kind::Ssbo;
+  auto v = aux_obj.get_object_value(k);
+  if(v.get_type() != sajson::TYPE_STRING)
+    return aux_kind::Ssbo;
+  std::string t = v.as_string();
+  // Cast first: tolower() on a negative char value is undefined behaviour.
+  for(auto& c : t) c = (char)tolower((unsigned char)c);
+  if(t == "image" || t == "texture" || t == "cubemap" || t == "image_cube"
+     || t == "storage_image" || t == "storage_cube"
+     || t == "storage_image_array" || t == "storage_3d")
+    return aux_kind::Texture;
+  if(t == "uniform" || t == "ubo")
+    return aux_kind::Ubo;
+  return aux_kind::Ssbo;
+}
+
+// Parse a single texture auxiliary entry into `out`.
+//
+// Structural keys (NAME, TYPE, DIMENSIONS, IS_ARRAY / ARRAY, DEPTH, STORAGE,
+// FORMAT, ACCESS, WIDTH, HEIGHT, LAYERS) are compared with exact spelling and
+// a value of an unexpected JSON type is ignored. The flat sampler fields are
+// read through parse_sampler_config.
+//
+// Once the keys are scanned, in this order:
+//   - a DEPTH size expression promotes a 2D request to DIMENSIONS 3;
+//   - a non-empty FORMAT other than "rgba8" marks the request as storage;
+//   - COMPARE is dropped for sampled, non-cubemap 3D textures;
+//   - cubemap + ARRAY throws invalid_file;
+//   - cubemap + DIMENSIONS 3 is reset to DIMENSIONS 2 with a stderr warning.
+//
+// `aux_obj` is expected to be a JSON object — the caller checks its type.
+static void parse_auxiliary_texture(
+    const sajson::value& aux_obj,
+    geometry_input::auxiliary_texture_request& out)
+{
+  // Size-like keys (WIDTH / HEIGHT / LAYERS / numeric DEPTH) take either a
+  // number, stored through std::to_string, or a string expression, stored
+  // verbatim. Any other JSON type leaves `dst` unchanged.
+  const auto read_expression = [](const sajson::value& val, std::string& dst) {
+    const auto t = val.get_type();
+    if(t == sajson::TYPE_INTEGER)
+      dst = std::to_string(val.get_integer_value());
+    else if(t == sajson::TYPE_DOUBLE)
+      dst = std::to_string(val.get_double_value());
+    else if(t == sajson::TYPE_STRING)
+      dst = val.as_string();
+  };
+
+  for(std::size_t f = 0; f < aux_obj.get_length(); f++)
+  {
+    auto fkey = aux_obj.get_object_key(f).as_string();
+    auto fval = aux_obj.get_object_value(f);
+
+    if(fkey == "NAME" && fval.get_type() == sajson::TYPE_STRING)
+      out.name = fval.as_string();
+    else if(fkey == "TYPE" && fval.get_type() == sajson::TYPE_STRING)
+    {
+      std::string t = fval.as_string();
+      // Cast first: tolower() on a negative char value is undefined behaviour.
+      for(auto& c : t) c = (char)tolower((unsigned char)c);
+      if(t == "cubemap" || t == "image_cube")
+        out.is_cubemap = true;
+      else if(t == "storage_image")
+        out.is_storage = true;
+      else if(t == "storage_cube")
+      { out.is_storage = true; out.is_cubemap = true; }
+      else if(t == "storage_image_array")
+      { out.is_storage = true; out.is_array = true; }
+      else if(t == "storage_3d")
+      { out.is_storage = true; out.dimensions = 3; }
+    }
+    else if(fkey == "DIMENSIONS")
+    {
+      if(fval.get_type() == sajson::TYPE_INTEGER)
+        out.dimensions = fval.get_integer_value();
+    }
+    else if(fkey == "IS_ARRAY" || fkey == "ARRAY")
+      out.is_array = (fval.get_type() == sajson::TYPE_TRUE);
+    else if(fkey == "DEPTH")
+    {
+      // DEPTH overload — context-dependent:
+      //   "DEPTH": true   → legacy sampleable-depth flag (paired with
+      //                     COMPARE for shadow-comparison samplers)
+      //   "DEPTH": <int>  → 3D-texture depth dimension literal
+      //   "DEPTH": "<expr>" → 3D-texture depth dimension expression
+      // Distinguishable by sajson type so authors can use either form
+      // without the parser silently dropping one.
+      const auto t = fval.get_type();
+      if(t == sajson::TYPE_TRUE)
+        out.is_depth = true;
+      else if(t == sajson::TYPE_FALSE)
+        out.is_depth = false;
+      else
+        read_expression(fval, out.depth_expression);
+    }
+    else if(fkey == "STORAGE")
+      out.is_storage = (fval.get_type() == sajson::TYPE_TRUE);
+    else if(fkey == "FORMAT" && fval.get_type() == sajson::TYPE_STRING)
+      out.format = fval.as_string();
+    else if(fkey == "ACCESS" && fval.get_type() == sajson::TYPE_STRING)
+      out.access = fval.as_string();
+    // WIDTH / HEIGHT / LAYERS — same expression-or-literal convention as
+    // csf_image_input. Strings allow `$var` substitution against the
+    // shader's long/float inputs at allocation time.
+    else if(fkey == "WIDTH")
+      read_expression(fval, out.width_expression);
+    else if(fkey == "HEIGHT")
+      read_expression(fval, out.height_expression);
+    else if(fkey == "LAYERS")
+      read_expression(fval, out.layers_expression);
+  }
+
+  // depth_expression non-empty implies a 3D texture even if DIMENSIONS
+  // wasn't set explicitly. Mirrors csf_image_input::is3D() semantics —
+  // saves the author from writing both fields.
+  if(!out.depth_expression.empty() && out.dimensions == 2)
+    out.dimensions = 3;
+
+  // Auto-infer storage-image semantics when FORMAT is explicitly set to
+  // anything other than the sampled-texture default (rgba8). Allows
+  // author-friendly declarations like:
+  //
+  //   { "NAME": "voxel_grid", "TYPE": "image", "ACCESS": "read_write",
+  //     "FORMAT": "r32ui", "DIMENSIONS": 3, ... }
+  //
+  // to be parsed as a storage image without forcing the author to
+  // additionally write `"STORAGE": true` or use the more-cryptic
+  // `"TYPE": "storage_3d"`.
+  //
+  // ONLY uses FORMAT — NOT ACCESS — because `access` defaults to
+  // "read_write" in the struct (it's only meaningful when is_storage is
+  // already true), so an ACCESS-based heuristic would mis-fire on every
+  // sampled-aux entry that doesn't explicitly override it. FORMAT
+  // defaults to "rgba8" which is also the sampled-image default, so the
+  // discriminator is "did the author explicitly write a non-rgba8
+  // FORMAT?" — unambiguous either way. If you want a storage rgba8
+  // image, write `"STORAGE": true` explicitly.
+  if(!out.is_storage)
+  {
+    const bool format_implies_storage
+        = !out.format.empty() && out.format != "rgba8";
+    if(format_implies_storage)
+      out.is_storage = true;
+  }
+  // Inherit the flat sampler_config fields (WRAP/FILTER/COMPARE/…).
+  parse_sampler_config(out.sampler, aux_obj);
+  // Storage images don't use the sampler; regular samplers on a 3D texture
+  // have no shadow variant. Cubemap and 2D-array shapes have shadow variants
+  // and are fine.
+  if(!out.is_storage && !out.is_cubemap && out.dimensions == 3)
+    drop_unsupported_compare_3d(
+        out.sampler,
+        fmt::format("auxiliary texture '{}' (DIMENSIONS: 3)", out.name).c_str());
+  // Cube-arrays (samplerCubeArray / imageCubeArray) are unsupported: every
+  // QRhi backend silently collapses `CubeMap | TextureArray` to one flag or
+  // the other at view-creation time (Vulkan qrhivulkan.cpp:7736+,
+  // D3D12:1160+, Metal:4025+, GL:6124+), so the shader-side type and the
+  // bound resource disagree. Reject at parse time rather than ship broken
+  // bindings. Same story for 3D cubemaps (nonsensical).
+  if(out.is_cubemap && out.is_array)
+  {
+    throw invalid_file{
+        "auxiliary texture '" + out.name
+        + "': cubemap + ARRAY is not supported on any QRhi backend "
+          "(cube-array views are not constructible). Use a plain cubemap, "
+          "or decompose to a 2D array and do face math in the shader."};
+  }
+  if(out.is_cubemap && out.dimensions == 3)
+  {
+    fmt::print(
+        stderr,
+        "[isf] auxiliary texture '{}': cubemap with DIMENSIONS: 3 is "
+        "meaningless (cube faces are 2D). Ignoring DIMENSIONS.\n",
+        out.name);
+    out.dimensions = 2;
+  }
+}
+
+// Parse an AUXILIARY JSON array, dispatching each entry by TYPE into
+// either the buffer list or the texture list.
 // Shared by geometry_input parsing and top-level AUXILIARY key.
 static void parse_auxiliary_array(
     const sajson::value& val,
-    std::vector<geometry_input::auxiliary_request>& out)
+    std::vector<geometry_input::auxiliary_request>& out_buffers,
+    std::vector<geometry_input::auxiliary_texture_request>& out_textures)
 {
   if(val.get_type() != sajson::TYPE_ARRAY)
     return;
 
   std::size_t aux_count = val.get_length();
-  out.reserve(aux_count);
+  out_buffers.reserve(out_buffers.size() + aux_count);
 
   for(std::size_t j = 0; j < aux_count; j++)
   {
@@ -558,7 +1032,21 @@ static void parse_auxiliary_array(
     if(aux_obj.get_type() != sajson::TYPE_OBJECT)
       continue;
 
+    const aux_kind kind = aux_entry_kind(aux_obj);
+    if(kind == aux_kind::Texture)
+    {
+      geometry_input::auxiliary_texture_request tr;
+      parse_auxiliary_texture(aux_obj, tr);
+      if(!tr.name.empty())
+        out_textures.push_back(std::move(tr));
+      continue;
+    }
+
     geometry_input::auxiliary_request ar;
+    // UBO kind: flag set on the request so both parser-side GLSL emission
+    // and runtime-side binding know to treat it as a std140 uniform block.
+    // Buffer-kind SSBO is the default (is_uniform stays false).
+    ar.is_uniform = (kind == aux_kind::Ubo);
 
     for(std::size_t f = 0; f < aux_obj.get_length(); f++)
     {
@@ -611,12 +1099,61 @@ static void parse_auxiliary_array(
       {
         ar.forward = parse_copy_from(fval);
       }
+      else if(fkey == "PERSISTENT")
+      {
+        if(fval.get_type() == sajson::TYPE_TRUE)
+          ar.persistent = true;
+        else if(fval.get_type() == sajson::TYPE_FALSE)
+          ar.persistent = false;
+      }
     }
 
     if(ar.access.empty())
       ar.access = "read_only";
 
-    out.push_back(std::move(ar));
+    out_buffers.push_back(std::move(ar));
+  }
+}
+
+// Checks every ATTRIBUTES entry of every geometry_input in `d`: a non-empty
+// TYPE must be either one of the GLSL scalar / vector / matrix names listed
+// below or the name of an entry in d.types; anything else throws
+// invalid_file. Intended to run after both RESOURCES and TYPES are parsed
+// (TYPES may appear in any order in the JSON), i.e. once at the end of
+// parse_csf / parse_raw_raster_pipeline, so that a typo in a TYPE string is
+// caught at parse time instead of as a confusing "undefined identifier" GLSL
+// compile error deep inside the generated shader.
+static void validate_attribute_types(const descriptor& d)
+{
+  static constexpr std::string_view glsl_types[] = {
+    "float", "int", "uint", "bool", "vec2", "vec3", "vec4", "ivec2", "ivec3",
+    "ivec4", "uvec2", "uvec3", "uvec4", "mat2", "mat3", "mat4"
+  };
+  // True when `name` is a GLSL built-in or a user type declared in d.types.
+  const auto is_known = [&d](std::string_view name) noexcept {
+    for(std::string_view candidate : glsl_types)
+      if(name == candidate)
+        return true;
+    for(const auto& user_type : d.types)
+      if(user_type.name == name)
+        return true;
+    return false;
+  };
+
+  for(const auto& inp : d.inputs)
+  {
+    auto* geometry = ossia::get_if<geometry_input>(&inp.data);
+    if(!geometry) continue;
+    for(const auto& attribute : geometry->attributes)
+    {
+      if(attribute.type.empty() || is_known(attribute.type))
+        continue;
+      throw invalid_file{
+          "ATTRIBUTES \"" + attribute.name + "\" on geometry resource \"" + inp.name
+          + "\" declares TYPE \"" + attribute.type
+          + "\", which is neither a built-in GLSL scalar/vector/matrix type "
+            "nor a user-defined type from the TYPES section."};
+    }
   }
 }
 
@@ -703,27 +1240,79 @@ static void parse_input(geometry_input& inp, const sajson::value& v)
       else if(val.get_type() == sajson::TYPE_DOUBLE)
         inp.instance_count = std::to_string((int)val.get_double_value());
     }
+    else if(k == "FORMAT_ID")
+    {
+      // String tag stamped on the consumer geometry's filter_tag
+      // (rapidhash truncated to 32 bits). Lets a CSF that produces
+      // primitive-cloud-shaped output declare its format identity in
+      // the JSON header without engine-side knowledge of the format.
+      auto val = v.get_object_value(i);
+      if(val.get_type() == sajson::TYPE_STRING)
+        inp.format_id = val.as_string();
+    }
     else if(k == "AUXILIARY")
     {
-      parse_auxiliary_array(v.get_object_value(i), inp.auxiliary);
+      parse_auxiliary_array(v.get_object_value(i), inp.auxiliary, inp.auxiliary_textures);
     }
-    else if(k == "INDIRECT_DRAW")
+    else if(k == "INDIRECT")
     {
       auto val = v.get_object_value(i);
-      if(val.get_type() == sajson::TYPE_TRUE)
-        inp.indirect_draw = true;
-      else if(val.get_type() == sajson::TYPE_FALSE)
-        inp.indirect_draw = false;
+      if(val.get_type() == sajson::TYPE_OBJECT)
+      {
+        geometry_input::indirect_request req;
+        for(std::size_t j = 0; j < val.get_length(); j++)
+        {
+          auto ik = val.get_object_key(j).as_string();
+          boost::algorithm::to_upper(ik);
+          if(ik == "COUNT")
+          {
+            auto iv = val.get_object_value(j);
+            if(iv.get_type() == sajson::TYPE_STRING)
+              req.count = iv.as_string();
+            else if(iv.get_type() == sajson::TYPE_INTEGER)
+              req.count = std::to_string(iv.get_integer_value());
+            else if(iv.get_type() == sajson::TYPE_DOUBLE)
+              req.count = std::to_string((int)iv.get_double_value());
+          }
+        }
+        if(req.count.empty())
+          req.count = "1";
+        inp.indirect = req;
+      }
     }
-    else if(k == "INDIRECT_DRAW_TYPE")
+    else if(k == "INDIRECT_DRAW")
     {
       auto val = v.get_object_value(i);
-      if(val.get_type() == sajson::TYPE_STRING)
-        inp.indirect_draw_type = val.as_string();
+      if(val.get_type() == sajson::TYPE_TRUE)
+        inp.indirect = geometry_input::indirect_request{.count = "1"};
     }
   }
 }
 
+// Known GLSL image format qualifiers. Used for a parse-time sanity check —
+// lets the shader author see a typo ("rgba16" vs "rgba16f") before the
+// runtime silently falls back to rgba8. Strict GLSL image-format typing
+// validation (matching imageStore argument types to declared formats) would
+// need a full GLSL AST which this parser does not build; the most useful
+// check we can do cheaply is reject unknown format strings.
+static bool isf_is_known_image_format(std::string fmt)
+{
+  boost::algorithm::to_lower(fmt);
+  static const ossia::hash_set<std::string> known{
+      "rgba8",  "rgba8_snorm",  "rgba8ui", "rgba8i",
+      "rgba16", "rgba16_snorm", "rgba16f", "rgba16ui", "rgba16i",
+      "rgba32f","rgba32ui",     "rgba32i",
+      "rg8",    "rg8_snorm",    "rg8ui",   "rg8i",
+      "rg16",   "rg16_snorm",   "rg16f",   "rg16ui", "rg16i",
+      "rg32f",  "rg32ui",       "rg32i",
+      "r8",     "r8_snorm",     "r8ui",    "r8i",
+      "r16",    "r16_snorm",    "r16f",    "r16ui",  "r16i",
+      "r32f",   "r32ui",        "r32i",
+      "rgb10_a2", "rgb10_a2ui", "r11f_g11f_b10f",
+      "bgra8"};
+  return known.count(fmt) > 0;
+}
+
 static void parse_input(csf_image_input& inp, const sajson::value& v)
 {
   std::size_t N = v.get_length();
@@ -741,7 +1330,18 @@ static void parse_input(csf_image_input& inp, const sajson::value& v)
     {
       auto val = v.get_object_value(i);
       if(val.get_type() == sajson::TYPE_STRING)
+      {
         inp.format = val.as_string();
+        if(!inp.format.empty() && !isf_is_known_image_format(inp.format))
+        {
+          fmt::print(
+              stderr,
+              "[isf] csf_image_input FORMAT \"{}\" is not a recognised GLSL "
+              "image qualifier — will fall back to rgba8 at runtime. Check "
+              "for typos (e.g. \"rgba16\" vs \"rgba16f\").\n",
+              inp.format);
+        }
+      }
     }
     else if(k == "WIDTH")
     {
@@ -798,10 +1398,90 @@ static void parse_input(csf_image_input& inp, const sajson::value& v)
     {
       auto val = v.get_object_value(i);
       if(val.get_type() == sajson::TYPE_INTEGER)
-        inp.dimensions = val.get_integer_value();
+      {
+        auto d = val.get_integer_value();
+        if(d != 2 && d != 3)
+          throw invalid_file{
+              "csf_image_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d)
+              + "). 1D and 4D textures are not supported."};
+        inp.dimensions = d;
+      }
       else if(val.get_type() == sajson::TYPE_DOUBLE)
-        inp.dimensions = (int)val.get_double_value();
+      {
+        auto d = (int)val.get_double_value();
+        if(d != 2 && d != 3)
+          throw invalid_file{
+              "csf_image_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d)
+              + "). 1D and 4D textures are not supported."};
+        inp.dimensions = d;
+      }
+    }
+    else if(k == "VISIBILITY")
+    {
+      auto val = v.get_object_value(i);
+      if(val.get_type() == sajson::TYPE_STRING)
+        inp.visibility = val.as_string();
+    }
+    else if(k == "PERSISTENT")
+    {
+      inp.persistent = v.get_object_value(i).get_type() == sajson::TYPE_TRUE;
+    }
+    else if(k == "GENERATE_MIPS")
+    {
+      inp.generate_mips = v.get_object_value(i).get_type() == sajson::TYPE_TRUE;
+    }
+    else if(k == "IS_ARRAY" || k == "ARRAY")
+    {
+      inp.is_array = v.get_object_value(i).get_type() == sajson::TYPE_TRUE;
     }
+    else if(k == "LAYERS") // layer count: expression string, or a number stored as an integral string
+    {
+      auto val = v.get_object_value(i);
+      auto t = val.get_type();
+      if(t == sajson::TYPE_STRING)
+        inp.layers_expression = val.as_string();
+      else if(t == sajson::TYPE_INTEGER)
+        inp.layers_expression = std::to_string(val.get_integer_value());
+      else if(t == sajson::TYPE_DOUBLE) // truncate like INDIRECT.COUNT / pass Z: to_string(double) yields "3.000000" and is locale-dependent
+        inp.layers_expression = std::to_string((int)val.get_double_value());
+    }
+    else if(k == "CUBEMAP" || k == "IS_CUBE")
+    {
+      inp.cubemap = v.get_object_value(i).get_type() == sajson::TYPE_TRUE;
+    }
+  }
+
+  // See the matching note on storage_input — persistent + write_only has no
+  // useful semantics because _prev is readonly and nothing writes it.
+  if(inp.persistent && inp.access == "write_only")
+  {
+    throw invalid_file{
+        "csf_image_input declared as PERSISTENT + ACCESS: write_only is "
+        "invalid — _prev would always read zero (no read path exists to "
+        "populate it). Use ACCESS: read_write or read_only with PERSISTENT, "
+        "or drop PERSISTENT."};
+  }
+
+  // Cube-array writable images are unsupported (see sampler-side analysis in
+  // parse_auxiliary_texture / isf.hpp). Reject here so downstream allocators
+  // and the GLSL emitter can assume the combo never shows up.
+  if(inp.is_array && inp.cubemap)
+  {
+    throw invalid_file{
+        "csf_image_input: IS_ARRAY + image_cube is not supported — "
+        "imageCubeArray views are broken on every QRhi backend. Bind N "
+        "separate cubemaps or use image2DArray and do face math in the "
+        "shader."};
+  }
+  // 3D arrays do not exist as a core GLSL image type either.
+  if(inp.is_array && inp.is3D())
+  {
+    fmt::print(
+        stderr,
+        "[isf] csf_image_input: IS_ARRAY + 3D image (DIMENSIONS: 3 or DEPTH "
+        "expression) is not a valid GLSL type (image3DArray is not core). "
+        "Dropping IS_ARRAY.\n");
+    inp.is_array = false;
   }
 }
 
@@ -821,6 +1501,7 @@ static void parse_input(audioFFT_input& inp, const sajson::value& v)
       }
     }
   }
+  parse_audio_sampler_config(inp.sampler, v);
 }
 
 static void parse_input(long_input& inp, const sajson::value& v)
@@ -1010,6 +1691,13 @@ static void parse_input(Input_T& inp, const sajson::value& v)
       auto val = v.get_object_value(i);
       inp.def = parse_input_impl(val, value_type{});
     }
+    else if(k == "AS_COLOR")
+    {
+      if constexpr(requires { inp.as_color; })
+      {
+        inp.as_color = v.get_object_value(i).get_type() == sajson::TYPE_TRUE;
+      }
+    }
   }
 
   // Handle shaders without min / max
@@ -1120,6 +1808,170 @@ input parse(const sajson::value& v)
   return i;
 }
 
+// --- PIPELINE_STATE / MULTIVIEW parsing helpers ---------------------------
+
+static bool get_bool(const sajson::value& v, bool& out)
+{
+  if(v.get_type() == sajson::TYPE_TRUE) { out = true;  return true; }
+  if(v.get_type() == sajson::TYPE_FALSE){ out = false; return true; }
+  return false;
+}
+static bool get_float(const sajson::value& v, float& out)
+{
+  if(v.get_type() == sajson::TYPE_DOUBLE)  { out = (float)v.get_double_value();  return true; }
+  if(v.get_type() == sajson::TYPE_INTEGER) { out = (float)v.get_integer_value(); return true; }
+  return false;
+}
+static bool get_int(const sajson::value& v, int& out) // true + out set only for a JSON number representable as int (doubles truncate toward zero)
+{
+  const double x = v.get_type() == sajson::TYPE_INTEGER ? (double)v.get_integer_value() : v.get_type() == sajson::TYPE_DOUBLE ? v.get_double_value() : 2147483648.; // sentinel is out of range => rejected
+  if(x > -2147483649. && x < 2147483648.) { out = (int)x; return true; } // range-check first: casting an out-of-range double to int is undefined behaviour
+  return false;
+}
+static bool get_uint(const sajson::value& v, uint32_t& out) // true + out set only for a JSON number that fits 32 bits; out untouched otherwise
+{
+  const double x = v.get_type() == sajson::TYPE_INTEGER ? (double)v.get_integer_value() : v.get_type() == sajson::TYPE_DOUBLE ? v.get_double_value() : 4294967296.; // sentinel is out of range => rejected
+  if(x > -2147483649. && x < 4294967296.) { out = (uint32_t)(long long)x; return true; } // negatives wrap as before; 2^31..2^32-1 (e.g. mask 4294967295) no longer goes through an out-of-range int cast
+  return false;
+}
+static bool get_str(const sajson::value& v, std::string& out)
+{
+  if(v.get_type() == sajson::TYPE_STRING) { out = v.as_string(); return true; }
+  return false;
+}
+
+static void parse_blend_attachment(const sajson::value& v, blend_attachment& out) // fills `out` from a JSON object; fields whose value has the wrong JSON type are left untouched
+{
+  if(v.get_type() != sajson::TYPE_OBJECT)
+    return;
+  std::size_t n = v.get_length();
+  for(std::size_t i = 0; i < n; i++)
+  {
+    auto k = v.get_object_key(i).as_string();
+    auto val = v.get_object_value(i);
+    bool b{};
+    if     (k == "ENABLE"     ) { if(get_bool(val, b)) out.enable = b; } // a non-boolean ENABLE must not clobber the caller's default
+    else if(k == "SRC_COLOR"  ) get_str(val, out.src_color);
+    else if(k == "DST_COLOR"  ) get_str(val, out.dst_color);
+    else if(k == "OP_COLOR"   ) get_str(val, out.op_color);
+    else if(k == "SRC_ALPHA"  ) get_str(val, out.src_alpha);
+    else if(k == "DST_ALPHA"  ) get_str(val, out.dst_alpha);
+    else if(k == "OP_ALPHA"   ) get_str(val, out.op_alpha);
+    else if(k == "COLOR_WRITE") get_str(val, out.color_write);
+    // Legacy shorter names: one string drives both the colour and alpha field; both are kept if the value is not a string
+    else if(k == "SRC"        ) { if(get_str(val, out.src_color)) out.src_alpha = out.src_color; }
+    else if(k == "DST"        ) { if(get_str(val, out.dst_color)) out.dst_alpha = out.dst_color; }
+    else if(k == "OP"         ) { if(get_str(val, out.op_color))  out.op_alpha  = out.op_color;  }
+  }
+}
+
+static void parse_stencil_op_state(const sajson::value& v, stencil_op_state& out)
+{
+  if(v.get_type() != sajson::TYPE_OBJECT)
+    return;
+  std::size_t n = v.get_length();
+  for(std::size_t i = 0; i < n; i++)
+  {
+    auto k = v.get_object_key(i).as_string();
+    auto val = v.get_object_value(i);
+    if     (k == "FAIL_OP"      ) get_str(val, out.fail_op);
+    else if(k == "DEPTH_FAIL_OP") get_str(val, out.depth_fail_op);
+    else if(k == "PASS_OP"      ) get_str(val, out.pass_op);
+    else if(k == "COMPARE_OP"   ) get_str(val, out.compare_op);
+    else if(k == "COMPARE"      ) get_str(val, out.compare_op);
+  }
+}
+
+static void parse_pipeline_state(const sajson::value& v, pipeline_state& out)
+{
+  if(v.get_type() != sajson::TYPE_OBJECT)
+    return;
+  std::size_t n = v.get_length();
+  for(std::size_t i = 0; i < n; i++)
+  {
+    auto k = v.get_object_key(i).as_string();
+    auto val = v.get_object_value(i);
+    bool b{};
+    float f{};
+    uint32_t u{};
+    std::string s;
+
+    if     (k == "DEPTH_TEST" )             { if(get_bool(val, b)) out.depth_test  = b; }
+    else if(k == "DEPTH_WRITE")             { if(get_bool(val, b)) out.depth_write = b; }
+    else if(k == "DEPTH_COMPARE")           { if(get_str(val, s))  out.depth_compare = s; }
+    else if(k == "DEPTH_BIAS")              { if(get_float(val, f)) out.depth_bias = f; }
+    else if(k == "SLOPE_SCALED_DEPTH_BIAS") { if(get_float(val, f)) out.slope_scaled_depth_bias = f; }
+    else if(k == "CULL_MODE")               { if(get_str(val, s))  out.cull_mode = s; }
+    else if(k == "FRONT_FACE")              { if(get_str(val, s))  out.front_face = s; }
+    else if(k == "POLYGON_MODE")            { if(get_str(val, s))  out.polygon_mode = s; }
+    else if(k == "LINE_WIDTH")              { if(get_float(val, f)) out.line_width = f; }
+    else if(k == "VERTEX_COUNT")            { if(get_uint(val, u)) out.vertex_count = u; }
+    else if(k == "INSTANCE_COUNT")          { if(get_uint(val, u)) out.instance_count = u; }
+    else if(k == "TOPOLOGY")                { if(get_str(val, s))  out.topology = s; }
+    else if(k == "BLEND")
+    {
+      // Shortcut: "BLEND": true/false turns on the default alpha-blend.
+      if(val.get_type() == sajson::TYPE_TRUE || val.get_type() == sajson::TYPE_FALSE)
+      {
+        blend_attachment a{};
+        a.enable = val.get_type() == sajson::TYPE_TRUE;
+        out.blend_all = a;
+      }
+      else if(val.get_type() == sajson::TYPE_OBJECT)
+      {
+        blend_attachment a{};
+        a.enable = true;
+        parse_blend_attachment(val, a);
+        out.blend_all = a;
+      }
+    }
+    else if(k == "BLEND_PER_ATTACHMENT")
+    {
+      if(val.get_type() == sajson::TYPE_ARRAY)
+      {
+        std::size_t m = val.get_length();
+        out.blend_per_attachment.clear();
+        out.blend_per_attachment.reserve(m);
+        for(std::size_t j = 0; j < m; j++)
+        {
+          blend_attachment a{};
+          a.enable = true;
+          parse_blend_attachment(val.get_array_element(j), a);
+          out.blend_per_attachment.push_back(a);
+        }
+      }
+    }
+    else if(k == "STENCIL_TEST")       { if(get_bool(val, b)) out.stencil_test = b; }
+    else if(k == "STENCIL_READ_MASK")  { if(get_uint(val, u)) out.stencil_read_mask = u; }
+    else if(k == "STENCIL_WRITE_MASK") { if(get_uint(val, u)) out.stencil_write_mask = u; }
+    else if(k == "STENCIL_FRONT")
+    {
+      stencil_op_state st{};
+      parse_stencil_op_state(val, st);
+      out.stencil_front = st;
+    }
+    else if(k == "STENCIL_BACK")
+    {
+      stencil_op_state st{};
+      parse_stencil_op_state(val, st);
+      out.stencil_back = st;
+    }
+    else if(k == "SHADING_RATE")
+    {
+      if(val.get_type() == sajson::TYPE_ARRAY && val.get_length() >= 2)
+      {
+        int w{}, h{};
+        if(get_int(val.get_array_element(0), w)
+           && get_int(val.get_array_element(1), h)
+           && w >= 1 && h >= 1)
+        {
+          out.shading_rate = std::array<int, 2>{w, h};
+        }
+      }
+    }
+  }
+}
+
 using root_fun = void (*)(descriptor&, const sajson::value&);
 using input_fun = input (*)(const sajson::value&);
 static const ossia::string_map<root_fun>& root_parse{[] {
@@ -1166,6 +2018,7 @@ static const ossia::string_map<root_fun>& root_parse{[] {
 
     // CSF-specific types - note: 'image' in CSF context is csf_image_input, not image_input
     i.insert({"storage", [](const auto& s) { return parse<storage_input>(s); }});
+    i.insert({"uniform", [](const auto& s) { return parse<uniform_input>(s); }});
     i.insert({"texture", [](const auto& s) { return parse<texture_input>(s); }});
     i.insert({"geometry", [](const auto& s) { return parse<geometry_input>(s); }});
 
@@ -1185,20 +2038,87 @@ static const ossia::string_map<root_fun>& root_parse{[] {
           auto k = obj.find_object_key_insensitive(sajson::literal("TYPE"));
           if(k != obj.get_length())
           {
-            std::string type_str = obj.get_object_value(k).as_string();
+            std::string type_str;
+            if(!get_str(obj.get_object_value(k), type_str))
+              continue;
             boost::algorithm::to_lower(type_str);
-            auto inp = input_parse.find(type_str);
-            if(inp != input_parse.end())
-              d.inputs.push_back((inp->second)(obj));
+
+            // "image" with ACCESS or FORMAT → storage image (csf_image_input),
+            // same as the RESOURCES section. This lets users declare storage
+            // images in INPUTS without having to move them to RESOURCES.
+            if(type_str == "image"
+               && (obj.find_object_key_insensitive(sajson::literal("ACCESS")) != obj.get_length()
+                || obj.find_object_key_insensitive(sajson::literal("FORMAT")) != obj.get_length()))
+            {
+              input inp;
+              parse_input_base(inp, obj);
+              csf_image_input ci;
+              parse_input(ci, obj);
+              inp.data = ci;
+              d.inputs.push_back(inp);
+            }
+            else
+            {
+              auto inp = input_parse.find(type_str);
+              if(inp != input_parse.end())
+                d.inputs.push_back((inp->second)(obj));
+            }
           }
           else
           {
+            // No TYPE specified — default to storage (SSBO). Matches the
+            // nested-AUXILIARY default (`aux_entry_kind`, ~L820) so the
+            // top-level INPUTS dispatcher behaves the same as nested
+            // declarations. This is the right default because:
+            //   - The dual-bind UBO/SSBO design (scene_counts etc.) is
+            //     SSBO-only after the cross-backend cleanup; readers
+            //     declare `TYPE: "storage", ACCESS: "read_only"`.
+            //   - Authors who omit TYPE on a buffer-shaped declaration
+            //     almost always mean storage, not uniform — uniforms
+            //     have a much smaller addressable subset (no runtime
+            //     arrays, std140 padding) and writers always need
+            //     storage anyway.
+            //   - The previous behaviour silently dropped the entry
+            //     without an error, so a typo'd `TYPE: "uniform"` →
+            //     missing TYPE flipped scene_counts off entirely with
+            //     no warning. Defaulting to storage means the next
+            //     stage (binding emission) will catch the misuse via
+            //     a layout/std430 check rather than a silent skip.
+            d.inputs.push_back(parse<storage_input>(obj));
           }
         }
       }
     }
   }});
 
+  // How many GLSL interface-block input/output locations a given type
+  // consumes, per GLSL 4.50 spec §4.4.1 "A matrix of sizes matM or matMxN
+  // takes M locations (one per column)". Non-matrix types consume one
+  // location. Doubles of >dvec2 width technically consume two locations
+  // each on desktop GL, but those are vanishingly rare in shader-toy-
+  // style pipelines — if anyone hits the edge they can pin LOCATION
+  // explicitly. The mat{M,MxN} cases matter because every existing
+  // preset that wants mat4 per-instance or per-vertex would otherwise
+  // have its subsequent attribute collide with column 2/3/4 of the
+  // matrix.
+  static constexpr auto locations_consumed = [](attribute_type t) noexcept -> int {
+    using A = attribute_type;
+    switch(t)
+    {
+      case A::Mat2:    case A::Mat2x3:  case A::Mat2x4:
+      case A::DMat2:   case A::DMat2x3: case A::DMat2x4:
+        return 2;
+      case A::Mat3:    case A::Mat3x2:  case A::Mat3x4:
+      case A::DMat3:   case A::DMat3x2: case A::DMat3x4:
+        return 3;
+      case A::Mat4:    case A::Mat4x2:  case A::Mat4x3:
+      case A::DMat4:   case A::DMat4x2: case A::DMat4x3:
+        return 4;
+      default:
+        return 1;
+    }
+  };
+
   static constexpr auto parse_attributes
       = []<typename T, auto member>(descriptor& d, const sajson::value& v) {
     using namespace std::literals;
@@ -1223,33 +2143,140 @@ static const ossia::string_map<root_fun>& root_parse{[] {
             }
             else if(loc_obj.get_type() == sajson::TYPE_STRING)
             {
-              // Parse as integer, e.g. "LOCATION": "3"
-              ip.location = std::stoi(loc_obj.as_string());
+              // Parse as integer, e.g. "LOCATION": "3". std::stoi throws
+              // std::invalid_argument (a logic_error, not runtime_error)
+              // on non-numeric input — catch it locally and surface a
+              // useful invalid_file message instead. The previous
+              // unguarded call escaped through the parser's outer
+              // catch(const std::runtime_error&) and either terminated
+              // (when the parser was invoked from a noexcept context;
+              // see ProcessDropHandler.cpp) or surfaced as the generic
+              // "Unknown error" via the catch(...) fallback at
+              // ShaderProgram.cpp.
               // FIXME parse standard locations from ossia::geometry_port
+              try
+              {
+                ip.location = std::stoi(loc_obj.as_string());
+              }
+              catch(const std::exception&)
+              {
+                throw invalid_file{
+                  std::string("LOCATION must be integer or numeric "
+                              "string, got: \"")
+                  + std::string(loc_obj.as_string()) + "\""};
+              }
             }
           }
 
           if(auto k = obj.find_object_key_insensitive(sajson::literal("TYPE"));
              k != obj.get_length())
           {
-            std::string type_str = obj.get_object_value(k).as_string();
-            boost::algorithm::to_lower(type_str);
-            auto inp = attribute_type_parse.find(type_str);
-            if(inp != attribute_type_parse.end())
-              ip.type = inp->second;
+            std::string type_str;
+            if(get_str(obj.get_object_value(k), type_str))
+            {
+              boost::algorithm::to_lower(type_str);
+              auto inp = attribute_type_parse.find(type_str);
+              if(inp != attribute_type_parse.end())
+                ip.type = inp->second;
+            }
           }
 
           if(auto k = obj.find_object_key_insensitive(sajson::literal("NAME"));
              k != obj.get_length())
           {
-            ip.name = obj.get_object_value(k).as_string();
+            get_str(obj.get_object_value(k), ip.name);
+          }
+
+          // SEMANTIC (only meaningful on vertex_input): explicit ossia
+          // attribute semantic name to use for upstream-buffer matching.
+          // When omitted, name is used as the semantic key. When set to
+          // "custom" the runtime falls back to NAME-based matching.
+          if(auto k = obj.find_object_key_insensitive(sajson::literal("SEMANTIC"));
+             k != obj.get_length())
+          {
+            auto val = obj.get_object_value(k);
+            if(val.get_type() == sajson::TYPE_STRING)
+              ip.semantic = val.as_string();
+          }
+
+          // Interpolation qualifier: "smooth" (default, not emitted), "flat",
+          // "noperspective", "centroid", "sample". Applies to vertex outputs
+          // and fragment inputs (no effect on vertex inputs / fragment outputs).
+          if(auto k = obj.find_object_key_insensitive(sajson::literal("INTERPOLATION"));
+             k != obj.get_length())
+          {
+            auto val = obj.get_object_value(k);
+            if(val.get_type() == sajson::TYPE_STRING)
+              ip.interpolation = val.as_string();
           }
 
-          // If LOCATION was not specified, assign sequentially
-          // FIXME maybe try to match it from the name ?
+          // REQUIRED / DEFAULT: only meaningful on vertex_input (raw raster
+          // pipeline's strictness-vs-fallback control). Silently ignored on
+          // vertex_output / fragment_input / fragment_output — their matching
+          // rules are author-owned, not upstream-dependent.
+          if constexpr (std::is_same_v<T, vertex_input>)
+          {
+            if(auto k = obj.find_object_key_insensitive(sajson::literal("REQUIRED"));
+               k != obj.get_length())
+            {
+              const auto& rv = obj.get_object_value(k);
+              if(rv.get_type() == sajson::TYPE_FALSE)
+                ip.required = false;
+              else if(rv.get_type() == sajson::TYPE_TRUE)
+                ip.required = true;
+              // Other JSON types left at default (true). No error here —
+              // strict JSON typing is already enforced upstream by sajson.
+            }
+
+            if(auto k = obj.find_object_key_insensitive(sajson::literal("DEFAULT"));
+               k != obj.get_length())
+            {
+              const auto& dv = obj.get_object_value(k);
+              if(dv.get_type() == sajson::TYPE_ARRAY)
+              {
+                const std::size_t len = dv.get_length();
+                ip.default_val.reserve(len);
+                for(std::size_t j = 0; j < len; ++j)
+                {
+                  const auto& e = dv.get_array_element(j);
+                  if(e.get_type() == sajson::TYPE_INTEGER)
+                    ip.default_val.push_back((double)e.get_integer_value());
+                  else if(e.get_type() == sajson::TYPE_DOUBLE)
+                    ip.default_val.push_back(e.get_double_value());
+                  // Non-numeric entries silently skipped — the runtime's
+                  // component-pad rule will fill missing slots with zero.
+                }
+              }
+              else if(dv.get_type() == sajson::TYPE_INTEGER)
+              {
+                // Allow a bare scalar for 1-wide types: "DEFAULT": 1
+                ip.default_val.push_back((double)dv.get_integer_value());
+              }
+              else if(dv.get_type() == sajson::TYPE_DOUBLE)
+              {
+                ip.default_val.push_back(dv.get_double_value());
+              }
+            }
+          }
+
+          // If LOCATION was not specified, assign sequentially with
+          // per-type location counts so mat3/mat4 and their rectangular
+          // cousins claim the right number of slots (matMxN consumes M
+          // consecutive locations under GLSL 4.50 §4.4.1). Previously
+          // this was `(int)(d.*member).size()` — off-by-3 the moment a
+          // shader declared any mat4 input, and the next attribute
+          // would land inside the matrix, which the driver rejects.
+          //
+          // For mixed explicit / auto layouts the cumulative sum below
+          // can collide with a user-pinned LOCATION; that's a pre-existing
+          // policy tradeoff left untouched here — the simpler "always
+          // auto" pattern is what 99% of shipped shaders use.
           if(ip.location < 0 && !ip.name.empty())
           {
-            ip.location = (int)(d.*member).size();
+            int next_loc = 0;
+            for(const auto& prev : d.*member)
+              next_loc += locations_consumed(prev.type);
+            ip.location = next_loc;
           }
 
           if(ip.type != attribute_type::Unknown && ip.location >= 0 && !ip.name.empty())
@@ -1277,9 +2304,12 @@ static const ossia::string_map<root_fun>& root_parse{[] {
     parse_attributes.operator()<fragment_output, &descriptor::fragment_outputs>(d, v);
   }});
 
-  // Top-level AUXILIARY for RAW_RASTER_PIPELINE: SSBOs expected from upstream geometry
+  // Top-level AUXILIARY for RAW_RASTER_PIPELINE: SSBOs AND textures travelling
+  // bundled with the upstream geometry. Buffer entries (default / TYPE:
+  // "storage") land in d.auxiliary; texture entries (TYPE: "image" /
+  // "texture" / "cubemap" / "image_cube") land in d.auxiliary_textures.
   p.insert({"AUXILIARY", [](descriptor& d, const sajson::value& v) {
-    parse_auxiliary_array(v, d.auxiliary);
+    parse_auxiliary_array(v, d.auxiliary, d.auxiliary_textures);
   }});
 
   // Add RESOURCES parsing for CSF (which can contain both inputs and resources)
@@ -1296,16 +2326,22 @@ static const ossia::string_map<root_fun>& root_parse{[] {
           auto k = obj.find_object_key_insensitive(sajson::literal("TYPE"));
           if(k != obj.get_length())
           {
-            std::string type_str = obj.get_object_value(k).as_string();
+            std::string type_str;
+            if(!get_str(obj.get_object_value(k), type_str))
+              continue;
 
             boost::algorithm::to_lower(type_str);
-            // Handle special case for CSF image type
-            if(type_str == "image")
+            // Handle special cases for CSF image types
+            //   "image"      → 2D / 3D storage image (image2D / image3D)
+            //   "image_cube" → writable cubemap storage image (imageCube)
+            if(type_str == "image" || type_str == "image_cube")
             {
               input inp;
               parse_input_base(inp, obj);
               csf_image_input ci;
               parse_input(ci, obj);
+              ci.cubemap = ci.cubemap || type_str == "image_cube"; // TYPE alone marks a cube; parse_input() ran its IS_ARRAY + cube check before this flag was set
+              if(ci.cubemap && ci.is_array) throw invalid_file{"csf_image_input: IS_ARRAY + image_cube is not supported — imageCubeArray views are broken on every QRhi backend."};
               inp.data = ci;
               d.inputs.push_back(inp);
             }
@@ -1548,8 +2584,8 @@ static const ossia::string_map<root_fun>& root_parse{[] {
                = obj.find_object_key_insensitive(sajson::literal("TARGET"));
                target_k != obj.get_length())
             {
-              p.target = obj.get_object_value(target_k).as_string();
-              if(!p.target.empty())
+              if(get_str(obj.get_object_value(target_k), p.target)
+                 && !p.target.empty())
               {
                 d.pass_targets.push_back(p.target);
               }
@@ -1619,6 +2655,54 @@ static const ossia::string_map<root_fun>& root_parse{[] {
               }
             }
 
+            // LAYER: render to a specific layer of a texture-array output.
+            if(auto layer_k
+               = obj.find_object_key_insensitive(sajson::literal("LAYER"));
+               layer_k != obj.get_length())
+            {
+              int lyr{};
+              if(get_int(obj.get_object_value(layer_k), lyr))
+                p.layer = lyr;
+            }
+
+            // Z: render to a specific Z-slice of a 3D target. Stored as an
+            // expression so it can reference $USER or input sizes; resolved
+            // at render time.
+            if(auto z_k = obj.find_object_key_insensitive(sajson::literal("Z"));
+               z_k != obj.get_length())
+            {
+              auto t = obj.get_object_value(z_k).get_type();
+              if(t == sajson::TYPE_STRING)
+                p.z_expression = obj.get_object_value(z_k).as_string();
+              else if(t == sajson::TYPE_INTEGER)
+                p.z_expression
+                    = std::to_string(obj.get_object_value(z_k).get_integer_value());
+              else if(t == sajson::TYPE_DOUBLE)
+                p.z_expression
+                    = std::to_string((int)obj.get_object_value(z_k).get_double_value());
+            }
+
+            // FORMAT: override the intermediate-render-target format for
+            // this pass only. Useful for separable-filter chains where one
+            // intermediate wants extra precision (rgba16f) but the final
+            // output is RGBA8.
+            if(auto fmt_k
+               = obj.find_object_key_insensitive(sajson::literal("FORMAT"));
+               fmt_k != obj.get_length())
+            {
+              auto v2 = obj.get_object_value(fmt_k);
+              if(v2.get_type() == sajson::TYPE_STRING)
+                p.format = v2.as_string();
+            }
+
+            // PIPELINE_STATE: per-pass pipeline state overrides.
+            if(auto ps_k
+               = obj.find_object_key_insensitive(sajson::literal("PIPELINE_STATE"));
+               ps_k != obj.get_length())
+            {
+              parse_pipeline_state(obj.get_object_value(ps_k), p.override_state);
+            }
+
             d.passes.push_back(std::move(p));
           }
         }
@@ -1640,25 +2724,203 @@ static const ossia::string_map<root_fun>& root_parse{[] {
           if(auto name_k = obj.find_object_key_insensitive(sajson::literal("NAME"));
              name_k != obj.get_length())
           {
-            out.name = obj.get_object_value(name_k).as_string();
+            get_str(obj.get_object_value(name_k), out.name);
           }
 
           if(auto type_k = obj.find_object_key_insensitive(sajson::literal("TYPE"));
              type_k != obj.get_length())
           {
-            out.type = obj.get_object_value(type_k).as_string();
+            get_str(obj.get_object_value(type_k), out.type);
           }
 
           // Default type to "color" if not specified
           if(out.type.empty())
             out.type = "color";
 
+          // LAYERS: >1 allocates a texture array with this many layers.
+          if(auto layers_k = obj.find_object_key_insensitive(sajson::literal("LAYERS"));
+             layers_k != obj.get_length())
+          {
+            int l{};
+            if(get_int(obj.get_object_value(layers_k), l) && l > 0)
+              out.layers = l;
+          }
+
+          // DEPTH: >1 allocates a 3D texture with this depth. Passes targeting
+          // this output can specify Z to write into a specific slice.
+          if(auto depth_k = obj.find_object_key_insensitive(sajson::literal("DEPTH"));
+             depth_k != obj.get_length())
+          {
+            int d_val{};
+            if(get_int(obj.get_object_value(depth_k), d_val) && d_val > 0)
+              out.depth = d_val;
+          }
+
+          // FORMAT: optional explicit texture format (e.g. "rgba16f", "r32f", "d32f").
+          if(auto fmt_k = obj.find_object_key_insensitive(sajson::literal("FORMAT"));
+             fmt_k != obj.get_length())
+          {
+            auto v2 = obj.get_object_value(fmt_k);
+            if(v2.get_type() == sajson::TYPE_STRING)
+              out.format = v2.as_string();
+          }
+
+          // SAMPLES: MSAA sample count (1, 2, 4, 8, 16, ...).
+          if(auto s_k = obj.find_object_key_insensitive(sajson::literal("SAMPLES"));
+             s_k != obj.get_length())
+          {
+            int s{};
+            if(get_int(obj.get_object_value(s_k), s) && s >= 1)
+              out.samples = s;
+          }
+
+          // CUBEMAP: when true the layered output is allocated as a cubemap
+          // (six faces sampled via samplerCube downstream) rather than a
+          // plain 2D array. Combines with `LAYERS: 6` + `MULTIVIEW: 6` for
+          // the IBL precompute case (one draw writes all six faces of the
+          // target cube). Consumer shaders declare a matching
+          // `TYPE: "cubemap"` INPUT to read it.
+          if(auto cube_k = obj.find_object_key_insensitive(sajson::literal("CUBEMAP"));
+             cube_k != obj.get_length())
+          {
+            auto v2 = obj.get_object_value(cube_k);
+            if(v2.get_type() == sajson::TYPE_TRUE)
+              out.is_cubemap = true;
+            else if(v2.get_type() == sajson::TYPE_INTEGER)
+              out.is_cubemap = (v2.get_integer_value() != 0);
+          }
+
+          // GENERATE_MIPS: post-pass mip-chain auto-fill. Implies the
+          // MipMapped + UsedWithGenerateMips allocator flags. Runtime
+          // issues a QRhiResourceUpdateBatch::generateMips after the
+          // render loop (and after any CUBEMAP+MULTIVIEW cube-copy).
+          if(auto gm_k = obj.find_object_key_insensitive(sajson::literal("GENERATE_MIPS"));
+             gm_k != obj.get_length())
+          {
+            auto v2 = obj.get_object_value(gm_k);
+            if(v2.get_type() == sajson::TYPE_TRUE)
+              out.generate_mips = true;
+            else if(v2.get_type() == sajson::TYPE_INTEGER)
+              out.generate_mips = (v2.get_integer_value() != 0);
+          }
+
+          // WIDTH / HEIGHT: explicit offscreen target size. Integer
+          // literal (fast path) or string expression (evaluated at
+          // init time against input-image sizes / scalar ports,
+          // mirroring CSF dispatch-expression semantics). Zero /
+          // unset → fall back to renderer.state.renderSize.
+          if(auto w_k = obj.find_object_key_insensitive(sajson::literal("WIDTH"));
+             w_k != obj.get_length())
+          {
+            auto v2 = obj.get_object_value(w_k);
+            if(v2.get_type() == sajson::TYPE_INTEGER)
+              out.width = v2.get_integer_value();
+            else if(v2.get_type() == sajson::TYPE_DOUBLE)
+              out.width = (int)v2.get_double_value();
+            else if(v2.get_type() == sajson::TYPE_STRING)
+              out.width_expression = v2.as_string();
+          }
+          if(auto h_k = obj.find_object_key_insensitive(sajson::literal("HEIGHT"));
+             h_k != obj.get_length())
+          {
+            auto v2 = obj.get_object_value(h_k);
+            if(v2.get_type() == sajson::TYPE_INTEGER)
+              out.height = v2.get_integer_value();
+            else if(v2.get_type() == sajson::TYPE_DOUBLE)
+              out.height = (int)v2.get_double_value();
+            else if(v2.get_type() == sajson::TYPE_STRING)
+              out.height_expression = v2.as_string();
+          }
+
           d.outputs.push_back(std::move(out));
         }
       }
     }
   }});
 
+  p.insert({"PIPELINE_STATE", [](descriptor& d, const sajson::value& v) {
+    parse_pipeline_state(v, d.default_state); // root-level key fills the descriptor's default state
+  }});
+
+  p.insert({"MULTIVIEW", [](descriptor& d, const sajson::value& v) {
+    switch(v.get_type()) {
+      case sajson::TYPE_INTEGER: d.multiview_count = v.get_integer_value(); break;
+      case sajson::TYPE_DOUBLE:  d.multiview_count = (int)v.get_double_value(); break;
+      case sajson::TYPE_TRUE:    d.multiview_count = 2; break; // bare `true` means 2 views
+      default: break; // any other JSON type leaves the count untouched
+    }
+  }});
+
+  // EXECUTION_MODEL (top-level, RAW_RASTER_PIPELINE). Shape:
+  //   "EXECUTION_MODEL": {
+  //     "TYPE":   "SINGLE" | "PER_MIP" | "PER_CUBE_FACE" | "PER_LAYER" | "MANUAL",
+  //     "TARGET": "<output name>",    // PER_MIP / PER_CUBE_FACE / PER_LAYER
+  //     "COUNT":  "<expression>"      // MANUAL (int literal accepted too)
+  //   }
+  // Distinct from the per-pass EXECUTION_MODEL inside DISPATCH / PASSES
+  // (CSF compute), which lives in `dispatch_info::execution_type`.
+  p.insert({"EXECUTION_MODEL", [](descriptor& d, const sajson::value& v) {
+    if(v.get_type() != sajson::TYPE_OBJECT)
+      return;
+    // A lookup result equal to get_length() is treated as "key not present".
+    const auto missing = v.get_length();
+    // TYPE and TARGET are taken only when they are JSON strings.
+    const auto type_idx = v.find_object_key_insensitive(sajson::literal("TYPE"));
+    if(type_idx != missing)
+    {
+      const auto type_val = v.get_object_value(type_idx);
+      if(type_val.get_type() == sajson::TYPE_STRING)
+        d.execution_model.type = type_val.as_string();
+    }
+    const auto target_idx = v.find_object_key_insensitive(sajson::literal("TARGET"));
+    if(target_idx != missing)
+    {
+      const auto target_val = v.get_object_value(target_idx);
+      if(target_val.get_type() == sajson::TYPE_STRING)
+        d.execution_model.target = target_val.as_string();
+    }
+    // COUNT: a string is kept as-is, an integer literal is stringified.
+    const auto count_idx = v.find_object_key_insensitive(sajson::literal("COUNT"));
+    if(count_idx != missing)
+    {
+      const auto count_val = v.get_object_value(count_idx);
+      if(count_val.get_type() == sajson::TYPE_STRING)
+        d.execution_model.count_expression = count_val.as_string();
+      else if(count_val.get_type() == sajson::TYPE_INTEGER)
+        d.execution_model.count_expression = std::to_string(count_val.get_integer_value());
+    }
+  }});
+
+  p.insert({"CLIP_DISTANCES", [](descriptor& d, const sajson::value& v) {
+    // Only counts in [1, 8] are accepted; anything else leaves the field as is.
+    if(int count = 0; get_int(v, count) && 1 <= count && count <= 8)
+      d.clip_distances = count;
+  }});
+
+  p.insert({"CULL_DISTANCES", [](descriptor& d, const sajson::value& v) {
+    // Only counts in [1, 8] are accepted; anything else leaves the field as is.
+    if(int count = 0; get_int(v, count) && 1 <= count && count <= 8)
+      d.cull_distances = count;
+  }});
+
+  p.insert({"DEPTH_LAYOUT", [](descriptor& d, const sajson::value& v) {
+    if(v.get_type() == sajson::TYPE_STRING) // non-string values are ignored
+      d.depth_layout = v.as_string(); // stored verbatim; no validation at parse time
+  }});
+
+  p.insert({"EXTENSIONS", [](descriptor& d, const sajson::value& v) {
+    // Only a JSON array is accepted; non-string elements are dropped.
+    if(v.get_type() == sajson::TYPE_ARRAY)
+    {
+      const std::size_t count = v.get_length();
+      d.extensions.reserve(d.extensions.size() + count);
+      for(std::size_t idx = 0; idx != count; ++idx)
+        if(const auto elem = v.get_array_element(idx);
+           elem.get_type() == sajson::TYPE_STRING)
+          d.extensions.emplace_back(elem.as_string());
+    }
+  }});
+
   p.insert({"POINT_COUNT", [](descriptor& d, const sajson::value& v) {
     if(v.get_type() == sajson::TYPE_INTEGER)
       d.point_count = v.get_integer_value();
@@ -1708,7 +2970,7 @@ static const ossia::string_map<root_fun>& root_parse{[] {
           auto name_key = obj.find_object_key_insensitive(sajson::literal("NAME"));
           if(name_key != obj.get_length())
           {
-            type_def.name = obj.get_object_value(name_key).as_string();
+            get_str(obj.get_object_value(name_key), type_def.name);
           }
 
           // Parse LAYOUT field
@@ -1731,7 +2993,7 @@ static const ossia::string_map<root_fun>& root_parse{[] {
                       = field_obj.find_object_key_insensitive(sajson::literal("NAME"));
                   if(field_name_key != field_obj.get_length())
                   {
-                    field.name = field_obj.get_object_value(field_name_key).as_string();
+                    get_str(field_obj.get_object_value(field_name_key), field.name);
                   }
 
                   // Parse field TYPE
@@ -1739,7 +3001,7 @@ static const ossia::string_map<root_fun>& root_parse{[] {
                       = field_obj.find_object_key_insensitive(sajson::literal("TYPE"));
                   if(field_type_key != field_obj.get_length())
                   {
-                    field.type = field_obj.get_object_value(field_type_key).as_string();
+                    get_str(field_obj.get_object_value(field_type_key), field.type);
                   }
 
                   type_def.layout.push_back(field);
@@ -1757,6 +3019,18 @@ static const ossia::string_map<root_fun>& root_parse{[] {
   return p;
 }()};
 
+// A non-empty compare op different from "never" turns the sampler into a
+// shadow/comparison sampler. Mirrors QRhiSampler::CompareOp interpretation.
+static bool isf_is_comparison_sampler(const sampler_config& s)
+{
+  // True unless the compare op is empty or (case-insensitively) "never".
+  std::string c = s.compare;
+  // tolower() is undefined for negative char values: go through unsigned char.
+  for(auto& ch : c) ch = (char)tolower((unsigned char)ch);
+  return !c.empty() && c != "never";
+}
+
+
 struct create_val_visitor_450
 {
   struct return_type
@@ -1771,14 +3045,43 @@ struct create_val_visitor_450
   return_type operator()(const point2d_input&) { return {"vec2", false}; }
   return_type operator()(const point3d_input&) { return {"vec3", false}; }
   return_type operator()(const color_input&) { return {"vec4", false}; }
-  return_type operator()(const image_input& i) { return {i.dimensions == 3 ? "uniform sampler3D" : "uniform sampler2D", true}; }
-  return_type operator()(const cubemap_input&) { return {"uniform samplerCube", true}; }
+  return_type operator()(const image_input& i)
+  {
+    if(i.dimensions == 3)
+      return {"uniform sampler3D", true}; // volumes never get a shadow variant here
+    const bool shadow = isf_is_comparison_sampler(i.sampler);
+    if(!i.is_array)
+      return {shadow ? "uniform sampler2DShadow" : "uniform sampler2D", true};
+    return {shadow ? "uniform sampler2DArrayShadow" : "uniform sampler2DArray", true};
+  }
+  return_type operator()(const cubemap_input& c)
+  {
+    if(isf_is_comparison_sampler(c.sampler)) // comparison sampler => Shadow type
+      return {"uniform samplerCubeShadow", true};
+    return {"uniform samplerCube", true};
+  }
   return_type operator()(const audio_input&) { return {"uniform sampler2D", true}; }
   return_type operator()(const audioFFT_input&) { return {"uniform sampler2D", true}; }
   return_type operator()(const audioHist_input&) { return {"uniform sampler2D", true}; }
   return_type operator()(const storage_input&) { return {"buffer", true}; }
-  return_type operator()(const texture_input& i) { return {i.dimensions == 3 ? "uniform sampler3D" : "uniform sampler2D", true}; }
-  return_type operator()(const csf_image_input& i) { return {i.is3D() ? "uniform image3D" : "uniform image2D", true}; }
+  return_type operator()(const uniform_input&) { return {"uniform", true}; }
+  return_type operator()(const texture_input& i)
+  {
+    if(i.dimensions == 3) // 3D textures ignore the compare op
+      return {"uniform sampler3D", true};
+    const bool shadow = isf_is_comparison_sampler(i.sampler);
+    return {shadow ? "uniform sampler2DShadow" : "uniform sampler2D", true};
+  }
+  return_type operator()(const csf_image_input& i)
+  {
+    // Pick the GLSL image type from the input's shape; the first match wins
+    // in the order cube, 3D, 2D array, with plain 2D as the fallback.
+    const char* decl = "uniform image2D";
+    if(i.isCube())      decl = "uniform imageCube";
+    else if(i.is3D())   decl = "uniform image3D";
+    else if(i.is_array) decl = "uniform image2DArray";
+    return {decl, true};
+  }
   return_type operator()(const geometry_input&) { return {"buffer", true}; }
 };
 
@@ -1942,6 +3245,251 @@ void parser::parse_geometry_filter()
   m_geometry_filter = filter_ubo + geomWithoutISF + "\n";
 }
 
+// --- GLSL helpers for graphics-visible storage resources ----------------
+//
+// Derive GLSL image/sampler prefix from a format string.
+// Unsigned integer formats (R32UI, RGBA16UI, ...) → "u"
+// Signed integer formats (R32I, RGBA16I, ...) → "i"
+// Float/unorm formats (R32F, RGBA8, ...) → ""
+static std::string isf_glsl_type_prefix(const std::string& format)
+{
+  std::string fmt = format;
+  // toupper() is undefined for negative char values: go through unsigned char.
+  for(auto& c : fmt) c = (char)toupper((unsigned char)c);
+  if(fmt.find("UI") != std::string::npos)
+    return "u";
+  // A trailing "UI" already returned above, so a trailing 'I' means signed.
+  if(fmt.size() >= 2 && fmt.back() == 'I')
+    return "i";
+  return ""; // float / unorm formats, and the empty format
+}
+
+// Returns true when the visibility string indicates this resource should be
+// declared in a graphics pipeline (vertex or fragment stage).
+static bool is_graphics_visibility(std::string_view vis)
+{
+  const bool names_stage = vis == "vertex" || vis == "fragment" || vis == "vertex+fragment";
+  return names_stage || vis == "graphics" || vis == "both";
+}
+
+// Emit GLSL `struct <name> { <fields...> };` declarations from the TYPES
+// section. Must be injected BEFORE any SSBO/UBO body that references the
+// struct, in BOTH vertex and fragment stages — otherwise scene shaders that
+// declare e.g. `Light` and use `readonly buffer { Light entries[]; }` fail
+// VS compilation when the SSBO leaks into a vertex pipeline that never
+// included the struct (the fragment-only TYPES emission was the long-standing
+// bug here). The compute path has its own copy of this logic at
+// parse_compute_shader; this helper is shared by parse_isf and
+// parse_raw_raster_pipeline.
+static std::string isf_emit_types_struct(const std::vector<descriptor::type_definition>& types)
+{
+  std::string glsl;
+  if(!types.empty())
+    glsl = "// Struct definitions from TYPES section\n";
+
+  for(const auto& def : types)
+  {
+    glsl.append("struct ").append(def.name).append(" {\n");
+    for(const auto& member : def.layout)
+    {
+      // A "T[N]" type is written as `T name[N]`: the suffix follows the name.
+      const auto lb = member.type.find('[');
+      const bool is_array = (lb != std::string::npos);
+      glsl.append("    ").append(member.type, 0, lb).append(" ").append(member.name);
+      if(is_array)
+        glsl.append(member.type, lb, std::string::npos);
+      glsl.append(";\n");
+    }
+    glsl.append("};\n\n");
+  }
+  return glsl;
+}
+
+static std::string isf_emit_ssbo_decl(
+    int binding, std::string_view name, const storage_input& s, bool alias_prev)
+{
+  // Memory qualifier: alias_prev always yields readonly; otherwise follow the
+  // declared access, defaulting to `restrict`.
+  const char* qualifier = "restrict ";
+  if(alias_prev || s.access == "read_only")
+    qualifier = "readonly ";
+  else if(s.access == "write_only")
+    qualifier = "writeonly ";
+
+  std::string decl = "layout(binding = " + std::to_string(binding) + ", std430) ";
+  decl += qualifier;
+  decl += "buffer ";
+  decl += name;
+  decl += "_buf {\n";
+  for(const auto& member : s.layout)
+  {
+    // A "T[N]" type is written as `T name[N]`: the suffix follows the name.
+    const auto lb = member.type.find('[');
+    decl.append("    ").append(member.type, 0, lb).append(" ").append(member.name);
+    if(lb != std::string::npos)
+      decl.append(member.type, lb, std::string::npos);
+    decl.append(";\n");
+  }
+  decl += "} ";
+  decl += name;
+  decl += ";\n\n";
+  return decl;
+}
+
+static std::string isf_emit_ubo_decl(
+    int binding, std::string_view name, const uniform_input& u)
+{
+  // Block type is `<name>_t`, the instance is `<name>`, the layout is std140.
+  std::string decl = "layout(binding = " + std::to_string(binding);
+  decl += ", std140) uniform ";
+  decl += name;
+  decl += "_t {\n";
+
+  for(const auto& member : u.layout)
+  {
+    // A "T[N]" type is written as `T name[N]`: the suffix follows the name.
+    const auto lb = member.type.find('[');
+    decl.append("    ").append(member.type, 0, lb).append(" ").append(member.name);
+    if(lb != std::string::npos)
+      decl.append(member.type, lb, std::string::npos);
+    decl.append(";\n");
+  }
+  decl += "} ";
+  decl += name;
+  decl += ";\n\n";
+  return decl;
+}
+
+static std::string isf_emit_image_decl(
+    int binding, std::string_view name, const csf_image_input& img,
+    bool alias_prev = false)
+{
+  // Layout format qualifier: lower-cased format, "rgba8" when unspecified.
+  std::string layout_fmt = img.format.empty() ? "rgba8" : img.format;
+  boost::algorithm::to_lower(layout_fmt);
+
+  // Memory qualifier: alias_prev always yields readonly; otherwise follow the
+  // declared access, defaulting to `restrict`.
+  const char* qualifier = "restrict ";
+  if(alias_prev || img.access == "read_only")
+    qualifier = "readonly ";
+  else if(img.access == "write_only")
+    qualifier = "writeonly ";
+
+  // The GLSL image shape has to agree with the texture that gets bound, so
+  // cube, 3D and array inputs each get their own image type instead of a
+  // flat image2D. Priority: cubemap > 3D > array > 2D.
+  const char* shape = "image2D ";
+  if(img.isCube())
+    shape = "imageCube ";
+  else if(img.is3D())
+    shape = "image3D ";
+  else if(img.is_array)
+    shape = "image2DArray ";
+
+  std::string decl = "layout(binding = " + std::to_string(binding) + ", ";
+  decl += layout_fmt;
+  decl += ") ";
+  decl += qualifier;
+  decl += "uniform ";
+  // "u" / "i" / "" prefix selects uimage*, iimage* or image*.
+  decl += isf_glsl_type_prefix(img.format);
+  decl += shape;
+  decl += name;
+  decl += ";\n";
+  return decl;
+}
+
+// Emit declarations for storage_input / csf_image_input inputs for a graphics
+// shader (ISF or RawRaster). Starts at `binding`, returns the next free binding.
+// Also emits `name_prev` readonly declarations for persistent SSBOs.
+static int isf_emit_graphics_storage(
+    std::string& out, int binding, const std::vector<input>& inputs)
+{
+  // Walk the inputs in declaration order; every emitted declaration takes
+  // the next binding slot, and a persistent resource takes a second slot
+  // for its readonly `<name>_prev` alias.
+  for(const auto& entry : inputs)
+  {
+    if(const auto* ssbo = ossia::get_if<storage_input>(&entry.data))
+    {
+      // Declared only when graphics-visible and without a buffer_usage
+      // (indirect-draw buffers don't need shader visibility).
+      if(is_graphics_visibility(ssbo->visibility) && ssbo->buffer_usage.empty())
+      {
+        out += isf_emit_ssbo_decl(binding++, entry.name, *ssbo, /*alias_prev=*/false);
+        if(ssbo->persistent)
+        {
+          out += isf_emit_ssbo_decl(
+              binding++, entry.name + "_prev", *ssbo, /*alias_prev=*/true);
+        }
+      }
+    }
+    // Storage images: same scheme, gated on visibility only.
+    else if(const auto* image = ossia::get_if<csf_image_input>(&entry.data))
+    {
+      if(is_graphics_visibility(image->visibility))
+      {
+        out += isf_emit_image_decl(binding++, entry.name, *image, /*alias_prev=*/false);
+        if(image->persistent)
+        {
+          out += isf_emit_image_decl(
+              binding++, entry.name + "_prev", *image, /*alias_prev=*/true);
+        }
+      }
+    }
+    // Uniform blocks: a single declaration, no `_prev` alias.
+    else if(const auto* ubo = ossia::get_if<uniform_input>(&entry.data))
+    {
+      if(is_graphics_visibility(ubo->visibility))
+        out += isf_emit_ubo_decl(binding++, entry.name, *ubo);
+    }
+  }
+  return binding;
+}
+
+// The #extension pragma must come BEFORE any declarations — emit it separately
+// so it can be prepended right after #version.
+static std::string isf_emit_multiview_extension(int view_count)
+{
+  // Three preprocessor lines: the extension pragma, then the VIEW_INDEX and
+  // NUM_VIEWS convenience macros.
+  std::string prelude = "#extension GL_EXT_multiview : require\n"
+                        "#define VIEW_INDEX gl_ViewIndex\n"
+                        "#define NUM_VIEWS ";
+  prelude.append(std::to_string(view_count)).append("\n");
+  return prelude;
+}
+
+// User-declared EXTENSIONS from the descriptor. Emitted alongside the
+// multiview extension, each as `#extension <name> : require`. Advanced
+// effects (subgroup ops, atomic floats, ray queries, …) go through here.
+static std::string isf_emit_user_extensions(const std::vector<std::string>& exts)
+{
+  std::string pragmas;
+  for(const std::string& ext_name : exts)
+  {
+    // Blank entries would produce a malformed directive, so they are skipped.
+    if(!ext_name.empty())
+    {
+      pragmas += "#extension " + ext_name + " : require\n";
+    }
+  }
+  return pragmas;
+}
+
+// Emit the multiview view-projection UBO.
+static std::string isf_emit_multiview_ubo(int binding, int view_count)
+{
+  // One std140 block `multiview_t`, instance `isf_mv`, holding a mat4 array
+  // with one entry per view.
+  const std::string slot = std::to_string(binding);
+  const std::string views = std::to_string(view_count);
+  return "layout(std140, binding = " + slot
+         + ") uniform multiview_t { mat4 viewProjection[" + views
+         + "]; } isf_mv;\n";
+}
+
 void parser::parse_isf()
 {
   using namespace std::literals;
@@ -1960,6 +3508,35 @@ void parser::parse_isf()
     m_desc.passes.push_back(isf::pass{});
   }
 
+  // Fragment-mode ISF cannot drive PASSES that target a 3D / Z-sliced
+  // OUTPUT: that requires per-Z-slice color attachments / 3D image
+  // storage plumbing through the pass-target allocator and the
+  // beginPass site, which the RenderedISFNode renderer does not yet
+  // wire end-to-end. Authors should use a CSF compute shader
+  // (EXECUTION_MODEL: 3D_IMAGE) for true volumetric writes; refusing
+  // to load here is loud and prevents a silent 2D downgrade that
+  // would make every imageStore / fragment write target the wrong
+  // memory.
+  for(const auto& pass : m_desc.passes)
+  {
+    bool target_is_3d = false;
+    for(const auto& out : m_desc.outputs)
+    {
+      if(out.name == pass.target && out.depth > 1)
+      {
+        target_is_3d = true;
+        break;
+      }
+    }
+    if(!pass.z_expression.empty() || target_is_3d)
+    {
+      throw invalid_file{
+          "fragment-mode ISF with PASSES targeting Z / 3D OUTPUTS is not "
+          "yet supported in this engine — use CSF compute "
+          "(EXECUTION_MODEL: 3D_IMAGE) for volumetric writes."};
+    }
+  }
+
   auto& d = m_desc;
 
   // We start from empty strings.
@@ -1972,9 +3549,17 @@ void parser::parse_isf()
   switch(m_version)
   {
     case 450: {
+      // Extensions pragma block — must come right after #version, before
+      // any layout/uniform/in/out declarations.
+      std::string extensions_prelude;
+      if(d.multiview_count >= 2)
+        extensions_prelude += isf_emit_multiview_extension(d.multiview_count);
+      extensions_prelude += isf_emit_user_extensions(d.extensions);
+
       // Setup vertex shader
       {
         m_vertex = GLSL45.versionPrelude;
+        m_vertex += extensions_prelude;
 
         if(m_sourceVertex.empty())
         {
@@ -1990,6 +3575,18 @@ void parser::parse_isf()
       {
         // Setup fragment shader
         m_fragment = GLSL45.versionPrelude;
+        m_fragment += extensions_prelude;
+
+        // LAYER_INDEX for layered / multi-layer outputs: the vertex shader writes
+        // to gl_Layer and the fragment shader receives it via a flat varying.
+        bool has_layered_output = (d.multiview_count >= 2);
+        for(const auto& out : d.outputs)
+          if(out.layers > 1)
+            has_layered_output = true;
+        if(has_layered_output)
+        {
+          m_fragment += "#define LAYER_INDEX gl_Layer\n";
+        }
 
         if(d.outputs.empty())
         {
@@ -2027,11 +3624,34 @@ void parser::parse_isf()
             }
           }
         }
+
+        // Conservative-depth qualifier on gl_FragDepth (ISF path).
+        if(!d.depth_layout.empty())
+        {
+          std::string dl = d.depth_layout;
+          for(auto& c : dl) c = (char)tolower(c);
+          const char* q = nullptr;
+          if(dl == "greater")        q = "depth_greater";
+          else if(dl == "less")      q = "depth_less";
+          else if(dl == "unchanged") q = "depth_unchanged";
+          else if(dl == "any")       q = "depth_any";
+          if(q)
+          {
+            m_fragment += "layout(";
+            m_fragment += q;
+            m_fragment += ") out float gl_FragDepth;\n";
+          }
+        }
       }
 
       // Setup the parameters UBOs
       std::string material_ubos = GLSL45.defaultUniforms;
 
+      // TYPES section structs must be visible in BOTH stages because SSBO
+      // declarations referencing them (e.g. `Light entries[]`) are appended
+      // to material_ubos, which is in turn injected into both VS and FS.
+      material_ubos += isf_emit_types_struct(d.types);
+
       int sampler_binding = 3;
 
       if(!d.inputs.empty() || !d.pass_targets.empty())
@@ -2043,6 +3663,14 @@ void parser::parse_isf()
         uniforms += "layout(std140, binding = 2) uniform material_t {\n";
         for(const isf::input& val : d.inputs)
         {
+          // Storage buffers / storage images are declared separately after
+          // samplers — skip them here to avoid emitting invalid GLSL.
+          if(ossia::get_if<isf::storage_input>(&val.data)
+             || ossia::get_if<isf::csf_image_input>(&val.data)
+             || ossia::get_if<isf::geometry_input>(&val.data)
+             || ossia::get_if<isf::uniform_input>(&val.data))
+            continue;
+
           auto [type, isSampler] = ossia::visit(create_val_visitor_450{}, val.data);
 
           if(isSampler)
@@ -2069,6 +3697,18 @@ void parser::parse_isf()
                 sampler_binding++;
               }
             }
+            else if(auto* cube = ossia::get_if<isf::cubemap_input>(&val.data))
+            {
+              if(cube->depth)
+              {
+                samplers += "layout(binding = ";
+                samplers += std::to_string(sampler_binding);
+                samplers += ") uniform samplerCube ";
+                samplers += val.name;
+                samplers += "_depth;\n";
+                sampler_binding++;
+              }
+            }
           }
           else
           {
@@ -2088,8 +3728,25 @@ void parser::parse_isf()
           }
         }
 
+        // Pass targets are bound as sampler2D for cross-pass reads. Two
+        // independent dedup checks:
+        //   1) the same TARGET can appear in multiple PASSES entries (e.g.
+        //      LAYERS where each layer is a pass writing to the same target)
+        //      — we must only emit one sampler per distinct name.
+        //   2) a TARGET may also appear as a FRAGMENT_OUTPUT for the current
+        //      pass (typical for OUTPUTS with LAYERS) — those collide with
+        //      the `out vec4 <name>;` declaration emitted above and would
+        //      cause "redefinition" at GLSL compile time.
+        std::set<std::string> output_names;
+        for(const auto& out : d.outputs)
+          output_names.insert(out.name);
+        std::set<std::string> emitted_targets;
         for(const std::string& target : d.pass_targets)
         {
+          if(output_names.count(target))
+            continue;
+          if(!emitted_targets.insert(target).second)
+            continue;
           samplers += "layout(binding = ";
           samplers += std::to_string(sampler_binding);
           samplers += ") uniform sampler2D ";
@@ -2110,6 +3767,21 @@ void parser::parse_isf()
         }
 
         material_ubos += samplers;
+
+        // Storage buffers (SSBOs) and storage images visible to the graphics
+        // pipeline. Bindings continue after samplers.
+        sampler_binding = isf_emit_graphics_storage(
+            material_ubos, sampler_binding, d.inputs);
+
+        // Multiview UBO: injected when MULTIVIEW >= 2 in the descriptor.
+        // Only the UBO here — the #extension pragma must come right after
+        // #version, so it's emitted separately below.
+        if(d.multiview_count >= 2)
+        {
+          material_ubos += isf_emit_multiview_ubo(
+              sampler_binding, d.multiview_count);
+          sampler_binding++;
+        }
       }
 
       m_vertex += material_ubos;
@@ -2159,6 +3831,17 @@ void parser::parse_raw_raster_pipeline()
 
   m_desc.mode = isf::descriptor::RawRaster;
 
+  // If FRAGMENT_OUTPUTS declares multiple outputs but OUTPUTS was not
+  // explicitly provided, auto-populate desc.outputs so the node graph
+  // creates the right number of output ports (one per attachment).
+  if(m_desc.outputs.empty() && m_desc.fragment_outputs.size() > 1)
+  {
+    for(const auto& fo : m_desc.fragment_outputs)
+    {
+      m_desc.outputs.push_back(output_declaration{.name = fo.name, .type = "color"});
+    }
+  }
+
   // Add the raw raster uniforms
   {
     static const auto default_ins = [] {
@@ -2240,8 +3923,56 @@ void parser::parse_raw_raster_pipeline()
   m_vertex = GLSL45.versionPrelude;
   m_fragment = GLSL45.versionPrelude;
 
+  // Extensions pragma block — must come right after #version.
+  // GL_ARB_shader_draw_parameters exposes gl_BaseInstance / gl_BaseVertex /
+  // gl_DrawIDARB in the vertex shader. Required by MDI shaders that index
+  // per-draw data (per_draws[gl_BaseInstance], etc.). Harmless when unused.
+  m_vertex += "#extension GL_ARB_shader_draw_parameters : require\n";
+
+  if(m_desc.multiview_count >= 2)
+  {
+    std::string ext = isf_emit_multiview_extension(m_desc.multiview_count);
+    m_vertex += ext;
+    m_fragment += ext;
+  }
+
+  {
+    std::string user_ext = isf_emit_user_extensions(m_desc.extensions);
+    m_vertex += user_ext;
+    m_fragment += user_ext;
+  }
+
+  // LAYER_INDEX for layered outputs.
+  {
+    bool has_layered_output = (m_desc.multiview_count >= 2);
+    for(const auto& out : m_desc.outputs)
+      if(out.layers > 1)
+        has_layered_output = true;
+    if(has_layered_output)
+      m_fragment += "#define LAYER_INDEX gl_Layer\n";
+  }
+
   // Write down the inputs / outputs
   {
+    // Integer / boolean types require the `flat` interpolation qualifier on
+    // varyings (VERTEX_OUTPUTS → FRAGMENT_INPUTS). Without it, Vulkan GLSL
+    // compilation fails: "'uint' : must be qualified as flat in".
+    auto needs_flat = [](attribute_type t) {
+      return (t >= attribute_type::Int && t <= attribute_type::Uint4)
+          || (t >= attribute_type::Bool && t <= attribute_type::Bool4);
+    };
+
+    // Interpolation qualifier for a varying: user-specified (if valid) wins
+    // over the auto "flat" promotion for integer/bool types.
+    auto interp_qualifier = [&](const vertex_attribute& a) -> const char* {
+      if(a.interpolation == "flat") return "flat";
+      if(a.interpolation == "noperspective") return "noperspective";
+      if(a.interpolation == "centroid") return "centroid";
+      if(a.interpolation == "sample") return "sample";
+      if(a.interpolation == "smooth") return ""; // default, no keyword needed
+      return needs_flat(a.type) ? "flat" : "";
+    };
+
     // Vertex
     for(auto& attr : m_desc.vertex_inputs)
       m_vertex += fmt::format(
@@ -2249,22 +3980,56 @@ void parser::parse_raw_raster_pipeline()
           attribute_type_map.at((int)attr.type), attr.name);
     for(auto& attr : m_desc.vertex_outputs)
       m_vertex += fmt::format(
-          "layout(location = {}) out {} {};\n", attr.location,
+          "layout(location = {}) {} out {} {};\n", attr.location,
+          interp_qualifier(attr),
           attribute_type_map.at((int)attr.type), attr.name);
 
     for(auto& attr : m_desc.fragment_inputs)
       m_fragment += fmt::format(
-          "layout(location = {}) in {} {};\n", attr.location,
+          "layout(location = {}) {} in {} {};\n", attr.location,
+          interp_qualifier(attr),
           attribute_type_map.at((int)attr.type), attr.name);
     for(auto& attr : m_desc.fragment_outputs)
       m_fragment += fmt::format(
           "layout(location = {}) out {} {};\n", attr.location,
           attribute_type_map.at((int)attr.type), attr.name);
+
+    // Clip / cull distances: user-declared count controls the size of the
+    // gl_ClipDistance / gl_CullDistance arrays. Required on some GLSL
+    // profiles; always explicit on Vulkan GLSL.
+    if(m_desc.clip_distances > 0)
+      m_vertex += fmt::format(
+          "out float gl_ClipDistance[{}];\n", m_desc.clip_distances);
+    if(m_desc.cull_distances > 0)
+      m_vertex += fmt::format(
+          "out float gl_CullDistance[{}];\n", m_desc.cull_distances);
+
+    // Conservative-depth qualifier on gl_FragDepth. The user value is
+    // matched case-insensitively against greater/less/unchanged/any;
+    // anything else emits nothing.
+    if(!m_desc.depth_layout.empty())
+    {
+      std::string dl = m_desc.depth_layout;
+      // tolower() is undefined for negative char values: go through uchar.
+      for(auto& c : dl) c = (char)tolower((unsigned char)c);
+      const char* q = nullptr;
+      if(dl == "greater")        q = "depth_greater";
+      else if(dl == "less")      q = "depth_less";
+      else if(dl == "unchanged") q = "depth_unchanged";
+      else if(dl == "any")       q = "depth_any";
+      if(q)
+        m_fragment += fmt::format("layout({}) out float gl_FragDepth;\n", q);
+    }
   }
   {
     // Setup the parameters UBOs
     std::string material_ubos = GLSL45.defaultUniforms;
 
+    // TYPES section structs visible in BOTH stages — see the matching emit
+    // in parse_isf for the rationale (SSBO bodies referencing user structs
+    // leak into VS via material_ubos and previously failed to compile when
+    // VISIBILITY was fragment-only).
+    material_ubos += isf_emit_types_struct(d.types);
+
     int sampler_binding = 3;
 
     if(!d.inputs.empty())
@@ -2276,6 +4041,44 @@ void parser::parse_raw_raster_pipeline()
       uniforms += "layout(std140, binding = 2) uniform material_t {\n";
       for(const isf::input& val : d.inputs)
       {
+        // Storage buffers / storage images / geometry inputs / UBOs are declared
+        // separately after samplers. BUT their synthesized host-side size ints
+        // (storage flex-array size, geometry $USER counts) ARE packed into this
+        // material blob, so they must be declared here too — otherwise every
+        // uniform after them reads shifted. Mirrors the CSF Params block.
+        if(auto* storage = ossia::get_if<isf::storage_input>(&val.data))
+        {
+          if(storage->access.find("write") != std::string::npos
+             && !storage->layout.empty()
+             && storage->layout.back().type.find("[]") != std::string::npos)
+          {
+            num_uniform++;
+            uniforms += "int " + val.name + "_size;\n";
+            globalvars += "int " + val.name + "_size = isf_material_uniforms."
+                          + val.name + "_size;\n";
+          }
+          continue;
+        }
+        if(auto* geo = ossia::get_if<isf::geometry_input>(&val.data))
+        {
+          auto emit_synth_int = [&](const std::string& nm) {
+            num_uniform++;
+            uniforms += "int " + nm + ";\n";
+            globalvars += "int " + nm + " = isf_material_uniforms." + nm + ";\n";
+          };
+          if(geo->vertex_count.find("$USER") != std::string::npos)
+            emit_synth_int(val.name + "_vertex_count");
+          if(geo->instance_count.find("$USER") != std::string::npos)
+            emit_synth_int(val.name + "_instance_count");
+          for(const auto& aux : geo->auxiliary)
+            if(aux.size.find("$USER") != std::string::npos)
+              emit_synth_int(val.name + "_" + aux.name + "_size");
+          continue;
+        }
+        if(ossia::get_if<isf::csf_image_input>(&val.data)
+           || ossia::get_if<isf::uniform_input>(&val.data))
+          continue;
+
         auto [type, isSampler] = ossia::visit(create_val_visitor_450{}, val.data);
 
         if(isSampler)
@@ -2302,6 +4105,18 @@ void parser::parse_raw_raster_pipeline()
               sampler_binding++;
             }
           }
+          else if(auto* cube = ossia::get_if<isf::cubemap_input>(&val.data))
+          {
+            if(cube->depth)
+            {
+              samplers += "layout(binding = ";
+              samplers += std::to_string(sampler_binding);
+              samplers += ") uniform samplerCube ";
+              samplers += val.name;
+              samplers += "_depth;\n";
+              sampler_binding++;
+            }
+          }
         }
         else
         {
@@ -2337,39 +4152,153 @@ void parser::parse_raw_raster_pipeline()
       material_ubos += samplers;
     }
 
+    // Storage buffers (SSBOs) and storage images declared via INPUTS with
+    // TYPE=storage or TYPE=image (visible to graphics stages).
+    sampler_binding = isf_emit_graphics_storage(
+        material_ubos, sampler_binding, d.inputs);
+
     // Auxiliary SSBOs (from top-level AUXILIARY key)
     std::string ssbo_decls;
-    for(const auto& aux : d.auxiliary)
-    {
-      ssbo_decls += "layout(binding = " + std::to_string(sampler_binding) + ", std430) ";
 
-      if(aux.access == "read_only")
-        ssbo_decls += "readonly ";
-      else if(aux.access == "write_only")
-        ssbo_decls += "writeonly ";
+    // Emit a single buffer block for an auxiliary. `qualifier` is the std430
+    // access qualifier ("readonly" / "writeonly" / "restrict") and `var` is
+    // the variable name (differs from `aux.name` for the _prev ping-pong
+    // slot).
+    auto emit_aux_block
+        = [&](const geometry_input::auxiliary_request& aux, int binding,
+              const char* qualifier, const std::string& var) {
+      if(aux.is_uniform)
+      {
+        // std140 UBO: no access qualifier (UBOs are inherently read-only
+        // from GLSL), `uniform` instead of `buffer`.
+        ssbo_decls += "layout(std140, binding = " + std::to_string(binding) + ") uniform ";
+      }
       else
-        ssbo_decls += "restrict ";
-
-      ssbo_decls += "buffer " + aux.name + "_buf {\n";
+      {
+        ssbo_decls += "layout(binding = " + std::to_string(binding) + ", std430) ";
+        ssbo_decls += qualifier;
+        ssbo_decls += " buffer ";
+      }
+      ssbo_decls += var;
+      ssbo_decls += "_buf {\n";
       for(const auto& field : aux.layout)
       {
-        // Handle array types: "vec4[512]" → "vec4 entries[512];"
         auto bracket = field.type.find('[');
         if(bracket != std::string::npos)
-        {
           ssbo_decls += "    " + field.type.substr(0, bracket) + " " + field.name
                         + field.type.substr(bracket) + ";\n";
-        }
         else
-        {
           ssbo_decls += "    " + field.type + " " + field.name + ";\n";
+      }
+      ssbo_decls += "} ";
+      ssbo_decls += var;
+      ssbo_decls += ";\n\n";
+    };
+
+    for(const auto& aux : d.auxiliary)
+    {
+      const char* access_qualifier
+          = (aux.access == "read_only")  ? "readonly"
+          : (aux.access == "write_only") ? "writeonly"
+                                         : "restrict";
+
+      // Persistent ping-pong only makes sense for writable SSBOs. UBOs
+      // declared persistent silently fall back to a single-block decl
+      // (the flag is ignored by the runtime allocator on the UBO path).
+      if(aux.persistent && !aux.is_uniform)
+      {
+        // Ping-pong pair: _prev is the previous frame's read-only snapshot,
+        // <name> is the current frame's writable buffer. Runtime swaps
+        // the two buffer pointers each frame.
+        emit_aux_block(aux, sampler_binding, "readonly", aux.name + "_prev");
+        sampler_binding++;
+        emit_aux_block(aux, sampler_binding, access_qualifier, aux.name);
+        sampler_binding++;
+      }
+      else
+      {
+        emit_aux_block(aux, sampler_binding, access_qualifier, aux.name);
+        sampler_binding++;
+      }
+    }
+    material_ubos += ssbo_decls;
+
+    // Auxiliary textures (from top-level AUXILIARY with TYPE: image /
+    // texture / cubemap / image_cube / storage_*). No input port; the
+    // renderer resolves them from ossia::geometry::auxiliary_textures
+    // by name. Sampled textures emit `sampler*` decls with texture()
+    // semantics; storage images emit `image*` decls with imageLoad /
+    // imageStore semantics.
+    std::string aux_tex_decls;
+    for(const auto& atx : d.auxiliary_textures)
+    {
+      if(atx.is_storage)
+      {
+        // Storage image: imageLoad/Store target. FORMAT layout qualifier
+        // is mandatory on writable images; defaults to rgba8.
+        // Cube-arrays are parser-rejected so no imageCubeArray branch.
+        const char* image_type = "image2D";
+        if(atx.is_cubemap)                 image_type = "imageCube";
+        else if(atx.dimensions == 3)       image_type = "image3D";
+        else if(atx.is_array)              image_type = "image2DArray";
+
+        const char* access_q =
+            (atx.access == "read_only") ? "readonly " :
+            (atx.access == "write_only") ? "writeonly " : "";
+
+        // Integer formats (r32ui, r32i, rgba32ui, …) require the
+        // `uimage*` / `iimage*` GLSL variants — the bare `image*` type
+        // paired with an integer layout qualifier is a compile error.
+        // Reuses the same prefix helper csf_image_input declarations
+        // already use, so float / int / uint emission stays consistent
+        // across the rasterizer-aux and csf-input code paths.
+        std::string scalar_prefix = isf_glsl_type_prefix(atx.format);
+
+        aux_tex_decls += "layout(binding = " + std::to_string(sampler_binding)
+                         + ", " + atx.format + ") uniform " + access_q
+                         + scalar_prefix + image_type + " "
+                         + atx.name + ";\n";
+        sampler_binding++;
+      }
+      else
+      {
+        const bool cmp = isf_is_comparison_sampler(atx.sampler);
+        const char* sampler_type = "sampler2D";
+        // Precedence: cubemap > 3D > array > 2D. sampler3D does not nest
+        // with array in core GLSL, so is_array is ignored when dimensions==3.
+        // Cube-arrays (samplerCubeArray) are parser-rejected — no backend
+        // plumbs CubeMap|TextureArray views correctly.
+        if(atx.is_cubemap)
+          sampler_type = cmp ? "samplerCubeShadow" : "samplerCube";
+        else if(atx.dimensions == 3)
+          sampler_type = "sampler3D";
+        else if(atx.is_array)
+          sampler_type = cmp ? "sampler2DArrayShadow" : "sampler2DArray";
+        else
+          sampler_type = cmp ? "sampler2DShadow" : "sampler2D";
+
+        aux_tex_decls += "layout(binding = " + std::to_string(sampler_binding)
+                         + ") uniform " + sampler_type + " " + atx.name + ";\n";
+        sampler_binding++;
+
+        // Paired depth sampler when DEPTH:true on a plain 2D tex.
+        if(atx.is_depth && !atx.is_cubemap && atx.dimensions != 3 && !atx.is_array)
+        {
+          aux_tex_decls += "layout(binding = " + std::to_string(sampler_binding)
+                           + ") uniform sampler2D " + atx.name + "_depth;\n";
+          sampler_binding++;
         }
       }
-      ssbo_decls += "} " + aux.name + ";\n\n";
+    }
+    material_ubos += aux_tex_decls;
 
+    // Multiview UBO: injected when MULTIVIEW >= 2.
+    if(m_desc.multiview_count >= 2)
+    {
+      material_ubos += isf_emit_multiview_ubo(
+          sampler_binding, m_desc.multiview_count);
       sampler_binding++;
     }
-    material_ubos += ssbo_decls;
 
     int model_ubo_binding = sampler_binding;
     material_ubos += fmt::format(
@@ -2385,6 +4314,18 @@ void parser::parse_raw_raster_pipeline()
     m_fragment += material_ubos;
   }
 
+  // The raw-raster path replaces gl_FragCoord → isf_FragCoord for the
+  // same Y-flip behaviour as fullscreen ISF, but unlike ISF the raw-raster
+  // FS prelude didn't define the macro — causing "isf_FragCoord :
+  // undeclared identifier" for any shader using gl_FragCoord.
+  m_fragment += R"_(
+#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+#define isf_FragCoord vec4(gl_FragCoord.x, RENDERSIZE.y - gl_FragCoord.y, gl_FragCoord.z, gl_FragCoord.w)
+#else
+#define isf_FragCoord gl_FragCoord
+#endif
+)_";
+
   // Add the actual vert / frag code
   m_vertex += m_sourceVertex;
   m_fragment += fragWithoutISF;
@@ -2392,6 +4333,9 @@ void parser::parse_raw_raster_pipeline()
   // Replace the special ISF stuff
   boost::replace_all(m_fragment, "gl_FragColor", "isf_FragColor");
   boost::replace_all(m_fragment, "vv_Frag", "isf_Frag");
+
+  // Sanity-check ATTRIBUTES.TYPE references — see helper above.
+  validate_attribute_types(m_desc);
 }
 
 void parser::parse_shadertoy()
@@ -2866,6 +4810,46 @@ void main(void)
 }
 
 // Helper function to escape JSON strings
+// Defined just below; declared here so emit_sampler_config can share the
+// file's JSON string escaper instead of carrying a weaker private copy.
+static auto escape_json(const std::string& str) -> std::string;
+
+// Serialize a sampler_config's non-empty fields as JSON key/value pairs
+// onto `oss`, each prefixed with `", "` so they can be appended to an object
+// that already has a member. Writes nothing when every field is at its
+// default (empty strings, unset optionals).
+//
+// String values go through escape_json, the same helper write_isf applies
+// to names and types. The previous local escaper only handled `"` and `\`,
+// so e.g. a control character in a value produced invalid JSON.
+//
+// Floats are written with the stream's current formatting state.
+static void emit_sampler_config(std::ostream& oss, const isf::sampler_config& s)
+{
+  auto str_field = [&](const char* key, const std::string& val) {
+    if(!val.empty())
+      oss << ", \"" << key << "\": \"" << escape_json(val) << "\"";
+  };
+  auto float_field = [&](const char* key, const std::optional<float>& val) {
+    if(val) oss << ", \"" << key << "\": " << *val;
+  };
+
+  str_field("WRAP",         s.wrap);
+  str_field("WRAP_S",       s.wrap_s);
+  str_field("WRAP_T",       s.wrap_t);
+  str_field("WRAP_R",       s.wrap_r);
+  str_field("FILTER",       s.filter);
+  str_field("MIN_FILTER",   s.min_filter);
+  str_field("MAG_FILTER",   s.mag_filter);
+  str_field("MIPMAP_MODE",  s.mipmap_mode);
+  str_field("BORDER_COLOR", s.border_color);
+  str_field("COMPARE",      s.compare);
+  float_field("ANISOTROPY", s.anisotropy);
+  float_field("LOD_BIAS",   s.lod_bias);
+  float_field("MIN_LOD",    s.min_lod);
+  float_field("MAX_LOD",    s.max_lod);
+}
+
 static auto escape_json(const std::string& str) -> std::string
 {
   std::string result;
@@ -2926,6 +4910,24 @@ std::string parser::write_isf() const
       oss << "\n";
     }
     oss << "  ]";
+    if(!m_desc.inputs.empty() || !m_desc.passes.empty()
+       || !m_desc.extensions.empty())
+      oss << ",";
+    oss << "\n";
+  }
+
+  // Add extensions if present
+  if(!m_desc.extensions.empty())
+  {
+    oss << "  \"EXTENSIONS\": [\n";
+    for(size_t i = 0; i < m_desc.extensions.size(); ++i)
+    {
+      oss << "    \"" << escape_json(m_desc.extensions[i]) << "\"";
+      if(i + 1 < m_desc.extensions.size())
+        oss << ",";
+      oss << "\n";
+    }
+    oss << "  ]";
     if(!m_desc.inputs.empty() || !m_desc.passes.empty())
       oss << ",";
     oss << "\n";
@@ -3037,6 +5039,8 @@ std::string parser::write_isf() const
             oss << ",\n      \"DEFAULT\": [" << (*p.def)[0] << ", " << (*p.def)[1]
                 << ", " << (*p.def)[2] << "]";
           }
+          if(p.as_color)
+            oss << ",\n      \"AS_COLOR\": true";
           oss << "\n";
         }
 
@@ -3065,17 +5069,29 @@ std::string parser::write_isf() const
           oss << "      \"TYPE\": \"image\"";
           if(img.depth)
             oss << ",\n      \"DEPTH\": true";
+          if(img.is_array)
+            oss << ",\n      \"IS_ARRAY\": true";
+          if(img.dimensions != 2)
+            oss << ",\n      \"DIMENSIONS\": " << img.dimensions;
+          oss << "\n";
+        }
+        void operator()(const cubemap_input& c)
+        {
+          oss << "      \"TYPE\": \"cubemap\"";
+          if(c.depth)
+            oss << ",\n      \"DEPTH\": true";
           oss << "\n";
         }
-        void operator()(const cubemap_input&) { oss << "      \"TYPE\": \"cubemap\"\n"; }
 
         void operator()(const audio_input& a)
         {
           oss << "      \"TYPE\": \"audio\"";
           if(a.max > 0)
-          {
             oss << ",\n      \"MAX\": " << a.max;
-          }
+          if(!a.sampler.filter.empty())
+            oss << ",\n      \"FILTER\": \"" << escape_json(a.sampler.filter) << "\"";
+          if(!a.sampler.wrap.empty())
+            oss << ",\n      \"WRAP\": \"" << escape_json(a.sampler.wrap) << "\"";
           oss << "\n";
         }
 
@@ -3083,9 +5099,11 @@ std::string parser::write_isf() const
         {
           oss << "      \"TYPE\": \"audioFFT\"";
           if(a.max > 0)
-          {
             oss << ",\n      \"MAX\": " << a.max;
-          }
+          if(!a.sampler.filter.empty())
+            oss << ",\n      \"FILTER\": \"" << escape_json(a.sampler.filter) << "\"";
+          if(!a.sampler.wrap.empty())
+            oss << ",\n      \"WRAP\": \"" << escape_json(a.sampler.wrap) << "\"";
           oss << "\n";
         }
 
@@ -3093,9 +5111,11 @@ std::string parser::write_isf() const
         {
           oss << "      \"TYPE\": \"audioHistogram\"";
           if(a.max > 0)
-          {
             oss << ",\n      \"MAX\": " << a.max;
-          }
+          if(!a.sampler.filter.empty())
+            oss << ",\n      \"FILTER\": \"" << escape_json(a.sampler.filter) << "\"";
+          if(!a.sampler.wrap.empty())
+            oss << ",\n      \"WRAP\": \"" << escape_json(a.sampler.wrap) << "\"";
           oss << "\n";
         }
 
@@ -3104,6 +5124,12 @@ std::string parser::write_isf() const
         {
           oss << "      \"TYPE\": \"storage\",\n";
           oss << "      \"ACCESS\": \"" << s.access << "\"";
+          if(!s.buffer_usage.empty())
+            oss << ",\n      \"BUFFER_USAGE\": \"" << escape_json(s.buffer_usage) << "\"";
+          if(s.persistent)
+            oss << ",\n      \"PERSISTENT\": true";
+          if(!s.visibility.empty() && s.visibility != "fragment")
+            oss << ",\n      \"VISIBILITY\": \"" << escape_json(s.visibility) << "\"";
           if(!s.layout.empty())
           {
             oss << ",\n      \"LAYOUT\": [\n";
@@ -3121,13 +5147,41 @@ std::string parser::write_isf() const
           oss << "\n";
         }
 
+        void operator()(const uniform_input& u)
+        {
+          oss << "      \"TYPE\": \"uniform\",\n";
+          oss << "      \"LAYOUT\": [\n";
+          for(std::size_t k = 0; k < u.layout.size(); ++k)
+          {
+            const auto& f = u.layout[k];
+            oss << "        { \"NAME\": \"" << escape_json(f.name)
+                << "\", \"TYPE\": \"" << escape_json(f.type) << "\" }";
+            if(k + 1 < u.layout.size())
+              oss << ",";
+            oss << "\n";
+          }
+          oss << "      ]";
+          if(!u.visibility.empty() && u.visibility != "vertex+fragment")
+            oss << ",\n      \"VISIBILITY\": \"" << escape_json(u.visibility) << "\"";
+          oss << "\n";
+        }
+
         void operator()(const texture_input&) { oss << "      \"TYPE\": \"texture\"\n"; }
 
         void operator()(const csf_image_input& img)
         {
           oss << "      \"TYPE\": \"image\",\n";
           oss << "      \"ACCESS\": \"" << img.access << "\",\n";
-          oss << "      \"FORMAT\": \"" << img.format << "\"\n";
+          oss << "      \"FORMAT\": \"" << img.format << "\"";
+          if(!img.visibility.empty() && img.visibility != "compute")
+            oss << ",\n      \"VISIBILITY\": \"" << escape_json(img.visibility) << "\"";
+          if(img.persistent)
+            oss << ",\n      \"PERSISTENT\": true";
+          if(img.is_array)
+            oss << ",\n      \"IS_ARRAY\": true";
+          if(!img.layers_expression.empty())
+            oss << ",\n      \"LAYERS\": \"" << escape_json(img.layers_expression) << "\"";
+          oss << "\n";
         }
 
         void operator()(const geometry_input& geo)
@@ -3144,6 +5198,8 @@ std::string parser::write_isf() const
             try { std::stoi(geo.instance_count); oss << ",\n      \"INSTANCE_COUNT\": " << geo.instance_count; }
             catch(...) { oss << ",\n      \"INSTANCE_COUNT\": \"" << escape_json(geo.instance_count) << "\""; }
           }
+          if(!geo.format_id.empty())
+            oss << ",\n      \"FORMAT_ID\": \"" << escape_json(geo.format_id) << "\"";
           if(!geo.attributes.empty())
           {
             oss << ",\n      \"ATTRIBUTES\": [\n";
@@ -3168,14 +5224,20 @@ std::string parser::write_isf() const
             }
             oss << "      ]";
           }
-          if(!geo.auxiliary.empty())
+          if(!geo.auxiliary.empty() || !geo.auxiliary_textures.empty())
           {
             oss << ",\n      \"AUXILIARY\": [\n";
-            for(size_t i = 0; i < geo.auxiliary.size(); ++i)
+            const size_t nb = geo.auxiliary.size();
+            const size_t nt = geo.auxiliary_textures.size();
+            for(size_t i = 0; i < nb; ++i)
             {
               const auto& aux = geo.auxiliary[i];
               oss << "        {\"NAME\": \"" << escape_json(aux.name) << "\"";
-              if(!aux.access.empty())
+              // TYPE: "uniform" for UBO-kind aux. SSBO kind omits TYPE —
+              // default parse dispatch lands there.
+              if(aux.is_uniform)
+                oss << ", \"TYPE\": \"uniform\"";
+              if(!aux.access.empty() && !aux.is_uniform)
                 oss << ", \"ACCESS\": \"" << escape_json(aux.access) << "\"";
               if(!aux.size.empty())
               {
@@ -3196,7 +5258,53 @@ std::string parser::write_isf() const
                 oss << "]";
               }
               oss << "}";
-              if(i < geo.auxiliary.size() - 1)
+              if(i < nb - 1 || nt > 0)
+                oss << ",";
+              oss << "\n";
+            }
+            // Texture auxiliaries — identifying TYPE field so parse round-
+            // trips via aux_entry_is_texture. Full sampler_config fields
+            // are emitted via emit_sampler_config so WRAP/FILTER/COMPARE
+            // etc. round-trip losslessly.
+            for(size_t i = 0; i < nt; ++i)
+            {
+              const auto& atx = geo.auxiliary_textures[i];
+              oss << "        {\"NAME\": \"" << escape_json(atx.name) << "\"";
+              // TYPE field — reuse the specific storage_* variants so
+              // parse dispatch and re-emit stay symmetric.
+              if(atx.is_storage)
+              {
+                if(atx.is_cubemap && atx.is_array)
+                  oss << ", \"TYPE\": \"storage_cube\""; // Note: no array-cube storage variant in current vocabulary
+                else if(atx.is_cubemap)
+                  oss << ", \"TYPE\": \"storage_cube\"";
+                else if(atx.dimensions == 3)
+                  oss << ", \"TYPE\": \"storage_3d\"";
+                else if(atx.is_array)
+                  oss << ", \"TYPE\": \"storage_image_array\"";
+                else
+                  oss << ", \"TYPE\": \"storage_image\"";
+              }
+              else if(atx.is_cubemap)
+                oss << ", \"TYPE\": \"cubemap\"";
+              else
+                oss << ", \"TYPE\": \"image\"";
+              if(atx.is_array && !atx.is_storage)
+                oss << ", \"IS_ARRAY\": true";
+              if(atx.dimensions != 2 && !atx.is_storage)
+                oss << ", \"DIMENSIONS\": " << atx.dimensions;
+              if(atx.is_depth)
+                oss << ", \"DEPTH\": true";
+              if(atx.is_storage)
+              {
+                if(!atx.format.empty() && atx.format != "rgba8")
+                  oss << ", \"FORMAT\": \"" << escape_json(atx.format) << "\"";
+                if(!atx.access.empty() && atx.access != "read_write")
+                  oss << ", \"ACCESS\": \"" << escape_json(atx.access) << "\"";
+              }
+              emit_sampler_config(oss, atx.sampler);
+              oss << "}";
+              if(i < nt - 1)
                 oss << ",";
               oss << "\n";
             }
@@ -3274,14 +5382,32 @@ std::string parser::write_isf() const
         try
         {
           std::stod(pass.height_expression);
-          oss << "      \"HEIGHT\": " << pass.height_expression;
+          oss << "      \"HEIGHT\": " << pass.height_expression << ",\n";
         }
         catch(...)
         {
-          oss << "      \"HEIGHT\": \"" << escape_json(pass.height_expression) << "\"";
+          oss << "      \"HEIGHT\": \"" << escape_json(pass.height_expression) << "\",\n";
         }
       }
 
+      if(!pass.z_expression.empty())
+      {
+        try
+        {
+          std::stod(pass.z_expression);
+          oss << "      \"Z\": " << pass.z_expression << ",\n";
+        }
+        catch(...)
+        {
+          oss << "      \"Z\": \"" << escape_json(pass.z_expression) << "\",\n";
+        }
+      }
+
+      if(!pass.format.empty())
+      {
+        oss << "      \"FORMAT\": \"" << escape_json(pass.format) << "\",\n";
+      }
+
       // Remove trailing comma if last property
       auto str = oss.str();
       if(str.size() > 2 && str[str.size() - 2] == ',')
@@ -3435,6 +5561,18 @@ void parser::parse_vsa()
             sampler_binding++;
           }
         }
+        else if(auto* cube = ossia::get_if<isf::cubemap_input>(&val.data))
+        {
+          if(cube->depth)
+          {
+            samplers += "layout(binding = ";
+            samplers += std::to_string(sampler_binding);
+            samplers += ") uniform samplerCube ";
+            samplers += val.name;
+            samplers += "_depth;\n";
+            sampler_binding++;
+          }
+        }
       }
       else
       {
@@ -3517,6 +5655,9 @@ void parser::parse_csf()
   // Add version
   m_fragment += "#version 450\n\n";
 
+  // User-declared GLSL EXTENSIONS must come right after #version.
+  m_fragment += isf_emit_user_extensions(m_desc.extensions);
+
   // Add standard ProcessUBO uniforms (same as ISF/VSA)
   m_fragment += GLSL45.defaultUniforms;
   m_fragment += "\n";
@@ -3527,34 +5668,37 @@ void parser::parse_csf()
                 ", local_size_y = ISF_LOCAL_SIZE_Y"
                 ", local_size_z = ISF_LOCAL_SIZE_Z) in;\n\n";
 
-  // Generate struct definitions from TYPES section
+  // Generate struct definitions from TYPES section.
+  //
+  // No auto-padding: GLSL+std430 handles alignment based on actual member
+  // types (vec4 16B-aligned, float/uint 4B-aligned, struct rounds to its
+  // largest member). The previous "(4 - field_count % 4) % 4 trailing
+  // floats" heuristic was based on the field count modulo 4, completely
+  // unrelated to real alignment, and silently grew the struct stride
+  // when field_count wasn't a multiple of 4. RawLight (7 fields) became
+  // 68B → 80B std430-stride here while every rasterizer (graphics-path
+  // TYPES emitter has no such heuristic) and ScenePreprocessor's
+  // RawLight arena both use 64B stride — pack_lights_from_points writes
+  // landed at 80B intervals while the consumer rasterizer read at 64B
+  // intervals, garbling every slot past index 0 (the user's symptom:
+  // procedural light positions acting like colours, all lights piled up
+  // at the constant light_color value). Mirror the graphics-path
+  // emitter (isf_emit_types_struct) verbatim instead.
   if(!m_desc.types.empty())
   {
     m_fragment += "// Struct definitions from TYPES section\n";
     for(const auto& type_def : m_desc.types)
     {
-      m_fragment += "struct " + type_def.name + " \n{\n";
-
+      m_fragment += "struct " + type_def.name + " {\n";
       for(const auto& field : type_def.layout)
       {
         auto bracket = field.type.find('[');
         if(bracket != std::string::npos)
-          m_fragment += "  " + field.type.substr(0, bracket) + " " + field.name
+          m_fragment += "    " + field.type.substr(0, bracket) + " " + field.name
                         + field.type.substr(bracket) + ";\n";
         else
-          m_fragment += "  " + field.type + " " + field.name + ";\n";
-      }
-
-      // Add padding calculation for struct alignment
-      // This is a simplified approach - proper padding would require more complex size calculations
-      int field_count = type_def.layout.size();
-      int padding_needed
-          = (4 - (field_count % 4)) % 4; // Simple 16-byte alignment padding
-      for(int i = 0; i < padding_needed; i++)
-      {
-        m_fragment += "  float pad" + std::to_string(i) + ";\n";
+          m_fragment += "    " + field.type + " " + field.name + ";\n";
       }
-
       m_fragment += "};\n\n";
     }
   }
@@ -3678,6 +5822,20 @@ void parser::parse_csf()
           }
         }
       }
+      else if(auto* storage = ossia::get_if<storage_input>(&inp.data))
+      {
+        // A writable storage buffer whose LAYOUT ends in a flexible-array
+        // member gets a synthesized host-side size int (see ISFVisitors /
+        // RenderedCSFNode). Declare it here so this std140 block matches the
+        // packed material blob; otherwise every uniform after it reads shifted.
+        if(storage->access.find("write") != std::string::npos
+           && !storage->layout.empty()
+           && storage->layout.back().type.find("[]") != std::string::npos)
+        {
+          k++;
+          material_block += "    int " + inp.name + "_size;\n";
+        }
+      }
     }
 
     material_block += "};\n\n";
@@ -3736,6 +5894,7 @@ void parser::parse_csf()
 
   // Generate resource bindings
   m_fragment += "// From RESOURCES - bindings assigned automatically\n";
+  bool emitted_indirect_struct = false;
   for(const auto& inp : m_desc.inputs)
   {
     if(auto* storage_ptr = ossia::get_if<storage_input>(&inp.data))
@@ -3772,34 +5931,50 @@ void parser::parse_csf()
     {
       const auto& img = *img_ptr;
 
-      m_fragment += "layout(binding = " + std::to_string(binding);
+      // Emit the primary image binding, then — if persistent — emit a
+      // readonly `<name>_prev` alias at the following slot. The runtime
+      // ping-pongs between two textures and swaps pointers each frame so
+      // the shader sees current-frame writes on `<name>` and the previous
+      // frame's state on `<name>_prev`.
+      auto emit_image = [&](int b, const std::string& decl_name, bool alias_prev) {
+        m_fragment += "layout(binding = " + std::to_string(b);
 
-      // Add format qualifier
-      if(!img.format.empty())
-      {
-        std::string format = img.format;
-        boost::algorithm::to_lower(format);
-        m_fragment += ", " + format;
-      }
-      else
-      {
-        m_fragment += ", rgba8"; // Default format
-      }
+        if(!img.format.empty())
+        {
+          std::string format = img.format;
+          boost::algorithm::to_lower(format);
+          m_fragment += ", " + format;
+        }
+        else
+        {
+          m_fragment += ", rgba8"; // Default format
+        }
 
-      m_fragment += ") ";
+        m_fragment += ") ";
 
-      // Add access qualifiers
-      if(img.access == "read_only")
-        m_fragment += "readonly ";
-      else if(img.access == "write_only")
-        m_fragment += "writeonly ";
-      else
-        m_fragment += "restrict ";
+        if(alias_prev || img.access == "read_only")
+          m_fragment += "readonly ";
+        else if(img.access == "write_only")
+          m_fragment += "writeonly ";
+        else
+          m_fragment += "restrict ";
 
-      auto prefix = glsl_type_prefix(img.format);
-      m_fragment += "uniform " + prefix + (img.is3D() ? "image3D " : "image2D ");
-      m_fragment += inp.name + ";\n";
+        auto prefix = glsl_type_prefix(img.format);
+        const char* shape = "image2D";
+        if(img.isCube())      shape = "imageCube";
+        else if(img.is3D())   shape = "image3D";
+        else if(img.is_array) shape = "image2DArray";
+        m_fragment += "uniform " + prefix + shape + " ";
+        m_fragment += decl_name + ";\n";
+      };
+
+      emit_image(binding, inp.name, /*alias_prev=*/false);
       binding++;
+      if(img.persistent)
+      {
+        emit_image(binding, inp.name + "_prev", /*alias_prev=*/true);
+        binding++;
+      }
     }
     else if(auto* tex_ptr = ossia::get_if<texture_input>(&inp.data))
     {
@@ -3809,6 +5984,11 @@ void parser::parse_csf()
       m_fragment += inp.name + ";\n";
       binding++;
     }
+    else if(auto* uni_ptr = ossia::get_if<uniform_input>(&inp.data))
+    {
+      m_fragment += isf_emit_ubo_decl(binding, inp.name, *uni_ptr);
+      binding++;
+    }
     else if(auto* geo_ptr = ossia::get_if<geometry_input>(&inp.data))
     {
       const auto& geo = *geo_ptr;
@@ -3816,6 +5996,26 @@ void parser::parse_csf()
       m_fragment += "// Geometry input \"" + inp.name + "\" — SoA: one SSBO per attribute\n";
       m_fragment += "#define ISF_READ(geo, attr) geo ## _ ## attr ## _in\n";
       m_fragment += "#define ISF_WRITE(geo, attr) geo ## _ ## attr ## _out\n";
+      // Nested-aux structured-SSBO/UBO instance access. Resolves to the
+      // instance name emitted by the SSBO/UBO block below — bare aux name
+      // when there's no cross-geometry collision, prefixed otherwise.
+      // Use this instead of writing `scene_cluster_aabbs.data[...]` by
+      // hand: the macro keeps shaders working if the same aux name later
+      // appears in another geometry input and forces a name collision
+      // (the SSBO emitter switches to the prefixed instance name then).
+      m_fragment += "#define ISF_AUX(geo, name) geo ## _ ## name\n";
+      // Nested-aux image access (storage images: read_only / write_only /
+      // read_write). For images there's no _in / _out distinction at the
+      // GLSL level — the same identifier carries the full access mode
+      // determined by the layout qualifier. Same one-name-per-image
+      // contract applies via the alias #define emitted in the texture
+      // block below.
+      m_fragment += "#define ISF_IMG(geo, name) geo ## _ ## name\n";
+      // Nested-aux sampler access (read-only sampled textures with
+      // texture()/textureLod()/etc.). Symmetric to ISF_IMG — separate
+      // macro because the GLSL type differs (samplerXY vs imageXY) and
+      // future shaders may want to grep for usage independently.
+      m_fragment += "#define ISF_TEX(geo, name) geo ## _ ## name\n";
 
       for(const auto& attr : geo.attributes)
       {
@@ -3873,16 +6073,24 @@ void parser::parse_csf()
         const bool collides = colliding_aux_names.count(aux.name) > 0;
         const std::string instance_name = collides ? aux_prefix : aux.name;
 
-        m_fragment += "layout(binding = " + std::to_string(binding) + ", std430) ";
-
-        if(aux.access == "read_only")
-          m_fragment += "readonly ";
-        else if(aux.access == "write_only")
-          m_fragment += "writeonly ";
+        if(aux.is_uniform)
+        {
+          // std140 UBO: no access qualifier, `uniform` not `buffer`.
+          m_fragment += "layout(std140, binding = " + std::to_string(binding) + ") uniform ";
+        }
         else
-          m_fragment += "restrict ";
+        {
+          m_fragment += "layout(binding = " + std::to_string(binding) + ", std430) ";
+          if(aux.access == "read_only")
+            m_fragment += "readonly ";
+          else if(aux.access == "write_only")
+            m_fragment += "writeonly ";
+          else
+            m_fragment += "restrict ";
+          m_fragment += "buffer ";
+        }
 
-        m_fragment += "buffer " + aux_prefix + "_buf {\n";
+        m_fragment += aux_prefix + "_buf {\n";
         for(const auto& field : aux.layout)
         {
           // Handle array types: "vec4[512]" → "vec4 entries[512];"
@@ -3899,14 +6107,17 @@ void parser::parse_csf()
         }
         m_fragment += "} " + instance_name + ";\n";
 
-        // Generate ISF_READ/ISF_WRITE-compatible aliases
-        if(aux.access == "read_only")
+        // Generate ISF_READ/ISF_WRITE-compatible aliases. UBOs are always
+        // read-only from GLSL's perspective (the `access` field is ignored
+        // for UBO kind), so only the `_in` / unqualified aliases exist.
+        const std::string eff_access = aux.is_uniform ? "read_only" : aux.access;
+        if(eff_access == "read_only")
         {
           m_fragment += "#define " + aux_prefix + "_in " + instance_name + "\n";
           if(!collides)
             m_fragment += "#define " + aux_prefix + " " + instance_name + "\n";
         }
-        else if(aux.access == "write_only")
+        else if(eff_access == "write_only")
         {
           m_fragment += "#define " + aux_prefix + "_out " + instance_name + "\n";
           if(!collides)
@@ -3916,12 +6127,110 @@ void parser::parse_csf()
         {
           m_fragment += "#define " + aux_prefix + "_in " + instance_name + "\n";
           m_fragment += "#define " + aux_prefix + "_out " + instance_name + "\n";
+          if(!collides)
+            m_fragment += "#define " + aux_prefix + " " + instance_name + "\n";
         }
         m_fragment += "\n";
 
         binding++;
       }
 
+      // Auxiliary textures (travel with the geometry; resolved by the
+      // renderer from ossia::geometry::auxiliary_textures by name).
+      // RenderedCSFNode binds them right after aux SSBOs in the compute
+      // SRB build loop — order here must match that order.
+      //
+      // Each texture is emitted under its bare aux name (e.g.
+      // `voxel_grid`) — same convention as the structured-SSBO/UBO block
+      // above when there's no name collision. A `#define
+      // <geo>_<aux> <aux>` alias is also emitted so author shaders can
+      // use either the prefixed form directly OR the ISF_IMG /
+      // ISF_TEX macros (which expand to `geo ## _ ## aux`). Keeps
+      // image-aux access symmetric with SSBO/UBO-aux access.
+      for(const auto& atx : geo.auxiliary_textures)
+      {
+        const std::string aux_prefix = inp.name + "_" + atx.name;
+        const bool aliased = (aux_prefix != atx.name);
+
+        if(atx.is_storage)
+        {
+          // Cube-arrays are parser-rejected so no imageCubeArray branch.
+          const char* image_type = "image2D";
+          if(atx.is_cubemap)                 image_type = "imageCube";
+          else if(atx.dimensions == 3)       image_type = "image3D";
+          else if(atx.is_array)              image_type = "image2DArray";
+
+          const char* access_q =
+              (atx.access == "read_only") ? "readonly " :
+              (atx.access == "write_only") ? "writeonly " : "";
+
+          // Integer formats (r32ui, r32i, …) require uimage*/iimage*.
+          std::string scalar_prefix = isf_glsl_type_prefix(atx.format);
+
+          m_fragment += "layout(binding = " + std::to_string(binding)
+                        + ", " + atx.format + ") uniform " + access_q
+                        + scalar_prefix + image_type + " "
+                        + atx.name + ";\n";
+          if(aliased)
+            m_fragment += "#define " + aux_prefix + " " + atx.name + "\n";
+          binding++;
+        }
+        else
+        {
+          const bool cmp = isf_is_comparison_sampler(atx.sampler);
+          const char* sampler_type = "sampler2D";
+          // Cube-arrays (samplerCubeArray) are parser-rejected — no QRhi
+          // backend plumbs CubeMap|TextureArray views correctly.
+          if(atx.is_cubemap)
+            sampler_type = cmp ? "samplerCubeShadow" : "samplerCube";
+          else if(atx.dimensions == 3)
+            sampler_type = "sampler3D";
+          else if(atx.is_array)
+            sampler_type = cmp ? "sampler2DArrayShadow" : "sampler2DArray";
+          else
+            sampler_type = cmp ? "sampler2DShadow" : "sampler2D";
+
+          m_fragment += "layout(binding = " + std::to_string(binding)
+                        + ") uniform " + sampler_type + " " + atx.name + ";\n";
+          if(aliased)
+            m_fragment += "#define " + aux_prefix + " " + atx.name + "\n";
+          binding++;
+
+          if(atx.is_depth && !atx.is_cubemap && atx.dimensions != 3 && !atx.is_array)
+          {
+            m_fragment += "layout(binding = " + std::to_string(binding)
+                          + ") uniform sampler2D " + atx.name + "_depth;\n";
+            if(aliased)
+              m_fragment += "#define " + aux_prefix + "_depth "
+                            + atx.name + "_depth\n";
+            binding++;
+          }
+        }
+      }
+
+      // Indirect draw command buffer (user-writable SSBO), reached via ISF_INDIRECT(<geo>).
+      if(geo.indirect)
+      {
+        if(!emitted_indirect_struct)
+        {
+          // Shared by all inputs: a per-input #define would be an illegal macro redefinition.
+          m_fragment += "struct DrawIndirectCommand {\n"
+                        "    uint vertexCount;\n"
+                        "    uint instanceCount;\n"
+                        "    uint firstVertex;\n"
+                        "    int  baseVertex;\n"
+                        "    uint firstInstance;\n"
+                        "};\n"
+                        "#define ISF_INDIRECT(geo) geo ## _indirect\n\n";
+          emitted_indirect_struct = true;
+        }
+        const std::string buf_name = inp.name + "_indirect";
+        m_fragment += "layout(binding = " + std::to_string(binding)
+                      + ", std430) restrict buffer " + buf_name + "_buf {\n"
+                      "    DrawIndirectCommand " + buf_name + "[];\n};\n\n";
+        binding++;
+      }
+
       // Element count uniform (packed into the material UBO or standalone)
       m_fragment += "// Element count for geometry input \"" + inp.name + "\"\n";
       m_fragment += "// (set by the renderer from ossia::geometry::vertices)\n";
@@ -3944,6 +6253,11 @@ void parser::parse_csf()
   // Add the user's compute shader code (without the JSON header)
   boost::algorithm::trim(compWithoutCSF);
   m_fragment += compWithoutCSF;
+
+  // Sanity-check: every ATTRIBUTES.TYPE references a real GLSL built-in
+  // or a TYPES entry. Throws invalid_file with the offending name on
+  // miss — surfaces typos at parse time.
+  validate_attribute_types(m_desc);
 }
 
 descriptor::Mode parser::mode() const
diff --git a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp
index dd0ff5f4ec..6aae6b8ff9 100644
--- a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp
+++ b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp
@@ -34,13 +34,25 @@ struct long_input
   using has_minmax = std::true_type;
   std::vector<ossia::variant<int64_t, double, std::string>> values;
   std::vector<std::string> labels;
-  std::size_t def{}; // index of default value (enum mode) or default value (numeric mode)
+
+  // Enum mode (values/labels non-empty): `def` is the INDEX into `values`.
+  // Numeric mode (values empty, min/max set): `def` is the default VALUE.
+  //
+  // The shader always receives the selected numeric VALUE from `values[i]`
+  // (for int/double entries) or the INDEX (for string-only VALUES, since
+  // GLSL can't consume strings). The renderer's UBO-init path resolves this
+  // index→value step so the initial shader state matches what arrives after
+  // any user interaction — see ISFNode.cpp / GeometryFilterNode.cpp long_input
+  // port visitors.
+  std::size_t def{};
 
   // Numeric mode: when values/labels are empty and min/max are set,
-  // create an IntSpinBox instead of a ComboBox.
+  // create an IntSpinBox instead of a ComboBox. In that mode `def` is the
+  // default value directly (not an index).
   std::optional<int64_t> min;
   std::optional<int64_t> max;
 };
+
 struct float_input
 {
   using value_type = double;
@@ -66,6 +78,12 @@ struct point3d_input
   std::optional<value_type> def{};
   std::optional<value_type> min{};
   std::optional<value_type> max{};
+
+  // AS_COLOR: hint to the UI that this vec3 should be shown as a color
+  // swatch (RGB picker) rather than three spin boxes. Useful for e.g.
+  // direction-as-RGB visualisations where editing components individually
+  // is awkward. Does not affect the GLSL type (still vec3).
+  bool as_color{false};
 };
 
 struct color_input
@@ -77,29 +95,133 @@ struct color_input
   std::optional<value_type> max{};
 };
 
+// Sampler configuration fields shared by image/texture/cubemap inputs.
+// All fields are optional: empty/unset string keeps the current default.
+// Address modes accept: "repeat", "clamp_to_edge"/"clamp", "mirror"/"mirrored_repeat",
+//                       "mirror_once"/"mirror_clamp_to_edge".
+// Filter modes accept:  "nearest", "linear" (and "none" for mipmap_mode).
+// Border color accepts: "transparent_black"/"transparent", "opaque_black", "opaque_white".
+// Compare op accepts:   "never", "less", "less_equal"/"lequal", "equal",
+//                       "greater", "greater_equal"/"gequal", "not_equal"/"neq", "always".
+//                       When set (and not "never") a comparison sampler is created and
+//                       the GLSL type becomes sampler*Shadow. Supported on 2D,
+//                       2D-array and cubemap samplers (image/texture/cubemap inputs and
+//                       AUXILIARY textures); cube-arrays are parser-rejected. Dropped with a
+//                       stderr warning on 3D inputs (sampler3DShadow is not a core
+//                       GLSL type) — use a 2D / 2D-array / cube shadow instead.
+//                       With the engine's reverse-Z convention, the typical
+//                       compare op for a standard "shadowed if closer" test is
+//                       "greater_equal" (not "less_equal").
+struct sampler_config
+{
+  std::string wrap;       // Applied to all 3 axes if individual WRAP_S/T/R unset
+  std::string wrap_s;
+  std::string wrap_t;
+  std::string wrap_r;
+  std::string filter;     // Applied to both min and mag if individual MIN/MAG_FILTER unset
+  std::string min_filter;
+  std::string mag_filter;
+  std::string mipmap_mode;
+  std::optional<float> anisotropy;
+  std::string border_color;
+  std::optional<float> lod_bias;
+  std::optional<float> min_lod;
+  std::optional<float> max_lod;
+  std::string compare; // empty / "never" = no comparison sampler
+};
+
 struct image_input
 {
-  int dimensions{2}; // 2 or 3
-  bool depth{false}; // true = shader wants sampleable depth on this input
+  int dimensions{2};    // 2 or 3
+  bool depth{false};    // true = shader wants sampleable depth on this input
+  bool is_array{false}; // true = sampler2DArray rather than sampler2D
+  // STATIC: producer publishes a long-lived QRhiTexture that downstream binds
+  // directly; engine skips the consumer-side render-target allocation. Use for
+  // precomputed LUTs, IBL bakes, asset caches — anything where the upstream
+  // is a CPU producer (avnd gpu_texture_output, etc.) rather than an ISF /
+  // raster pass that draws into the consumer's RT each frame. Orthogonal to
+  // dimensions / is_array (cube + 3D + array inputs already grab from source
+  // implicitly because they can't be 2D color attachments anyway).
+  bool is_static{false};
+  sampler_config sampler;
 };
 
 struct cubemap_input
 {
+  // DEPTH: true = request a sampleable depth cube alongside the color cube.
+  // Mirrors image_input::depth: pairs the main `samplerCube` (or
+  // `samplerCubeShadow` under COMPARE) with a `samplerCube <name>_depth`
+  // companion for raw depth reads. Useful for omni-directional scene probes
+  // where the upstream provides both a colour cube and its depth cube.
+  // For plain shadow-cube sampling (HW PCF only) set COMPARE instead and
+  // leave DEPTH false — the texture already has to be depth-format for the
+  // compare sampler to return meaningful values.
+  //
+  // Note: cube-arrays (samplerCubeArray) are intentionally NOT exposed. No
+  // QRhi backend (Vulkan/D3D12/Metal/GL) constructs a cube-array view
+  // correctly from the CubeMap | TextureArray flag combination, so the
+  // shader-side type would always disagree with the bound resource. Bind N
+  // individual cubemap inputs instead, or decompose to a sampler2DArray
+  // with face math in the shader.
+  bool depth{false};
+  sampler_config sampler;
+};
+
+// Sampler state accepted by all audio input flavours. Reuses the same
+// string vocabulary as sampler_config (see above) — any unrecognised or
+// empty string keeps the built-in default (linear / clamp_to_edge). Full
+// sampler_config is overkill here: audio textures are 1-mip 2D samplers
+// with no COMPARE / BORDER_COLOR / LOD semantics, so only FILTER and WRAP
+// are honoured. Nearest filtering is the common ask for band-exact FFT
+// reads where linear interpolation would smear adjacent bins.
+struct audio_sampler_config
+{
+  std::string filter; // "nearest" or "linear" (default)
+  std::string wrap;   // "repeat", "clamp_to_edge"/"clamp", "mirror"/"mirrored_repeat"
 };
 
 struct audio_input
 {
   int max{};
+  audio_sampler_config sampler;
 };
 
 struct audioFFT_input
 {
   int max{};
+  audio_sampler_config sampler;
 };
 
 struct audioHist_input
 {
   int max{};
+  audio_sampler_config sampler;
+};
+
+// UBO-style input declared in INPUTS as `"TYPE": "uniform"`.
+//
+// Emitted as `layout(std140, binding=N) uniform <name>_t { ... } <name>;`
+// and bound via QRhiShaderResourceBinding::uniformBuffer (not bufferLoad).
+//
+// Use for small (≤ MaxUniformBufferRange, typically 16KB), read-only data
+// like cameras, light/material counts, indexing constants. For larger or
+// writable data, use `storage_input` (SSBO) instead.
+struct uniform_input
+{
+  // Mirrors the shape of storage_input's layout_field; duplicated here
+  // (rather than shared) to keep the type self-contained.
+  struct layout_field
+  {
+    std::string name;
+    std::string type;
+  };
+
+  std::vector<layout_field> layout;
+
+  // VISIBILITY: which shader stage(s) see this binding in a graphics pipeline.
+  // Accepted values: "vertex+fragment"/"both" (default), "fragment", "vertex",
+  // "compute" (implicit for CSF).
+  std::string visibility{"vertex+fragment"};
 };
 
 // CSF-specific input types
@@ -116,11 +238,22 @@ struct storage_input
   std::vector<layout_field> layout;
 
   std::string buffer_usage; // "", "indirect_draw", "indirect_draw_indexed"
+
+  // PERSISTENT: creates a ping-pong pair of SSBOs swapped each frame.
+  // In GLSL, `name` is the current (read-write) buffer, `name_prev` is the
+  // previous frame's read-only buffer.
+  bool persistent{false};
+
+  // VISIBILITY: which shader stage(s) see this binding in a graphics pipeline.
+  // Accepted values: "fragment" (default), "vertex", "vertex+fragment"/"both",
+  // "compute" (implicit for CSF), "none" (no shader binding).
+  std::string visibility{"fragment"};
 };
 
 struct texture_input
 {
   int dimensions{2}; // 2 or 3
+  sampler_config sampler;
 };
 
 struct csf_image_input
@@ -134,7 +267,45 @@ struct csf_image_input
 
   int dimensions{2}; // 2 or 3 (alternative to depth_expression for declaring 3D)
 
+  // Set internally when the RESOURCES entry uses TYPE: "image_cube".
+  // Writable cubemap (imageCube in GLSL, QRhiTexture::CubeMap |
+  // UsedWithLoadStore). Width must equal height (face edge length). Use for
+  // in-compute reflection-probe baking, environment IBL, etc. Read-only
+  // sampling of the same data is done via TYPE: "cubemap".
+  bool cubemap{false};
+
+  // IS_ARRAY: writable 2D texture array (image2DArray in GLSL, allocated
+  // via QRhi::newTextureArray + UsedWithLoadStore). Layer count comes from
+  // layers_expression (LAYERS: "$USER" / literal). Useful for shadow
+  // cascades, layered G-buffers, compute-written texture atlases.
+  //
+  // Cube-arrays (imageCubeArray) are intentionally NOT supported: no QRhi
+  // backend plumbs CubeMap | TextureArray views correctly, and the shader-
+  // side type would disagree with the bound resource. The parser rejects
+  // is_array + cubemap combinations with a stderr warning.
+  bool is_array{false};
+  std::string layers_expression; // LAYERS: expression for arraySize, may contain $USER
+
+  // VISIBILITY: which shader stage(s) see this binding.
+  // Accepted: "compute" (default), "fragment", "vertex", "vertex+fragment"/"both".
+  std::string visibility{"compute"};
+
+  // PERSISTENT: creates a ping-pong pair of images swapped each frame.
+  // In GLSL, `<name>` is the current (write or read_write) image and
+  // `<name>_prev` is the previous frame's read-only image — mirrors the
+  // storage_input convention. Works for both 2D and 3D images.
+  bool persistent{false};
+
+  // GENERATE_MIPS: when true, the runtime runs QRhi's generateMips() on
+  // this image after every frame's compute dispatches complete, so
+  // downstream samplers with MIPMAP_MODE: linear / nearest see a valid
+  // mip chain instead of zero-filled upper levels. Ignored for 3D images,
+  // cubemaps, and 2D arrays where generateMips semantics differ across
+  // QRhi backends (per-face / per-layer / per-slice).
+  bool generate_mips{false};
+
   bool is3D() const noexcept { return dimensions == 3 || !depth_expression.empty(); }
+  bool isCube() const noexcept { return cubemap; }
 };
 
 // CSF geometry port input: SoA layout, one SSBO per attribute.
@@ -164,27 +335,101 @@ struct geometry_input
     std::optional<copy_from> forward;
   };
 
-  // Structured SSBOs that travel with the geometry (matched by name
-  // against ossia::geometry::auxiliary_buffer entries).
+  // Structured buffers that travel with the geometry (matched by name
+  // against ossia::geometry::auxiliary_buffer entries). Default kind is
+  // SSBO (`layout(std430) buffer`); set `is_uniform = true` to declare a
+  // std140 UBO instead (`layout(std140) uniform`).
   struct auxiliary_request
   {
     std::string name;
     std::string access; // "read_only", "write_only", "read_write"
+                        //  (meaningful for SSBO kind only; UBO is always read-only from GLSL)
     std::vector<storage_input::layout_field> layout;
     std::string size; // expression for flexible array count, may contain $USER
+                      //  (SSBO only; UBOs require fixed-size layouts per std140)
 
     // If set, this auxiliary is forwarded from another geometry's upstream.
     std::optional<copy_from> forward;
+
+    // Raw-raster only: when true the node owns a ping-pong pair of buffers
+    // (allocated from the LAYOUT + SIZE) that are swapped each frame, and
+    // the auxiliary is NOT resolved from upstream geometry. In GLSL,
+    // `<name>` is the current (writable) buffer, `<name>_prev` is the
+    // previous frame's read-only buffer. Useful for temporal accumulation
+    // / history buffers that live only in the rendering node.
+    // (SSBO only; persistent ping-pong makes no sense for read-only UBOs.)
+    bool persistent{false};
+
+    // When true, declare/bind this auxiliary as a std140 uniform block
+    // (`layout(std140, binding=N) uniform name_t { … } name;`) and bind
+    // with QRhiShaderResourceBinding::uniformBuffer. When false (default),
+    // it's an std430 SSBO. The upstream geometry's
+    // ossia::geometry::auxiliary_buffer is kind-agnostic — the shader's
+    // declaration alone determines how the buffer is bound.
+    bool is_uniform{false};
+  };
+
+  // Texture variant of auxiliary: resolved from ossia::geometry::auxiliary_textures
+  // by name, no score input port. Declared in the top-level AUXILIARY array
+  // with TYPE: "image" / "texture" / "cubemap". Unlike regular INPUTS
+  // textures, does not create an input port — the texture handle travels
+  // bundled with the geometry (e.g. ScenePreprocessor ships `base_color_array`
+  // / `skybox` / `shadow_atlas`).
+  struct auxiliary_texture_request
+  {
+    std::string name;
+    int dimensions{2};     // 2 or 3
+    bool is_array{false};  // sampler2DArray when true
+    bool is_cubemap{false};// samplerCube when true
+    bool is_depth{false};  // sampleable depth (promotes comparison when cfg set)
+    // Storage-image kind: emit `image2D/3D/Cube/Array` with imageLoad/
+    // imageStore semantics instead of `sampler2D/…` with texture(). Set
+    // by TYPE: "storage_image" in the AUXILIARY JSON. Paired with:
+    //   - `format`: GLSL layout qualifier (e.g. "rgba8", "r32f", "rgba16f").
+    //   - `access`: "read_only" / "write_only" / "read_write", controlling
+    //     imageLoad / imageStore / imageLoadStore binding type + the
+    //     GLSL `readonly`/`writeonly` decoration.
+    bool is_storage{false};
+    std::string format{"rgba8"}; // only meaningful when is_storage
+    std::string access{"read_write"}; // only meaningful when is_storage
+
+    // Sizing expressions for write_only / read_write storage images. Same
+    // convention as csf_image_input (top-level INPUTS images): an integer
+    // literal or a `$variable` reference resolved against the shader's
+    // long/float input ports + the standard $WIDTH/$HEIGHT/$DEPTH/$LAYERS
+    // family. Empty → engine falls back to renderer state (renderSize for
+    // 2D, voxel-resolution heuristics for 3D). When the engine
+    // auto-allocates a writable nested-aux storage image, these strings
+    // drive its dimensions; for sampled (read-only) entries they're
+    // ignored — the texture comes from the upstream producer at whatever
+    // size that producer baked.
+    std::string width_expression;
+    std::string height_expression;
+    std::string depth_expression;   // 3rd dimension for 3D textures
+    std::string layers_expression;  // array slice count for 2D arrays
+
+    sampler_config sampler;
   };
 
   std::vector<attribute_request> attributes;
   std::vector<auxiliary_request> auxiliary;
+  std::vector<auxiliary_texture_request> auxiliary_textures;
 
   std::string vertex_count;   // expression string, may contain $USER
   std::string instance_count; // expression string, may contain $USER
 
-  bool indirect_draw{false};            // compute shader writes draw args to an indirect buffer
-  std::string indirect_draw_type;       // "draw" (default) or "draw_indexed"
+  // Optional format identity stamped onto the consumer geometry's
+  // filter_tag (rapidhash truncated to 32 bits). Only meaningful on
+  // RESOURCES of TYPE: geometry used as outputs (geoOut). Empty leaves
+  // filter_tag at 0 (the "untagged" sentinel) — no routing change for
+  // CSFs that don't author an output format.
+  std::string format_id;
+
+  struct indirect_request
+  {
+    std::string count; // expression string (same resolver as vertex_count)
+  };
+  std::optional<indirect_request> indirect;
 };
 
 struct input
@@ -193,7 +438,7 @@ struct input
       float_input, long_input, event_input, bool_input, color_input, point2d_input,
       point3d_input, image_input, cubemap_input, audio_input, audioFFT_input,
       audioHist_input, storage_input, texture_input, csf_image_input,
-      geometry_input>;
+      geometry_input, uniform_input>;
 
   std::string name;
   std::string label;
@@ -290,10 +535,53 @@ struct vertex_attribute
   int location{};
   attribute_type type{};
   std::string name;
+
+  // Optional explicit ossia attribute_semantic name ("position", "velocity",
+  // "texcoord0", ..., "custom"). Only meaningful on `vertex_input` (raw
+  // raster), where it controls how the runtime matches the declared input
+  // to an upstream geometry attribute — same lookup algorithm as CSF
+  // attribute_request. When empty, the parser implicitly uses `name` as the
+  // semantic key. Set to "custom" to force exact-name matching against
+  // custom attributes.
+  std::string semantic;
+
+  // Interpolation qualifier (only applicable to vertex_output / fragment_input).
+  // Allowed: "smooth" (default), "flat", "noperspective", "centroid", "sample".
+  // "sample" forces per-sample fragment shading on this varying — the fragment
+  // shader runs once per MSAA sample for that coverage. Required when MSAA
+  // outputs need per-sample correct interpolation (specular highlights,
+  // normal-mapped surfaces). Empty string = default smooth.
+  std::string interpolation;
 };
 
 struct vertex_input : vertex_attribute
 {
+  // When false, the raw-raster renderer tolerates an upstream geometry that
+  // does not carry a matching attribute: instead of failing the pipeline
+  // build, it synthesises a tiny PerInstance step_rate=1 buffer filled with
+  // a neutral "identity" value (zero for translation, white for color, 1
+  // for roughness, etc.) and binds that in place of the missing upstream
+  // attribute. Lets a single shader cover both instanced and non-instanced
+  // upstreams without per-shape variants.
+  //
+  // When false AND `default_val` is set, those explicit numbers are used
+  // verbatim (after component-truncation / zero-padding against the
+  // declared TYPE). When false AND `default_val` is empty, the runtime
+  // looks the semantic up in a built-in whitelist (see
+  // score::gfx::vertexFallbackDefault) — non-whitelisted semantics without
+  // an explicit DEFAULT are rejected at pipeline-build time with a clear
+  // error to avoid silently-wrong rendering.
+  //
+  // When true (default), the upstream geometry MUST provide the attribute
+  // or the pipeline build fails — existing strict behaviour.
+  bool required{true};
+
+  // Explicit DEFAULT numbers from the JSON header. Stored as doubles for
+  // JSON fidelity; converted to the runtime format (float / int) at
+  // buffer-build time. Empty = use the whitelist neutral (see `required`).
+  // Length is not pre-validated against TYPE here — the runtime truncates
+  // or zero-pads to match the declared GLSL type width.
+  std::vector<double> default_val;
 };
 struct vertex_output : vertex_attribute
 {
@@ -305,6 +593,92 @@ struct fragment_output : vertex_attribute
 {
 };
 
+// --- Pipeline state control (PIPELINE_STATE descriptor key) ---------------
+//
+// Fields are optional (std::optional, or an empty vector): missing = keep
+// current/legacy default. `descriptor::default_state` (from PIPELINE_STATE)
+// is the global state; each `pass::override_state` merges on top of it.
+
+struct blend_attachment
+{
+  bool enable{false};
+  std::string src_color{"src_alpha"};
+  std::string dst_color{"one_minus_src_alpha"};
+  std::string op_color{"add"};
+  std::string src_alpha{"one"};
+  std::string dst_alpha{"one_minus_src_alpha"};
+  std::string op_alpha{"add"};
+  std::string color_write{"rgba"}; // "rgba", "rgb", "r", ...
+};
+
+struct stencil_op_state
+{
+  std::string fail_op{"keep"};
+  std::string depth_fail_op{"keep"};
+  std::string pass_op{"keep"};
+  std::string compare_op{"always"};
+};
+
+struct pipeline_state
+{
+  std::optional<bool> depth_test;
+  std::optional<bool> depth_write;
+  std::optional<std::string> depth_compare; // "less", "less_equal", "greater", ...
+  std::optional<float> depth_bias;
+  std::optional<float> slope_scaled_depth_bias;
+
+  std::optional<std::string> cull_mode;   // "none", "front", "back"
+  std::optional<std::string> front_face;  // "ccw", "cw"
+  std::optional<std::string> polygon_mode;// "fill", "line"
+  std::optional<float> line_width;
+
+  // Procedural-draw override (Vertex Shader Art style). When
+  // `vertex_count` is set, the renderer issues a single
+  // cb.draw(vertex_count, instance_count, 0, 0) and ignores the
+  // incoming geometry's index / indirect buffers entirely. The vertex
+  // shader drives positions purely from gl_VertexIndex +
+  // gl_InstanceIndex. Use cases:
+  //   - Fullscreen passes: VERTEX_COUNT=3, TOPOLOGY=triangles (skybox).
+  //   - VSA-style plasma / curves: VERTEX_COUNT=10000,
+  //     TOPOLOGY=line_strip.
+  //   - Procedural particle grids: VERTEX_COUNT=65536, TOPOLOGY=points.
+  //
+  // Safety: if VERTEX_INPUTS is non-empty (the shader declares vertex
+  // attribute reads), the renderer clamps vertex_count to the incoming
+  // geometry's vertex_count to avoid reading past buffer ends. Shaders
+  // that rely purely on gl_VertexIndex should declare an empty
+  // `VERTEX_INPUTS: []` so the pipeline is built with no vertex
+  // bindings and the draw count is used verbatim.
+  std::optional<uint32_t> vertex_count;
+  std::optional<uint32_t> instance_count;
+  // Topology override. When unset, the incoming geometry's topology is
+  // used. Values: "triangles", "triangle_strip", "triangle_fan",
+  // "lines", "line_strip", "points".
+  std::optional<std::string> topology;
+
+  // Blending: either a single state applied to all color attachments, or a
+  // per-attachment vector. If both are present the per-attachment wins.
+  std::optional<blend_attachment> blend_all;
+  std::vector<blend_attachment> blend_per_attachment;
+
+  // Stencil (optional)
+  std::optional<bool> stencil_test;
+  std::optional<uint32_t> stencil_read_mask;
+  std::optional<uint32_t> stencil_write_mask;
+  std::optional<stencil_op_state> stencil_front;
+  std::optional<stencil_op_state> stencil_back;
+
+  // Variable-rate shading (VRS).
+  //   "SHADING_RATE": [w, h]   — per-draw shading rate where w,h ∈ {1, 2, 4}.
+  //                              [1,1] = 1×1 (full rate, default).
+  //                              [2,2] = 1 invocation per 2×2 pixel block.
+  //                              [4,4] = 1 per 4×4 block.
+  // Combined with a shading-rate map (set on the render target) the actual
+  // rate is the per-draw rate combined with the per-tile rate via the chosen
+  // combiner op. Requires QRhi::Feature::VariableRateShading (Vulkan, D3D12).
+  std::optional<std::array<int, 2>> shading_rate;
+};
+
 struct pass
 {
   std::string target;
@@ -313,12 +687,85 @@ struct pass
   bool nearest_filter{};
   std::string width_expression{};
   std::string height_expression{};
+
+  // Render to a specific layer of a texture-array output (-1 = layer 0).
+  int layer{-1};
+
+  // Render to a specific Z-slice of a 3D output. Expression string so the
+  // slice can be computed from inputs (e.g. "$USER_slice"). Empty = slice 0
+  // when the target is 3D, or irrelevant when 2D.
+  std::string z_expression{};
+
+  // Optional format override for the intermediate render target of this
+  // pass (e.g. "rgba16f" for precision-sensitive blur stages). Empty = use
+  // FLOAT: true mapping (rgba32f / rgba8) as before.
+  std::string format{};
+
+  // Per-pass pipeline state overrides (merged with descriptor.default_state).
+  pipeline_state override_state;
 };
 
 struct output_declaration
 {
   std::string name;     // User-chosen name (e.g. "color", "sceneDepth")
   std::string type;     // "color" (default) or "depth"
+
+  // LAYERS: >1 allocates a texture array with this many layers.
+  int layers{1};
+
+  // DEPTH: >1 allocates a 3D texture of this depth. Mutually exclusive with
+  // LAYERS (a ThreeDimensional texture is not a TextureArray). A fragment
+  // PASSES entry with Z renders into a single Z-slice via a color attachment
+  // with setLayer(z).
+  int depth{1};
+
+  // FORMAT: optional explicit texture format ("rgba8", "rgba16f", "r32f", "d32f", ...).
+  // Empty = use the default (RGBA8 for color, D32F for depth).
+  std::string format;
+
+  // SAMPLES: MSAA sample count (1, 2, 4, 8, 16, 32, 64). 1 = no MSAA (default).
+  // The renderer allocates an MSAA texture and inserts an automatic resolve
+  // pass when downstream consumers expect a non-MSAA input. Each declared
+  // OUTPUT can have its own sample count; the depth attachment for a colour
+  // OUTPUT inherits the same sample count.
+  int samples{1};
+
+  // CUBEMAP: when true the output is allocated with the QRhi cubemap flag
+  // so downstream consumers can bind it as a samplerCube. Implies
+  // `layers == 6` on allocation even when the shader didn't set LAYERS
+  // explicitly. Used by the IBL precompute path (irradiance_convolve,
+  // prefilter_ggx) together with MULTIVIEW:6.
+  bool is_cubemap{false};
+
+  // GENERATE_MIPS: when true the runtime calls generateMips() on this
+  // output's texture after the render pass completes, auto-averaging
+  // the base level into a full mip chain. Implies the QRhi
+  // `MipMapped` + `UsedWithGenerateMips` flags on allocation. Use this
+  // for "source-data" targets whose base level is authored by the
+  // fragment shader and whose sub-mips should be GPU-filtered (skybox
+  // converter, base color textures, SSAO LUTs…). NOT for the
+  // prefilter-style case where each mip has distinct shader-authored
+  // content — use EXECUTION_MODEL: PER_MIP instead.
+  bool generate_mips{false};
+
+  // WIDTH / HEIGHT: explicit target size for offscreen outputs. Set
+  // by the shader author when the intrinsic size of the algorithm
+  // isn't tied to the window / swap-chain (IBL precompute, shadow
+  // atlases, post-process LUTs, …). Zero → fall back to the
+  // renderer's render-size (classic behaviour). Integer literal or
+  // string expression; the expression is evaluated once at init
+  // against the same variable surface as CSF dispatch expressions
+  // ($WIDTH_<input> / $HEIGHT_<input> / scalar input values).
+  //
+  // All colour OUTPUTs of a single RAW_RASTER_PIPELINE shader share
+  // a render pass and must therefore resolve to the same final size;
+  // the runtime uses the first colour OUTPUT's resolved size as the
+  // RT size and allocates every attachment at that size. Cubemaps
+  // are additionally clamped to square via min(w, h) (QRhi contract).
+  int width{0};
+  int height{0};
+  std::string width_expression;
+  std::string height_expression;
 };
 
 struct descriptor
@@ -374,6 +821,91 @@ struct descriptor
   // Auxiliary SSBOs expected from upstream geometry (matched by name).
   // Populated from top-level AUXILIARY key in RAW_RASTER_PIPELINE mode.
   std::vector<geometry_input::auxiliary_request> auxiliary;
+
+  // Auxiliary textures travelling with the geometry (matched by name
+  // against ossia::geometry::auxiliary_textures). Populated from the same
+  // top-level AUXILIARY array when entries have TYPE: "image" / "texture"
+  // / "cubemap". Unlike INPUTS-declared textures they don't consume a
+  // score input port — the renderer looks them up on the geometry every
+  // frame.
+  std::vector<geometry_input::auxiliary_texture_request> auxiliary_textures;
+
+  // PIPELINE_STATE: global pipeline state (depth, blend, cull, stencil, ...).
+  // Applies to every output pass; may be overridden per-pass via pass::override_state.
+  pipeline_state default_state;
+
+  // MULTIVIEW: render to N layers of a texture array in a single draw.
+  // 0 or 1 = disabled. N>=2 = enabled (requires QRhi::MultiView capability).
+  int multiview_count{0};
+
+  // EXECUTION_MODEL (RAW_RASTER_PIPELINE only — silently ignored in other
+  // modes). Drives the invocation count of the single raster pass:
+  //
+  //   "SINGLE"        (default) — one invocation per frame, RT bound at
+  //                   mip 0.
+  //   "PER_MIP"       — N invocations, RT bound at mip `i` on iteration
+  //                     `i`. N is derived from the `target` texture's
+  //                     mip chain (floor(log2(min(w, h))) + 1).
+  //                     ProcessUBO.passIndex carries the mip index.
+  //   "PER_CUBE_FACE" — 6 invocations, RT bound at cube layer `i`
+  //                     (face order +X, -X, +Y, -Y, +Z, -Z). Target
+  //                     OUTPUT must be CUBEMAP: true. Mutually
+  //                     exclusive with MULTIVIEW (which already
+  //                     amplifies one draw to 6 faces).
+  //   "PER_LAYER"     — N invocations, RT bound at array layer `i`. N
+  //                     comes from the target OUTPUT's `layers`
+  //                     declaration. Works on either colour TextureArray
+  //                     targets (setLayer attachment) or depth
+  //                     TextureArray targets (rendered to a scratch
+  //                     and copied into the array layer post-pass —
+  //                     QRhi 6.11 has no per-layer depth attachment
+  //                     API). ProcessUBO.passIndex carries the layer
+  //                     index. Drives shadow_cascades.frag.
+  //   "MANUAL"        — N invocations, same RT each time, where N is
+  //                     evaluated from the `count` expression string
+  //                     via the math_expression parser every frame
+  //                     (same variable bindings as CSF's stride /
+  //                     image-size expressions: $WIDTH, $HEIGHT,
+  //                     $<inputName>, ...).
+  struct raster_execution_model
+  {
+    std::string type;            // "SINGLE" / "PER_MIP" / "PER_CUBE_FACE" / "PER_LAYER" / "MANUAL"
+    std::string target;          // PER_MIP / PER_CUBE_FACE / PER_LAYER: OUTPUT name to iterate
+    std::string count_expression; // MANUAL: integer-valued expression
+  };
+  raster_execution_model execution_model;
+
+  // User-declared GLSL extension names, emitted as `#extension NAME : require`
+  // immediately after `#version` in every generated stage. Examples:
+  // "GL_KHR_shader_subgroup_arithmetic", "GL_EXT_shader_atomic_float".
+  std::vector<std::string> extensions{
+      "GL_GOOGLE_include_directive", "GL_GOOGLE_cpp_style_line_directive"};
+
+  // CLIP_DISTANCES: number of gl_ClipDistance[N] outputs the vertex shader
+  // writes (1..8 typical). When > 0 the parser injects
+  // `out float gl_ClipDistance[N];` in the vertex stage so user code can
+  // assign without writing the declaration. Each declared distance enables
+  // one user-defined clipping plane: fragments where gl_ClipDistance[i] < 0
+  // are discarded.
+  int clip_distances{0};
+
+  // CULL_DISTANCES: like clip distances but per-primitive: a primitive whose
+  // every vertex has all gl_CullDistance[i] < 0 is fully culled before
+  // rasterisation. Useful for cheap frustum-/occlusion-style culling.
+  int cull_distances{0};
+
+  // DEPTH_LAYOUT: conservative-depth qualifier on gl_FragDepth. Allowed:
+  //   "any"        — driver default (no guarantee, disables early-Z when
+  //                  gl_FragDepth is written).
+  //   "greater"    — promise the value written is >= the value rasterisation
+  //                  would have produced. Lets the HW keep early-Z reject
+  //                  for fragments already deeper than the depth buffer.
+  //   "less"       — symmetric promise in the other direction.
+  //   "unchanged"  — promise the written value equals the rasterised value
+  //                  (mostly for documentation; same fast path as "greater"
+  //                   on hardware where reverse-Z applies).
+  // Empty = no qualifier emitted.
+  std::string depth_layout;
 };
 
 class SCORE_PLUGIN_GFX_EXPORT parser
diff --git a/src/plugins/score-plugin-gfx/CMakeLists.txt b/src/plugins/score-plugin-gfx/CMakeLists.txt
index 175758b842..c43a169e60 100644
--- a/src/plugins/score-plugin-gfx/CMakeLists.txt
+++ b/src/plugins/score-plugin-gfx/CMakeLists.txt
@@ -136,6 +136,8 @@ set(HDRS
     Gfx/Filter/Library.hpp
     Gfx/Filter/PreviewWidget.hpp
 
+    Gfx/Widgets/RhiPreviewWidget.hpp
+
     Gfx/GeometryFilter/Executor.hpp
     Gfx/GeometryFilter/Metadata.hpp
     Gfx/GeometryFilter/Process.hpp
@@ -173,11 +175,12 @@ set(HDRS
     
     Gfx/Graph/BackgroundNode.hpp
     Gfx/Graph/CommonUBOs.hpp
+    Gfx/Graph/PhongNode.hpp
     Gfx/Graph/CustomMesh.hpp
-    Gfx/Graph/DepthNode.hpp
     Gfx/Graph/GeometryFilterNode.hpp
     Gfx/Graph/GeometryFilterNodeRenderer.hpp
     Gfx/Graph/RhiComputeBarrier.hpp
+    Gfx/Graph/RhiClearBuffer.hpp
     Gfx/Graph/GPUBufferScatter.hpp
     Gfx/Graph/RenderedCSFNode.hpp
     Gfx/Graph/Graph.hpp
@@ -188,8 +191,18 @@ set(HDRS
     Gfx/Graph/Node.hpp
     Gfx/Graph/NodeRenderer.hpp
     Gfx/Graph/OutputNode.hpp
-    Gfx/Graph/PhongNode.hpp
     Gfx/Graph/PreviewNode.hpp
+    Gfx/Graph/SceneGPUState.hpp
+    Gfx/Graph/GpuResourceRegistry.hpp
+    Gfx/Graph/VertexFallbackDefaults.hpp
+    Gfx/Graph/VertexFallbackPlan.hpp
+    Gfx/Graph/VertexFallbackPool.hpp
+    Gfx/Graph/GpuTiming.hpp
+    Gfx/Graph/ScenePreprocessorNode.hpp
+    Gfx/Graph/CameraMath.hpp
+    Gfx/Graph/SceneFilterNode.hpp
+    Gfx/Graph/FlattenedSceneFilterNode.hpp
+    Gfx/Graph/MergeGeometriesNode.hpp
     Gfx/Graph/RenderList.hpp
     Gfx/Graph/RenderState.hpp
     Gfx/Graph/RenderedISFNode.hpp
@@ -203,6 +216,7 @@ set(HDRS
     Gfx/Graph/SimpleRenderedISFNode.hpp
     Gfx/Graph/TexgenNode.hpp
     Gfx/Graph/TextNode.hpp
+    Gfx/Graph/TextureLoader.hpp
     Gfx/Graph/Uniforms.hpp
     Gfx/Graph/Utils.hpp
     Gfx/Graph/VideoNode.hpp
@@ -268,10 +282,14 @@ set(HDRS
     Gfx/Settings/View.hpp
     Gfx/Settings/Factory.hpp
     
+    Gfx/AssetTable.hpp
+    Gfx/FormatRegistry.hpp
+    Gfx/Hashes.hpp
     Gfx/Window/BackgroundDevice.hpp
     Gfx/Window/CollapsibleSection.hpp
     Gfx/Window/DesktopLayout.hpp
     Gfx/Window/MultiWindowDevice.hpp
+    Gfx/Window/OffscreenDevice.hpp
     Gfx/Window/OutputMapping.hpp
     Gfx/Window/OutputPreview.hpp
     Gfx/Window/TestCard.hpp
@@ -321,6 +339,8 @@ set(SRCS
     Gfx/Filter/Process.cpp
     Gfx/Filter/PreviewWidget.cpp
 
+    Gfx/Widgets/RhiPreviewWidget.cpp
+
     Gfx/GeometryFilter/Executor.cpp
     Gfx/GeometryFilter/Process.cpp
     Gfx/GeometryFilter/Library.cpp
@@ -353,9 +373,11 @@ set(SRCS
     Gfx/Graph/decoders/HAP.cpp
     Gfx/Graph/BackgroundNode.cpp
     Gfx/Graph/CustomMesh.cpp
+    Gfx/Graph/PhongNode.cpp
     Gfx/Graph/GeometryFilterNode.cpp
     Gfx/Graph/GeometryFilterNodeRenderer.cpp
     Gfx/Graph/RhiComputeBarrier.cpp
+    Gfx/Graph/RhiClearBuffer.cpp
     Gfx/Graph/GPUBufferScatter.cpp
     Gfx/Graph/RenderedCSFNode.cpp
     Gfx/Graph/Graph.cpp
@@ -366,16 +388,30 @@ set(SRCS
     Gfx/Graph/Node.cpp
     Gfx/Graph/NodeRenderer.cpp
     Gfx/Graph/OutputNode.cpp
-    Gfx/Graph/PhongNode.cpp
     Gfx/Graph/PreviewNode.cpp
+    Gfx/Graph/SceneGPUState.cpp
+    Gfx/Graph/GpuResourceRegistry.cpp
+    Gfx/Graph/VertexFallbackDefaults.cpp
+    Gfx/Graph/VertexFallbackPool.cpp
+    Gfx/Graph/GpuTiming.cpp
+    Gfx/Graph/ScenePreprocessorNode.cpp
+    Gfx/Graph/CameraMath.cpp
+    Gfx/Graph/SceneFilterNode.cpp
+    Gfx/Graph/FlattenedSceneFilterNode.cpp
+    Gfx/Graph/MergeGeometriesNode.cpp
     Gfx/Graph/RenderList.cpp
     Gfx/Graph/RenderedISFNode.cpp
     Gfx/Graph/RenderedRawRasterPipelineNode.cpp
     Gfx/Graph/RenderedVSANode.cpp
+    Gfx/Graph/PipelineStateHelpers.hpp
+    Gfx/Graph/PipelineStateHelpers.cpp
+    Gfx/Graph/IsfBindingsBuilder.hpp
+    Gfx/Graph/IsfBindingsBuilder.cpp
     Gfx/Graph/ScreenNode.cpp
     Gfx/Graph/ShaderCache.cpp
     Gfx/Graph/SimpleRenderedISFNode.cpp
     Gfx/Graph/TextNode.cpp
+    Gfx/Graph/TextureLoader.cpp
     Gfx/Graph/Utils.cpp
     Gfx/Graph/VideoNode.cpp
     Gfx/Graph/VideoNodeRenderer.cpp
@@ -383,6 +419,8 @@ set(SRCS
     Gfx/Graph/DirectVideoNodeRenderer.cpp
     Gfx/Graph/Window.cpp
 
+    Gfx/AssetTable.cpp
+    Gfx/FormatRegistry.cpp
     Gfx/GfxApplicationPlugin.cpp
     Gfx/GfxExecNode.cpp
     Gfx/GfxExecutionAction.cpp
@@ -429,13 +467,17 @@ set_source_files_properties(
     "${3RDPARTY_FOLDER}/glsl-parser/glsl.parser.c"
     "${3RDPARTY_FOLDER}/glsl-parser/glsl.lexer.c"
     "${3RDPARTY_FOLDER}/dxv/dxv.c"
+    "${3RDPARTY_FOLDER}/OffsetAllocator/offsetAllocator.cpp"
     PROPERTIES
       SKIP_PRECOMPILE_HEADERS ON
       SKIP_UNITY_BUILD_INCLUSION ON
 )
 
 # Creation of the library
-add_library(${PROJECT_NAME} ${SRCS} ${HDRS})
+add_library(${PROJECT_NAME} ${SRCS} ${HDRS}
+  "${3RDPARTY_FOLDER}/OffsetAllocator/offsetAllocator.cpp"
+  "${3RDPARTY_FOLDER}/OffsetAllocator/offsetAllocator.hpp"
+)
 
 # Code generation
 score_generate_command_list_file(${PROJECT_NAME} "${HDRS}")
@@ -443,6 +485,7 @@ score_generate_command_list_file(${PROJECT_NAME} "${HDRS}")
 target_include_directories(${PROJECT_NAME}
   PUBLIC
     3rdparty/libisf/src
+    "${3RDPARTY_FOLDER}/OffsetAllocator"
   PRIVATE
     "${3RDPARTY_FOLDER}/dxv"
 )
@@ -582,11 +625,13 @@ elseif(APPLE)
   target_sources(${PROJECT_NAME} PRIVATE
     Gfx/CameraDevice.avf.mm
     Gfx/Graph/RhiBufferCopyMetal.mm
+    Gfx/Graph/RhiClearBufferMetal.mm
   )
 
   set_source_files_properties(
     Gfx/CameraDevice.avf.mm
     Gfx/Graph/RhiBufferCopyMetal.mm
+    Gfx/Graph/RhiClearBufferMetal.mm
     PROPERTIES
       SKIP_UNITY_BUILD_INCLUSION 1
   )
diff --git a/src/plugins/score-plugin-gfx/Gfx/AssetTable.cpp b/src/plugins/score-plugin-gfx/Gfx/AssetTable.cpp
new file mode 100644
index 0000000000..12d3a65b78
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/AssetTable.cpp
@@ -0,0 +1,187 @@
+#include <Gfx/AssetTable.hpp>
+
+#include <cstddef>
+#include <cstdint>
+
+namespace Gfx
+{
+
+namespace
+{
+std::size_t estimateSize(const AssetTable::DecodedAsset& a) noexcept
+{
+  std::size_t total = 0;
+  if(!a.image.isNull())
+    total += static_cast<std::size_t>(a.image.sizeInBytes());
+  if(a.bytes)
+    total += a.bytes->size();
+  return total;
+}
+}
+
+void AssetTable::stage(uint64_t content_hash, QImage image)
+{
+  std::lock_guard lock{m_mutex};
+  auto it = m_entries.find(content_hash);
+  if(it != m_entries.end())
+    return; // Hash contract: same hash = same bytes. Idempotent stage.
+
+  auto e = std::make_shared<DecodedAsset>();
+  e->image = std::move(image);
+  e->byte_size = estimateSize(*e);
+  m_total_bytes += e->byte_size;
+
+  Slot s;
+  s.asset = std::move(e);
+  m_entries.emplace(content_hash, std::move(s));
+}
+
+void AssetTable::stage(
+    uint64_t content_hash,
+    std::shared_ptr<const std::vector<uint8_t>> bytes,
+    std::string mime_type)
+{
+  std::lock_guard lock{m_mutex};
+  auto it = m_entries.find(content_hash);
+  if(it != m_entries.end())
+    return;
+
+  auto e = std::make_shared<DecodedAsset>();
+  e->bytes = std::move(bytes);
+  e->mime_type = std::move(mime_type);
+  e->byte_size = estimateSize(*e);
+  m_total_bytes += e->byte_size;
+
+  Slot s;
+  s.asset = std::move(e);
+  m_entries.emplace(content_hash, std::move(s));
+}
+
+std::shared_ptr<const AssetTable::DecodedAsset>
+AssetTable::acquire(uint64_t content_hash)
+{
+  std::lock_guard lock{m_mutex};
+  auto it = m_entries.find(content_hash);
+  if(it == m_entries.end())
+    return {};
+  auto& slot = it->second;
+
+  // Resurrect from LRU if cold.
+  if(slot.in_lru)
+  {
+    m_lru.erase(slot.lru_it);
+    slot.in_lru = false;
+    m_cold_bytes -= slot.asset->byte_size;
+  }
+
+  ++slot.asset->refcount;
+  return slot.asset;
+}
+
+std::shared_ptr<const AssetTable::DecodedAsset>
+AssetTable::peek(uint64_t content_hash) const
+{
+  std::lock_guard lock{m_mutex};
+  auto it = m_entries.find(content_hash);
+  if(it == m_entries.end())
+    return {};
+  // Intentionally does NOT move out of LRU nor bump refcount — the
+  // caller just wants a read-through. If the entry is cold it stays
+  // cold (still evictable next trim). shared_ptr semantics keep the
+  // DecodedAsset alive as long as the caller holds the returned ptr,
+  // even if eviction happens concurrently on another thread.
+  return it->second.asset;
+}
+
+void AssetTable::release(uint64_t content_hash)
+{
+  std::lock_guard lock{m_mutex};
+  auto it = m_entries.find(content_hash);
+  if(it == m_entries.end())
+    return;
+  auto& slot = it->second;
+  if(slot.asset->refcount > 0)
+    --slot.asset->refcount;
+  if(slot.asset->refcount == 0 && !slot.in_lru)
+  {
+    // Newest-first: push_front, tail is oldest. trim() pops from tail.
+    m_lru.push_front(content_hash);
+    slot.lru_it = m_lru.begin();
+    slot.in_lru = true;
+    m_cold_bytes += slot.asset->byte_size;
+  }
+}
+
+void AssetTable::evictOne() noexcept
+{
+  // Caller holds m_mutex.
+  if(m_lru.empty())
+    return;
+  const uint64_t hash = m_lru.back();
+  m_lru.pop_back();
+
+  auto it = m_entries.find(hash);
+  if(it == m_entries.end())
+    return;
+
+  const std::size_t sz = it->second.asset->byte_size;
+  m_total_bytes -= sz;
+  m_cold_bytes -= sz;
+  m_entries.erase(it);
+}
+
+std::size_t AssetTable::trim(std::size_t max_bytes_budget)
+{
+  std::lock_guard lock{m_mutex};
+  std::size_t evicted = 0;
+  // Only evict from cold pool — hot entries stay regardless of budget.
+  while(m_cold_bytes > max_bytes_budget && !m_lru.empty())
+  {
+    const std::size_t before_total = m_total_bytes;
+    evictOne();
+    evicted += (before_total - m_total_bytes);
+  }
+  return evicted;
+}
+
+void AssetTable::maybeAutoTrim(
+    float utilization, float high_watermark, float target)
+{
+  // No-op below the watermark, when not above target, or when a ratio is
+  // NaN / utilization <= 0 (the size_t conversion below would be UB).
+  if(!(utilization > 0.f) || utilization < high_watermark
+     || !(target < utilization))
+    return;
+
+  std::lock_guard lock{m_mutex};
+  if(m_cold_bytes == 0)
+    return;
+
+  // Heuristic budget: cold pool scaled by (target / utilization), e.g.
+  // util=0.85, target=0.60 → keep ~70% of it. Negative target → 0.
+  const float scale = target > 0.f ? target / utilization : 0.f;
+  const auto budget
+      = static_cast<std::size_t>(static_cast<float>(m_cold_bytes) * scale);
+  while(m_cold_bytes > budget && !m_lru.empty())
+    evictOne();
+}
+
+std::size_t AssetTable::size() const noexcept
+{
+  std::lock_guard lock{m_mutex};
+  return m_entries.size();
+}
+
+std::size_t AssetTable::totalBytes() const noexcept
+{
+  std::lock_guard lock{m_mutex};
+  return m_total_bytes;
+}
+
+std::size_t AssetTable::coldCount() const noexcept
+{
+  std::lock_guard lock{m_mutex};
+  return m_lru.size();
+}
+
+} // namespace Gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/AssetTable.hpp b/src/plugins/score-plugin-gfx/Gfx/AssetTable.hpp
new file mode 100644
index 0000000000..5b91d7fb98
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/AssetTable.hpp
@@ -0,0 +1,169 @@
+#pragma once
+
+#include <score_plugin_gfx_export.h>
+
+#include <ossia/detail/hash_map.hpp>
+
+#include <QImage>
+
+#include <cstddef>
+#include <cstdint>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace Gfx
+{
+
+/**
+ * @brief Cross-RenderList content-hash dedup for decoded asset bytes.
+ *
+ * Lives on GfxContext, shared across all RenderLists in the session.
+ * Keyed by `content_hash` (64-bit stable hash of the source bytes —
+ * the canonical primitive is `ossia::hash_bytes` from
+ * `ossia/detail/hash.hpp`, which dispatches to rapidhash; parsers and
+ * the preprocessor produce content_hash values through that helper).
+ *
+ * Purpose: one decode per asset across the whole session. When two
+ * glTF files reference the same `baseColor.jpg`, we decode it once
+ * and reuse. Per-RenderList GpuResourceRegistries upload from the
+ * cached QImage independently (Plan 09 §4.2: one decode, N uploads).
+ *
+ * Not the GPU-resource owner — GpuResourceRegistry does that per
+ * QRhi. AssetTable only holds CPU-side bytes + format metadata
+ * during the window between decode and eviction.
+ *
+ * # Lifecycle (Plan 09 S1)
+ *
+ * Three states per entry:
+ *
+ *   - **hot** (refcount > 0): actively held by at least one consumer.
+ *     Never evicted.
+ *   - **cold** (refcount == 0, still referenced in the LRU list):
+ *     eviction candidate. `acquire()` resurrects it at zero cost.
+ *   - **evicted**: dropped from the map. Next `acquire()` misses;
+ *     the caller re-decodes and calls `stage()` again.
+ *
+ * Transitions:
+ *   - `stage()` inserts at refcount 0 but outside the LRU list, so the
+ *     entry only becomes evictable after a `release()`; no-op if present.
+ *   - `acquire()` bumps refcount, splicing out of the LRU if resurrecting.
+ *   - `release()` decrements; at 0 the entry moves to the LRU head.
+ *   - `trim(max_bytes)` pops from the LRU tail until under budget.
+ *   - `maybeAutoTrim()` called periodically: reads a supplied
+ *     utilization ratio and trims when above a threshold.
+ *
+ * Byte accounting is approximate — `estimateSize(DecodedAsset)` sums
+ * QImage::sizeInBytes and the raw bytes vector size. Good enough
+ * for budget bookkeeping without a full allocator hook.
+ *
+ * # Thread safety
+ *
+ * All public methods take `m_mutex`. Fine for the access pattern
+ * (parser worker threads stage, render threads acquire/release,
+ * GUI tick trims) — the mutex is held for microseconds at a time.
+ */
+class SCORE_PLUGIN_GFX_EXPORT AssetTable
+{
+public:
+  /// Decoded image or raw byte payload. `image` is preferred for 2D
+  /// textures (carries QImage's format metadata); `bytes` for generic
+  /// buffer assets (vertex/index streams etc.).
+  struct DecodedAsset
+  {
+    QImage image;
+    std::shared_ptr<const std::vector<uint8_t>> bytes;
+    std::string mime_type;
+    int64_t refcount{0};
+    // Approximate storage cost. Computed at stage() time; the
+    // allocator may report a different value but this is the number
+    // the LRU trim budgets against.
+    std::size_t byte_size{0};
+  };
+
+  // For byte-range hashing use `ossia::hash_bytes` from
+  // `ossia/detail/hash.hpp` — it's the canonical rapidhash-tiered
+  // dispatcher that produces stable `content_hash` values across
+  // the codebase. Parsers call it directly when stamping
+  // `texture_source::content_hash` / `buffer_resource::content_hash`.
+
+  AssetTable() = default;
+  AssetTable(const AssetTable&) = delete;
+  AssetTable& operator=(const AssetTable&) = delete;
+  ~AssetTable() = default;
+
+  /// Publish a decoded asset under its content hash. Idempotent —
+  /// a second stage() with the same hash is a no-op (hash contract:
+  /// same hash = same bytes).
+  void stage(uint64_t content_hash, QImage image);
+  void stage(
+      uint64_t content_hash, std::shared_ptr<const std::vector<uint8_t>> bytes,
+      std::string mime_type = {});
+
+  /// Return a shared pointer to the decoded asset, bumping its
+  /// refcount. Null when not staged. O(1) average.
+  std::shared_ptr<const DecodedAsset> acquire(uint64_t content_hash);
+
+  /// Read-through without refcount bump. The returned shared_ptr
+  /// keeps the DecodedAsset alive on the caller's side even if the
+  /// AssetTable evicts the entry — but does NOT prevent eviction.
+  /// Suitable for the "upload once to GPU, then done" path where
+  /// the consumer doesn't care if the CPU-side bytes live on.
+  std::shared_ptr<const DecodedAsset> peek(uint64_t content_hash) const;
+
+  /// Decrement refcount. At 0 the entry moves to the LRU head and
+  /// is eligible for eviction on the next trim.
+  void release(uint64_t content_hash);
+
+  /// Force eviction until the cold-pool byte total is at or below
+  /// @p max_bytes_budget. Called explicitly by UI ("unload unused") or
+  /// implicitly by maybeAutoTrim.
+  /// @return bytes evicted.
+  std::size_t trim(std::size_t max_bytes_budget);
+
+  /// Called on a cadence (e.g. from the Gfx thread idle tick) to
+  /// pressure-trim when the supplied utilization ratio exceeds
+  /// @p high_watermark. Cost: O(n) in the LRU list when a trim
+  /// fires; constant otherwise.
+  ///
+  /// @p utilization in [0, 1]. Compute externally from
+  /// QRhiStats::usedBytes / (usedBytes + unusedBytes), or from a
+  /// hard OS-level memory query.
+  /// @p high_watermark default 0.80. @p target default 0.60.
+  void maybeAutoTrim(
+      float utilization, float high_watermark = 0.80f,
+      float target = 0.60f);
+
+  /// Debug / inspector.
+  std::size_t size() const noexcept;
+  /// Approx total bytes held in cold pool + hot pool.
+  std::size_t totalBytes() const noexcept;
+  /// Number of cold entries eligible for eviction.
+  std::size_t coldCount() const noexcept;
+
+private:
+  struct Slot;  // forward
+
+  // Linked list of cold entries, newest at head. std::list keeps a
+  // Slot's stored iterator valid while other elements are erased.
+  using LruList = std::list<uint64_t>;
+
+  struct Slot
+  {
+    std::shared_ptr<DecodedAsset> asset;
+    LruList::iterator lru_it;   // valid only while in_lru is true
+    bool in_lru{false};
+  };
+
+  void evictOne() noexcept;   // Pops the LRU tail. Caller holds m_mutex.
+
+  mutable std::mutex m_mutex;
+  ossia::hash_map<uint64_t, Slot> m_entries;
+  LruList m_lru;              // cold entries, newest at front
+  std::size_t m_total_bytes{0};
+  std::size_t m_cold_bytes{0};
+};
+
+} // namespace Gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp b/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp
index de27d2090b..3ee61ae569 100644
--- a/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp
@@ -13,7 +13,7 @@ namespace Gfx::CSF
 
 QSet<QString> LibraryHandler::acceptedFiles() const noexcept
 {
-  return {"cs", "comp"};
+  return {"cs", "comp", "csf"};
 }
 
 void LibraryHandler::setup(
@@ -62,7 +62,7 @@ QWidget* LibraryHandler::previewWidget(
 
 QSet<QString> DropHandler::fileExtensions() const noexcept
 {
-  return {"cs", "comp"};
+  return {"cs", "comp", "csf"};
 }
 
 void DropHandler::dropPath(
diff --git a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp
index 95d1b063d3..20de4e309b 100644
--- a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp
@@ -6,10 +6,13 @@
 
 #include <Gfx/Graph/Node.hpp>
 #include <Gfx/Graph/ShaderCache.hpp>
+#include <Gfx/ShaderProgram.hpp>
 #include <Gfx/TexturePort.hpp>
 
 #include <score/application/GUIApplicationContext.hpp>
+#include <score/document/DocumentInterface.hpp>
 #include <score/tools/DeleteAll.hpp>
+#include <score/tools/FilePath.hpp>
 
 #include <QFileInfo>
 
@@ -78,7 +81,10 @@ Model::Model(
 
   QFile f{init};
   if(f.open(QIODevice::ReadOnly))
+  {
+    m_scriptPath = init;
     (void)setCompute(f.readAll());
+  }
 }
 
 Model::~Model() { }
@@ -87,8 +93,18 @@ bool Model::validate(const QString& txt) const noexcept
 {
   try
   {
+    // Expand #include directives against the model's origin dir + the
+    // global search paths before handing the source to the ISF parser.
+    auto [resolved, err]
+        = Gfx::preprocessShaderIncludes(txt.toUtf8(), m_scriptPath);
+    if(!err.isEmpty())
+    {
+      this->errorMessage(0, err);
+      return false;
+    }
+
     // Parse the CSF shader to extract metadata
-    std::string str = txt.toStdString();
+    std::string str(resolved.constData(), resolved.size());
     isf::parser p{str, isf::parser::ShaderType::CSF};
 
     // Check if it's a valid CSF shader
@@ -144,15 +160,25 @@ Process::ScriptChangeResult Model::setScript(const QString& f)
 {
   m_compute = f;
 
-  QString processed = m_compute;
-
   auto inls = score::clearAndDeleteLater(m_inlets);
   auto outls = score::clearAndDeleteLater(m_outlets);
 
   try
   {
+    // Expand #include directives against the model's origin dir before
+    // feeding the source to the ISF parser.
+    auto [resolved, err]
+        = Gfx::preprocessShaderIncludes(m_compute.toUtf8(), m_scriptPath);
+    if(!err.isEmpty())
+    {
+      this->errorMessage(0, err);
+      return {.valid = false, .inlets = std::move(inls), .outlets = std::move(outls)};
+    }
+
     // Parse CSF shader
-    isf::parser p{processed.toStdString(), isf::parser::ShaderType::CSF};
+    isf::parser p{
+        std::string(resolved.constData(), resolved.size()),
+        isf::parser::ShaderType::CSF};
     m_processedProgram.descriptor = p.data();
     m_processedProgram.fragment = QString::fromStdString(p.compute_shader());
     m_processedProgram.type = isf::parser::ShaderType::CSF;
@@ -310,8 +336,19 @@ void Model::setupCSF(const isf::descriptor& desc)
         alternatives.emplace_back("2", 2);
       }
 
+      // ComboBox::init is a VALUE that should match one of the alternatives'
+      // values — NOT an index. libisf stores `v.def` as the INDEX into
+      // values (see isf.hpp comment on long_input::def). Passing the raw
+      // index made the ComboBox fail to match any alternative and silently
+      // default to alternatives[0], which is why DEFAULT: 32 in
+      // VALUES: [16, 32, 64] showed up as 16 in the UI. Look up the
+      // alternative at v.def and pass its second (the value).
+      const std::size_t def_idx
+          = std::min<std::size_t>(v.def, alternatives.size() - 1);
+      const ossia::value& init_value = alternatives[def_idx].second;
+
       auto port = new Process::ComboBox(
-          std::move(alternatives), (int)v.def, QString::fromStdString(input.name),
+          std::move(alternatives), init_value, QString::fromStdString(input.name),
           Id<Process::Port>(input_i++), &self);
 
       self.m_inlets.push_back(port);
@@ -448,18 +485,34 @@ void Model::setupCSF(const isf::descriptor& desc)
             QString::fromStdString(input.name), Id<Process::Port>(output_i++), &self);
         self.m_outlets.push_back(port);
 
-        auto size_inl = new Process::IntSpinBox{
-            1,
-            536870911,
-            1024,
-            QString::fromStdString(input.name) + " size",
-            Id<Process::Port>(input_i++),
-            &self};
-        self.m_inlets.push_back(size_inl);
-        self.controlAdded(size_inl->id());
+        // Only writable buffers whose layout ends in a flexible-array member
+        // get a synthesized "size" inlet — this MUST match the renderer
+        // (isf_input_port_count_vis / isf_input_port_vis) and the generated
+        // GLSL, or every later control routes to the wrong port.
+        if(!v.layout.empty()
+           && v.layout.back().type.find("[]") != std::string::npos)
+        {
+          auto size_inl = new Process::IntSpinBox{
+              1,
+              536870911,
+              1024,
+              QString::fromStdString(input.name) + " size",
+              Id<Process::Port>(input_i++),
+              &self};
+          self.m_inlets.push_back(size_inl);
+          self.controlAdded(size_inl->id());
+        }
       }
     }
 
+    // UBO input: read-only, sourced from an upstream Buffer port.
+    void operator()(const uniform_input& v)
+    {
+      const Id<Process::Port> inlet_id(input_i++);
+      self.m_inlets.push_back(new Gfx::TextureInlet(
+          QString::fromStdString(input.name), inlet_id, &self));
+    }
+
     void operator()(const texture_input& v)
     {
       auto port = new Gfx::TextureInlet(
@@ -606,7 +659,17 @@ Process::Descriptor ProcessFactory::descriptor(QString) const noexcept
 template <>
 void DataStreamReader::read(const Gfx::CSF::Model& proc)
 {
-  m_stream << proc.m_compute;
+  // documentContext() SCORE_ASSERTs when the model isn't in a document
+  // (e.g. saving a template / copy). Only relativize against the document
+  // when there's an actual script path to relativize — mirrors the
+  // JSON/load guards. The empty case writes an empty path verbatim.
+  QString relativeScriptPath;
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    relativeScriptPath = score::relativizeFilePath(proc.m_scriptPath, ctx);
+  }
+  m_stream << proc.m_compute << relativeScriptPath;
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 
   insertDelimiter();
@@ -616,7 +679,12 @@ template <>
 void DataStreamWriter::write(Gfx::CSF::Model& proc)
 {
   QString s;
-  m_stream >> s;
+  m_stream >> s >> proc.m_scriptPath;
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+  }
   (void)proc.setScript(s);
   writePorts(
       *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
@@ -629,6 +697,11 @@ template <>
 void JSONReader::read(const Gfx::CSF::Model& proc)
 {
   obj["Compute"] = proc.script();
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx);
+  }
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 }
 
@@ -636,6 +709,15 @@ template <>
 void JSONWriter::write(Gfx::CSF::Model& proc)
 {
   QString s = obj["Compute"].toString();
+  if(auto r = obj.tryGet("Root"))
+  {
+    proc.m_scriptPath <<= *r;
+    if(!proc.m_scriptPath.isEmpty())
+    {
+      auto& ctx = score::IDocument::documentContext(proc);
+      proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+    }
+  }
   (void)proc.setScript(s);
   writePorts(
       *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
diff --git a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp
index a0c6580885..1ac120ddf7 100644
--- a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp
@@ -75,6 +75,11 @@ class Model final : public Process::ProcessModel
   void errorMessage(int line, const QString& err) const
       W_SIGNAL(errorMessage, line, err);
 
+  // Absolute path of the shader file this model was loaded from. Used as
+  // the base for quoted #include resolution in ProgramCache::get. Empty
+  // when the shader source is in-memory. Mirrors JS::ProcessModel::m_root.
+  QString rootPath() const noexcept { return m_scriptPath; }
+
 private:
   void loadPreset(const Process::Preset& preset) override;
   Process::Preset savePreset() const noexcept override;
@@ -84,6 +89,7 @@ class Model final : public Process::ProcessModel
 
   QString m_compute;
   ProcessedProgram m_processedProgram;
+  QString m_scriptPath;
 };
 
 struct ProcessFactory final : Process::ProcessFactory_T<Gfx::CSF::Model>
diff --git a/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp b/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp
index 725a13bf26..6f53be1ce5 100644
--- a/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp
@@ -8,14 +8,19 @@ extern "C" {
 #include <libavutil/pixfmt.h>
 }
 
-// !
+// clang-format off
+// Order-sensitive — do NOT let clang-format sort these:
+//  - <initguid.h> must precede <windows.h>/<dshow.h> so the DirectShow GUIDs get
+//    a real definition (not just an extern declaration);
+//  - the Windows system headers must come before <dshow.h>/<dvdmedia.h>.
 #include <initguid.h>
-// ! Needs to be present before, to ensure uuids get enumerated
+#include <windows.h>
 
 #include <dshow.h>
 #include <dvdmedia.h>
-#include <wmcodecdsp.h>
 #include <oleauto.h>
+#include <wmcodecdsp.h>
+// clang-format on
 
 namespace Gfx
 {
diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp
index bd1a1f1185..c733244f04 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp
@@ -13,6 +13,7 @@
 #include <ossia/gfx/texture_parameter.hpp>
 #include <ossia/network/base/device.hpp>
 
+#include <QDebug>
 #include <QDir>
 #include <QGuiApplication>
 #include <QJsonArray>
@@ -163,7 +164,7 @@ void DropHandler::dropPath(
 
 void DropHandler::dropCustom(
     std::vector<ProcessDrop>& vec, const QMimeData& mime,
-    const score::DocumentContext& ctx) const noexcept
+    const score::DocumentContext& ctx) const
 {
   // FIXME handle multipass / multibuffer
   for(const auto& uri : mime.urls())
@@ -186,28 +187,40 @@ void DropHandler::dropCustom(
           {
             continue;
           }
-          isf::parser parser("", shader_json, 450, isf::parser::ShaderType::ShaderToy);
-          auto isf = parser.write_isf();
-          auto spec = parser.data();
-          if(isf.empty())
+          // The ISF parser throws invalid_file on malformed Shadertoy
+          // JSON (empty body, non-JSON response, missing fields, parse-
+          // time validation failures like non-numeric LOCATION). Catch
+          // per URL so one bad URL doesn't abort the whole drop batch.
+          try
+          {
+            isf::parser parser("", shader_json, 450, isf::parser::ShaderType::ShaderToy);
+            auto isf = parser.write_isf();
+            auto spec = parser.data();
+            if(isf.empty())
+            {
+              continue;
+            }
+            // For immediate feedback, add a placeholder
+            Process::ProcessDropHandler::ProcessDrop p;
+            p.creation.key = Metadata<ConcreteKey_k, Gfx::Filter::Model>::get();
+            p.creation.prettyName = "Shadertoy " + shaderId;
+            p.setup = [isf](Process::ProcessModel& p, score::Dispatcher& d) {
+              auto& filter = (Gfx::Filter::Model&)p;
+              Gfx::ShaderSource source;
+              source.vertex = "";
+              source.fragment = QString::fromStdString(isf);
+              auto cmd = new Gfx::ChangeShader{
+                  filter, source, score::IDocument::documentContext(p)};
+              d.submit(cmd);
+            };
+
+            vec.push_back(std::move(p));
+          }
+          catch(const std::exception& e)
           {
+            qWarning() << "Shadertoy drop failed for" << shaderId << ":" << e.what();
             continue;
           }
-          // For immediate feedback, add a placeholder
-          Process::ProcessDropHandler::ProcessDrop p;
-          p.creation.key = Metadata<ConcreteKey_k, Gfx::Filter::Model>::get();
-          p.creation.prettyName = "Shadertoy " + shaderId;
-          p.setup = [isf](Process::ProcessModel& p, score::Dispatcher& d) {
-            auto& filter = (Gfx::Filter::Model&)p;
-            Gfx::ShaderSource source;
-            source.vertex = "";
-            source.fragment = QString::fromStdString(isf);
-            auto cmd = new Gfx::ChangeShader{
-                filter, source, score::IDocument::documentContext(p)};
-            d.submit(cmd);
-          };
-
-          vec.push_back(std::move(p));
         }
       }
     }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp
index 53a41e75d7..152e3f8aba 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp
@@ -44,7 +44,7 @@ class DropHandler final : public Process::ProcessDropHandler
 
   void dropCustom(
       std::vector<ProcessDrop>& drops, const QMimeData& mime,
-      const score::DocumentContext& ctx) const noexcept override;
+      const score::DocumentContext& ctx) const override;
 };
 
 struct VideoTextureDropHandler : public Process::ProcessDropHandler
diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp
index 7175e95344..93df20d895 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp
@@ -1,12 +1,15 @@
 #include <Process/Preset.hpp>
 
 #include <Gfx/Filter/PreviewWidget.hpp>
+#include <Gfx/Graph/BackgroundNode.hpp>
 #include <Gfx/Graph/ISFNode.hpp>
-#include <Gfx/Graph/ScreenNode.hpp>
+#include <Gfx/Graph/ISFVisitors.hpp>
 #include <Gfx/Settings/Model.hpp>
+#include <Gfx/Widgets/RhiPreviewWidget.hpp>
 
 #include <score/application/ApplicationContext.hpp>
 
+#include <ossia/detail/algorithms.hpp>
 #include <ossia/network/value/value.hpp>
 
 #include <QApplication>
@@ -137,7 +140,8 @@ struct PreviewInputVisitor
   
   // CSF-specific input handlers
   score::gfx::NodeModel* operator()(const isf::storage_input& v) { return nullptr; }
-  
+  score::gfx::NodeModel* operator()(const isf::uniform_input& v) { return nullptr; }
+
   score::gfx::NodeModel* operator()(const isf::texture_input& v)
   {
     static std::array<QImage, 3> images{
@@ -175,61 +179,80 @@ struct PreviewPresetVisitor
 {
   score::gfx::ISFNode& node;
   ossia::flat_map<int, ossia::value>& controls;
+  // Descriptor-input index: matches both the saved preset control keys
+  // (model inlet id == desc.inputs index, see setupISFModelPorts) and the
+  // controls flat_map key.
   int i{};
+  // Render-port index: index into node.input[], advanced via
+  // walk_descriptor_inputs (an input may create 0 or 2 ports, so this
+  // drifts from the descriptor index).
+  int port{};
+
+  // Material storage of the current render port as float*: nullptr when
+  // `port` is out of range; a null `value` is passed through unchanged.
+  float* portValue() const noexcept
+  {
+    // NB: for scalar/vector inputs `value` always points into the material
+    // UBO blob; image/audio inputs never reach this (their visitors no-op).
+    const bool in_range = port >= 0 && port < (int)node.input.size();
+    return in_range ? reinterpret_cast<float*>(node.input[port]->value)
+                    : nullptr;
+  }
+
   void operator()(const isf::float_input& v)
   {
-    if(float* v = controls[i].target<float>())
-    {
-      (*(float*)node.input[i]->value) = *v;
-    }
+    if(float* dst = portValue(); dst)
+      if(float* val = controls[i].target<float>())
+        *dst = *val;
   }
 
   void operator()(const isf::long_input& v)
   {
-    if(int* v = controls[i].target<int>())
-    {
-      (*(int*)node.input[i]->value) = *v;
-    }
+    if(float* dst = portValue(); dst)
+      if(int* val = controls[i].target<int>())
+        *reinterpret_cast<int*>(dst) = *val;
   }
 
   void operator()(const isf::event_input& v) { }
 
   void operator()(const isf::bool_input& v)
   {
-    if(bool* v = controls[i].target<bool>())
-    {
-      (*(int*)node.input[i]->value) = *v ? 1 : 0;
-    }
+    if(float* dst = portValue(); dst)
+      if(bool* val = controls[i].target<bool>())
+        *reinterpret_cast<int*>(dst) = *val ? 1 : 0;
   }
 
   void operator()(const isf::point2d_input& v)
   {
-    if(ossia::vec2f* v = controls[i].target<ossia::vec2f>())
-    {
-      (*(float*)node.input[i]->value) = (*v)[0];
-      (*((float*)node.input[i]->value + 1)) = (*v)[1];
-    }
+    if(float* dst = portValue(); dst)
+      if(ossia::vec2f* val = controls[i].target<ossia::vec2f>())
+      {
+        dst[0] = (*val)[0];
+        dst[1] = (*val)[1];
+      }
   }
 
   void operator()(const isf::point3d_input& v)
   {
-    if(ossia::vec3f* v = controls[i].target<ossia::vec3f>())
-    {
-      (*(float*)node.input[i]->value) = (*v)[0];
-      (*((float*)node.input[i]->value + 1)) = (*v)[1];
-      (*((float*)node.input[i]->value + 2)) = (*v)[2];
-    }
+    if(float* dst = portValue(); dst)
+      if(ossia::vec3f* val = controls[i].target<ossia::vec3f>())
+      {
+        dst[0] = (*val)[0];
+        dst[1] = (*val)[1];
+        dst[2] = (*val)[2];
+      }
   }
 
   void operator()(const isf::color_input& v)
   {
-    if(ossia::vec4f* v = controls[i].target<ossia::vec4f>())
-    {
-      (*(float*)node.input[i]->value) = (*v)[0];
-      (*((float*)node.input[i]->value + 1)) = (*v)[1];
-      (*((float*)node.input[i]->value + 2)) = (*v)[2];
-      (*((float*)node.input[i]->value + 3)) = (*v)[3];
-    }
+    if(float* dst = portValue(); dst)
+      if(ossia::vec4f* val = controls[i].target<ossia::vec4f>())
+      {
+        dst[0] = (*val)[0];
+        dst[1] = (*val)[1];
+        dst[2] = (*val)[2];
+        dst[3] = (*val)[3];
+      }
   }
 
   void operator()(const isf::image_input& v) { }
@@ -244,6 +267,7 @@ struct PreviewPresetVisitor
   
   // CSF-specific input handlers
   void operator()(const isf::storage_input& v) { }
+  void operator()(const isf::uniform_input& v) { }
 
   void operator()(const isf::texture_input& v) { }
 
@@ -256,18 +280,17 @@ struct PreviewPresetVisitor
 ShaderPreviewManager* g_shaderPreview{};
 bool g_shaderPreviewScheduledForDeletion{};
 
-// Creating and destroying QRhi is fairly expensive, so
-// we keep one around when we are showing ISF previews
+// Holds the source ISF + image nodes shared across hover previews.
+// The output side is owned by individual ShaderPreviewWidget /
+// RhiPreviewWidget instances: each contributes a score::gfx::PreviewNode
+// targeting its own QRhiWidget render target. Multiple previews can be
+// attached at once (e.g. library hover + live texture-port preview).
 class ShaderPreviewManager : public QObject
 {
 public:
   ShaderPreviewManager()
       : QObject{qApp}
   {
-    score::gfx::OutputNode::Configuration conf{};
-    m_screen = std::make_unique<score::gfx::ScreenNode>(conf, true);
-    m_graph.addNode(m_screen.get());
-
     connect(qApp, &QCoreApplication::aboutToQuit, this, [] {
       delete g_shaderPreview;
       g_shaderPreviewScheduledForDeletion = false;
@@ -288,7 +311,8 @@ class ShaderPreviewManager : public QObject
     if(path.contains(".vs") || path.contains(".vert"))
       program = programFromVSAVertexShaderPath(path, {});
 
-    if(const auto& [processed, error] = ProgramCache::instance().get(program);
+    if(const auto& [processed, error]
+       = ProgramCache::instance().get(program, path);
        bool(processed))
     {
       m_program = *processed;
@@ -311,6 +335,8 @@ class ShaderPreviewManager : public QObject
     auto vert = obj["Vertex"].GetString();
     ShaderSource program{type, vert, frag};
 
+    // Preset-loaded source has no origin file; includes resolve against
+    // global search paths only.
     if(const auto& [processed, error] = ProgramCache::instance().get(program);
        bool(processed))
     {
@@ -324,21 +350,49 @@ class ShaderPreviewManager : public QObject
           controls[arr[0].GetInt()] = JsonValue{arr[1]}.to<ossia::value>();
         }
 
+        // controls is keyed by descriptor-input index (== model inlet id);
+        // node.input[] is keyed by render-port index. walk_descriptor_inputs
+        // gives the render-port index (cur.inlets) for each descriptor entry,
+        // which drifts from the descriptor index for 0-/2-port inputs.
         int i = 0;
-        for(const isf::input& input : m_program.descriptor.inputs)
-        {
-          ossia::visit(PreviewPresetVisitor{*m_isf, controls, i}, input.data);
-          i++;
-        }
+        score::gfx::walk_descriptor_inputs(
+            m_program.descriptor,
+            [&](const isf::input& input, const score::gfx::port_counts& cur,
+                const score::gfx::port_counts&) {
+              ossia::visit(
+                  PreviewPresetVisitor{*m_isf, controls, i, cur.inlets},
+                  input.data);
+              i++;
+            });
       }
     }
   }
 
-  std::shared_ptr<QWindow> getWindow()
+  score::gfx::Graph& graph() noexcept { return m_graph; }
+
+  // True while at least one preview widget is still attached to the shared
+  // graph. The deferred manager deletion must NOT fire while this holds, or
+  // a surviving widget's RhiPreviewWidget::m_graph would dangle (UAF on its
+  // detach()).
+  bool hasPreviews() const noexcept { return !m_previews.empty(); }
+
+  void attachPreview(score::gfx::BackgroundNode& node)
+  {
+    // Always track the preview: setup() wires every tracked preview to the
+    // ISF node it (re)creates, so attaching before a shader is loaded is fine.
+    m_previews.push_back(&node);
+    if(!m_isf)
+      return;
+    m_graph.addEdge(
+        m_isf->output[0], node.input[0], Process::CableType::ImmediateGlutton);
+    m_graph.createAllRenderLists(
+        score::AppContext().settings<Gfx::Settings::Model>().graphicsApiEnum());
+  }
+
+  void detachPreview(score::gfx::BackgroundNode& node)
   {
-    if(m_screen && m_screen.get())
-      return m_screen.get()->window();
-    return {};
+    ossia::remove_erase(m_previews, &node);
+    if(m_isf)
+      m_graph.removeEdge(m_isf->output[0], node.input[0]);
   }
 
   std::vector<std::pair<score::gfx::Port*, score::gfx::Port*>> m_previewEdges;
@@ -346,7 +400,7 @@ class ShaderPreviewManager : public QObject
   void setup()
   {
     const auto& settings = score::AppContext().settings<Gfx::Settings::Model>();
-    // Create our graph
+    // Tear down the previous set of source nodes.
     for(auto [a, b] : m_previewEdges)
       m_graph.removeEdge(a, b);
     m_previewEdges.clear();
@@ -359,48 +413,63 @@ class ShaderPreviewManager : public QObject
 
     if(m_isf)
     {
-      m_graph.removeEdge(m_isf->output[0], m_screen->input[0]);
+      for(auto* p : m_previews)
+        m_graph.removeEdge(m_isf->output[0], p->input[0]);
       m_graph.removeNode(m_isf.get());
     }
 
-    m_graph.removeNode(m_screen.get());
-
     // Clear the graph, renderers etc.
     m_graph.createAllRenderLists(settings.graphicsApiEnum());
 
     m_isf.reset();
     m_textures.clear();
 
-    // Recreate what we need
-    m_graph.addNode(m_screen.get());
-
     // FIXME add an error image if the shader did not parse
     m_isf = std::make_unique<score::gfx::ISFNode>(
         m_program.descriptor, m_program.vertex, m_program.fragment);
 
     m_graph.addNode(m_isf.get());
-    // Edge from filter to output
-    m_graph.addEdge(
-        m_isf->output[0], m_screen->input[0], Process::CableType::ImmediateGlutton);
 
-    // Edges from image nodes to image inputs
-    int image_i = 0;
-    int i = 0;
-    for(const isf::input& input : m_program.descriptor.inputs)
-    {
-      auto node = ossia::visit(PreviewInputVisitor{image_i}, input.data);
-      if(node)
-      {
-        m_graph.addNode(node);
+    // Wire ISF output to every currently-attached preview.
+    for(auto* p : m_previews)
+      m_graph.addEdge(
+          m_isf->output[0], p->input[0], Process::CableType::ImmediateGlutton);
 
-        m_graph.addEdge(
-            node->output[0], m_isf->input[i], Process::CableType::ImmediateGlutton);
-        m_previewEdges.emplace_back(node->output[0], m_isf->input[i]);
-
-        m_textures.push_back(std::unique_ptr<score::gfx::Node>(node));
-      }
-      i++;
-    }
+    // Edges from image nodes to image inputs. The render-port index of an
+    // input (cur.inlets, via walk_descriptor_inputs) drifts from the
+    // descriptor index for inputs that create 0 or 2 ports, so we must not
+    // equate them. PreviewInputVisitor only yields a node for image-like
+    // inputs, each of which creates exactly one input port at cur.inlets.
+    int image_i = 0;
+    score::gfx::walk_descriptor_inputs(
+        m_program.descriptor,
+        [&](const isf::input& input, const score::gfx::port_counts& cur,
+            const score::gfx::port_counts& delta) {
+          auto node = ossia::visit(PreviewInputVisitor{image_i}, input.data);
+          if(node)
+          {
+            const int port_idx = cur.inlets;
+            // Only wire when this input actually creates an input port:
+            // write-access csf_image_input yields a node but 0 inlets, and
+            // the render-port index must come from cur.inlets (not the
+            // descriptor index, which drifts for 0-/2-port inputs).
+            if(delta.inlets < 1 || port_idx < 0
+               || port_idx >= (int)m_isf->input.size())
+            {
+              delete node;
+              return;
+            }
+
+            m_graph.addNode(node);
+
+            m_graph.addEdge(
+                node->output[0], m_isf->input[port_idx],
+                Process::CableType::ImmediateGlutton);
+            m_previewEdges.emplace_back(node->output[0], m_isf->input[port_idx]);
+
+            m_textures.push_back(std::unique_ptr<score::gfx::Node>(node));
+          }
+        });
 
     m_graph.createAllRenderLists(settings.graphicsApiEnum());
   }
@@ -463,10 +532,10 @@ class ShaderPreviewManager : public QObject
     }
   }
 
-  std::unique_ptr<score::gfx::ScreenNode> m_screen{};
 private:
   std::unique_ptr<score::gfx::ISFNode> m_isf{};
   std::vector<std::unique_ptr<score::gfx::Node>> m_textures;
+  std::vector<score::gfx::BackgroundNode*> m_previews;
   score::gfx::Graph m_graph{};
   ProcessedProgram m_program;
 };
@@ -497,45 +566,59 @@ ShaderPreviewWidget::ShaderPreviewWidget(const Process::Preset& preset, QWidget*
 
 ShaderPreviewWidget::~ShaderPreviewWidget()
 {
+  // Tearing down the RhiPreviewWidget triggers detachPreview() on the
+  // manager, which removes the producer→preview edge. Do this before
+  // scheduling manager deletion so the deferred delete sees a clean
+  // graph.
+  delete m_rhi;
+  m_rhi = nullptr;
+
   g_shaderPreviewScheduledForDeletion = true;
   QTimer::singleShot(std::chrono::seconds(5), qApp, []() {
-    if(g_shaderPreviewScheduledForDeletion)
+    // Multi-client safety: several ShaderPreviewWidgets can share the same
+    // manager (library hover + live texture-port preview). Destroying one
+    // schedules this deletion, but another may still be attached — its
+    // RhiPreviewWidget holds a raw pointer into g_shaderPreview->graph().
+    // Only tear the manager down once no preview remains attached, otherwise
+    // the surviving widget would dereference a freed Graph on its own
+    // destruction (use-after-free).
+    if(g_shaderPreviewScheduledForDeletion && g_shaderPreview
+       && !g_shaderPreview->hasPreviews())
     {
       delete g_shaderPreview;
       g_shaderPreview = nullptr;
       g_shaderPreviewScheduledForDeletion = false;
     }
   });
-
-  if(m_window)
-    m_window->setParent(nullptr);
 }
 
 void ShaderPreviewWidget::setup()
 {
   // UI setup
   auto lay = new QHBoxLayout(this);
-  if((m_window = g_shaderPreview->getWindow()))
-  {
-    auto widg = createWindowContainer(m_window.get(), this);
-    widg->setMinimumWidth(300);
-    widg->setMaximumWidth(300);
-    widg->setMinimumHeight(200);
-    widg->setMaximumHeight(200);
-    lay->addWidget(widg);
-  }
-  // FIXME else { display error widget }
-
-  // so anyways, I started blasting...
+  m_rhi = new RhiPreviewWidget(this);
+  m_rhi->setMinimumSize(300, 200);
+  m_rhi->setMaximumSize(300, 200);
+  m_rhi->useGraph(
+      &g_shaderPreview->graph(),
+      [](score::gfx::BackgroundNode& n) {
+        if(g_shaderPreview)
+          g_shaderPreview->attachPreview(n);
+      },
+      [](score::gfx::BackgroundNode& n) {
+        if(g_shaderPreview)
+          g_shaderPreview->detachPreview(n);
+      });
+  lay->addWidget(m_rhi);
+
+  // Drives ISF time/progress uniforms. Frame submission is owned by
+  // the QRhiWidget (it calls update() each render).
   startTimer(16);
 }
 
 void ShaderPreviewWidget::timerEvent(QTimerEvent* event)
 {
   if(g_shaderPreview)
-  {
     g_shaderPreview->updateControls();
-    g_shaderPreview->m_screen->render();
-  }
 }
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp
index e58e7ded5a..76318f8189 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp
@@ -3,11 +3,10 @@
 #include <Gfx/Graph/ImageNode.hpp>
 #include <Gfx/Graph/Node.hpp>
 #include <Gfx/Graph/ShaderCache.hpp>
-#include <Gfx/Graph/Window.hpp>
 #include <Gfx/ShaderProgram.hpp>
 
-#include <QWidget>
 #include <QHBoxLayout>
+#include <QWidget>
 namespace score::gfx
 {
 class ISFNode;
@@ -18,6 +17,7 @@ struct Preset;
 }
 namespace Gfx
 {
+class RhiPreviewWidget;
 class ShaderPreviewManager;
 class ShaderPreviewWidget : public QWidget
 {
@@ -30,7 +30,7 @@ class ShaderPreviewWidget : public QWidget
   void setup();
   void timerEvent(QTimerEvent* event) override;
 
-  std::shared_ptr<QWindow> m_window;
+  RhiPreviewWidget* m_rhi{};
 };
 
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp
index b6a900ae26..3499e835f3 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp
@@ -11,8 +11,10 @@
 #include <Gfx/TexturePort.hpp>
 
 #include <score/application/GUIApplicationContext.hpp>
+#include <score/document/DocumentInterface.hpp>
 #include <score/tools/DeleteAll.hpp>
 #include <score/tools/File.hpp>
+#include <score/tools/FilePath.hpp>
 
 #include <QFileInfo>
 
@@ -71,10 +73,12 @@ Model::Model(
 
   if(init.endsWith("fs") || init.endsWith("frag"))
   {
+    m_scriptPath = init;
     (void)setProgram(programFromISFFragmentShaderPath(init, {}));
   }
   else if(init.endsWith("vs") || init.endsWith("vert"))
   {
+    m_scriptPath = init;
     (void)setProgram(programFromVSAVertexShaderPath(init, {}));
   }
 }
@@ -83,7 +87,7 @@ Model::~Model() { }
 
 bool Model::validate(const ShaderSource& txt) const noexcept
 {
-  const auto& [_, error] = ProgramCache::instance().get(txt);
+  const auto& [_, error] = ProgramCache::instance().get(txt, m_scriptPath);
   if(!error.isEmpty())
   {
     this->errorMessage(error);
@@ -116,7 +120,9 @@ Process::ScriptChangeResult Model::setProgram(const ShaderSource& f)
 {
   setVertex(f.vertex);
   setFragment(f.fragment);
-  if(const auto& [processed, error] = ProgramCache::instance().get(f); bool(processed))
+  if(const auto& [processed, error]
+     = ProgramCache::instance().get(f, m_scriptPath);
+     bool(processed))
   {
     ossia::flat_map<QString, ossia::value> previous_values;
     for(auto inl : m_inlets)
@@ -203,7 +209,17 @@ void DataStreamWriter::write(Gfx::ShaderSource& p)
 template <>
 void DataStreamReader::read(const Gfx::Filter::Model& proc)
 {
-  m_stream << proc.m_program;
+  // documentContext() SCORE_ASSERTs when the model isn't in a document
+  // (e.g. saving a template / copy). Only relativize against the document
+  // when there's an actual script path to relativize — mirrors the
+  // JSON/load guards. The empty case writes an empty path verbatim.
+  QString relativeScriptPath;
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    relativeScriptPath = score::relativizeFilePath(proc.m_scriptPath, ctx);
+  }
+  m_stream << proc.m_program << relativeScriptPath;
 
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 
@@ -214,7 +230,12 @@ template <>
 void DataStreamWriter::write(Gfx::Filter::Model& proc)
 {
   Gfx::ShaderSource s;
-  m_stream >> s;
+  m_stream >> s >> proc.m_scriptPath;
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+  }
   s.type = isf::parser::ShaderType::ISF;
   (void)proc.setProgram(s);
 
@@ -230,6 +251,11 @@ void JSONReader::read(const Gfx::Filter::Model& proc)
 {
   obj["Vertex"] = proc.vertex();
   obj["Fragment"] = proc.fragment();
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx);
+  }
 
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 }
@@ -241,6 +267,15 @@ void JSONWriter::write(Gfx::Filter::Model& proc)
   s.vertex = obj["Vertex"].toString();
   s.fragment = obj["Fragment"].toString();
   s.type = isf::parser::ShaderType::ISF;
+  if(auto r = obj.tryGet("Root"))
+  {
+    proc.m_scriptPath <<= *r;
+    if(!proc.m_scriptPath.isEmpty())
+    {
+      auto& ctx = score::IDocument::documentContext(proc);
+      proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+    }
+  }
   (void)proc.setProgram(s);
 
   writePorts(
diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp
index a6e04b48c2..b8fd28005b 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp
@@ -64,6 +64,12 @@ class Model final : public Process::ProcessModel
     return m_processedProgram;
   }
 
+  // Absolute path of the shader file this model was loaded from. Used as
+  // the base for quoted #include resolution in ProgramCache::get. Empty
+  // when the shader source is in-memory (default preset, pasted text).
+  // Mirrors JS::ProcessModel::m_root.
+  QString rootPath() const noexcept { return m_scriptPath; }
+
   void errorMessage(const QString& arg_2) const W_SIGNAL(errorMessage, arg_2);
 
 private:
@@ -73,6 +79,7 @@ class Model final : public Process::ProcessModel
 
   ShaderSource m_program;
   ProcessedProgram m_processedProgram;
+  QString m_scriptPath;
 };
 
 struct ProcessFactory final : Process::ProcessFactory_T<Gfx::Filter::Model>
diff --git a/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.cpp b/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.cpp
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.hpp b/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.hpp
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp
index 756db583a4..b54882a415 100644
--- a/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp
@@ -85,6 +85,7 @@ struct GStreamerOutputNode : score::gfx::OutputNode
   GstElement* m_audio_src{};
   GStreamerSettings m_settings;
   bool m_started{};
+  uint64_t m_video_max_bytes{}; // appsrc queue cap; 0 = disabled
   std::unique_ptr<score::gfx::GPUVideoEncoder> m_encoder[2];
   int m_encoderIdx{}; // ping-pong index for double-buffered encoder
   QString m_detectedFormat;      // UYVY, NV12, I420, or empty for RGBA
@@ -125,6 +126,14 @@ struct GStreamerOutputNode : score::gfx::OutputNode
       qDebug() << "GStreamer output parse error:" << err->message;
       if(gst.g_error_free)
         gst.g_error_free(err);
+      // gst_parse_launch (non-_full) can return a non-NULL *partial* pipeline
+      // with *error set. Such a pipeline is broken (e.g. missing appsrcs) —
+      // unref it so we don't leak/retain it and never set it PLAYING.
+      if(m_pipeline)
+      {
+        gst.object_unref(m_pipeline);
+        m_pipeline = nullptr;
+      }
       return false;
     }
     if(!m_pipeline)
@@ -177,10 +186,29 @@ struct GStreamerOutputNode : score::gfx::OutputNode
           gst.object_set_property(elem, prop, &gv);
           gst.value_unset(&gv);
         };
+        auto setUInt64 = [&](GstElement* elem, const char* prop, uint64_t val) {
+          if(!gst.value_set_uint64)
+            return;
+          GValue gv{};
+          gst.value_init(&gv, G_TYPE_UINT64);
+          gst.value_set_uint64(&gv, val);
+          gst.object_set_property(elem, prop, &gv);
+          gst.value_unset(&gv);
+        };
 
         setBool(m_video_src, "is-live", true);
         setBool(m_video_src, "do-timestamp", true);
         setInt(m_video_src, "format", 3); // GST_FORMAT_TIME
+
+        // Backpressure: the appsrc default max-bytes is 200000, far below a
+        // single 1080p RGBA frame (~8 MB). Bound the queue to a few frames so
+        // RSS can't grow without limit when downstream stalls. We additionally
+        // drop frames ourselves (see push_video_frame_*) by polling
+        // current-level-bytes, which gives downstream-leaky behaviour without
+        // depending on the leaky-type enum GType (not introspectable here) and
+        // without blocking the render thread.
+        m_video_max_bytes = (uint64_t)16 * 1024 * 1024; // ~2 frames @1080p RGBA
+        setUInt64(m_video_src, "max-bytes", m_video_max_bytes);
       }
     }
 
@@ -279,6 +307,36 @@ struct GStreamerOutputNode : score::gfx::OutputNode
     m_started = true;
   }
 
+  // Non-blocking bus poll: surfaces otherwise-silent encoder/filesink/muxer
+  // errors. Called once per rendered frame; on the first ERROR it logs and
+  // clears m_started so no further frames are pushed into a dead pipeline.
+  void poll_bus_errors()
+  {
+    if(!m_pipeline || !m_started)
+      return;
+
+    auto& gst = libgstreamer::instance();
+    if(!gst.element_get_bus || !gst.bus_timed_pop_filtered)
+      return;
+
+    GstBus* bus = gst.element_get_bus(m_pipeline);
+    if(!bus)
+      return;
+
+    // Filter on ERROR only: a WARNING is non-fatal and must not halt output.
+    // timeout==0 => return immediately if no matching message is queued.
+    if(GstMessage* msg = gst.bus_timed_pop_filtered(
+           bus, 0, (GstMessageType)GST_MESSAGE_ERROR))
+    {
+      qWarning() << "GStreamer output: pipeline error on the bus";
+      if(gst.message_unref)
+        gst.message_unref(msg);
+      // An ERROR aborts the pipeline; stop feeding it.
+      m_started = false;
+    }
+    gst.object_unref(bus);
+  }
+
   void stop_pipeline()
   {
     if(!m_pipeline || !m_started)
@@ -292,6 +350,33 @@ struct GStreamerOutputNode : score::gfx::OutputNode
     if(m_audio_src && gst.app_src_end_of_stream)
       gst.app_src_end_of_stream(m_audio_src);
 
+    // appsrc EOS is ASYNC: it travels through the pipeline as a buffer would,
+    // and muxers (mp4mux/matroskamux/...) only finalize the file once EOS
+    // reaches them. Setting the pipeline to NULL immediately would truncate
+    // the moov atom / cluster index, producing unplayable files. Wait for the
+    // EOS (or ERROR) message on the bus, with a bounded timeout so we never
+    // hang the UI thread on a stuck pipeline.
+    if(gst.element_get_bus && gst.bus_timed_pop_filtered)
+    {
+      if(GstBus* bus = gst.element_get_bus(m_pipeline))
+      {
+        GstMessage* msg = gst.bus_timed_pop_filtered(
+            bus, 5 * GST_SECOND,
+            (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
+        if(msg)
+        {
+          if(gst.message_unref)
+            gst.message_unref(msg);
+        }
+        else
+        {
+          qWarning() << "GStreamer output: timed out waiting for EOS; "
+                        "output file may be truncated";
+        }
+        gst.object_unref(bus);
+      }
+    }
+
     gst.element_set_state(m_pipeline, GST_STATE_NULL);
     m_started = false;
   }
@@ -309,12 +394,35 @@ struct GStreamerOutputNode : score::gfx::OutputNode
     }
   }
 
+  // Downstream-leaky backpressure: true when appsrc's current-level-bytes has
+  // reached m_video_max_bytes; false if the cap is 0 or accessors are missing.
+  bool video_queue_full() const
+  {
+    if(m_video_max_bytes == 0 || m_video_src == nullptr)
+      return false;
+
+    auto& gst = libgstreamer::instance();
+    const bool usable = gst.object_get_property && gst.value_init
+                        && gst.value_unset && gst.value_get_uint64;
+    if(!usable)
+      return false;
+
+    GValue queued{};
+    gst.value_init(&queued, G_TYPE_UINT64);
+    gst.object_get_property(m_video_src, "current-level-bytes", &queued);
+    const uint64_t queued_bytes = gst.value_get_uint64(&queued);
+    gst.value_unset(&queued);
+    return queued_bytes >= m_video_max_bytes;
+  }
+
   // Zero-copy push: takes a shallow copy of the QByteArray.
   // The QByteArray's refcount keeps the data alive until GStreamer is done.
   void push_video_frame_zerocopy(QByteArray data)
   {
     if(!m_video_src || !m_started)
       return;
+    if(video_queue_full())
+      return; // drop: downstream can't keep up
 
     auto& gst = libgstreamer::instance();
     if(!gst.buffer_new_wrapped_full)
@@ -405,6 +513,9 @@ struct GStreamerOutputNode : score::gfx::OutputNode
     if(!renderer || !m_renderState)
       return;
 
+    // Surface any silent pipeline errors (encoder/filesink/muxer failures).
+    poll_bus_errors();
+
     auto rhi = m_renderState->rhi;
     QRhiCommandBuffer* cb{};
     if(rhi->beginOffscreenFrame(&cb) != QRhi::FrameOpSuccess)
@@ -492,18 +603,15 @@ struct GStreamerOutputNode : score::gfx::OutputNode
 
   void createOutput(score::gfx::OutputConfiguration conf) override
   {
-    m_renderState = std::make_shared<score::gfx::RenderState>();
-
-    m_renderState->surface = QRhiGles2InitParams::newFallbackSurface();
-    QRhiGles2InitParams params;
-    params.fallbackSurface = m_renderState->surface;
-    score::GLCapabilities caps;
-    caps.setupFormat(params.format);
-    m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, &params, {});
-    m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
+    m_renderState = score::gfx::createRenderState(
+        conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr);
+    if(!m_renderState || !m_renderState->rhi)
+    {
+      qWarning() << "GStreamerOutputNode: failed to create QRhi";
+      m_renderState.reset();
+      return;
+    }
     m_renderState->outputSize = m_renderState->renderSize;
-    m_renderState->api = score::gfx::GraphicsApi::OpenGL;
-    m_renderState->version = caps.qShaderVersion;
 
     auto rhi = m_renderState->rhi;
     m_texture = rhi->newTexture(
@@ -517,10 +625,12 @@ struct GStreamerOutputNode : score::gfx::OutputNode
         m_renderState->renderPassDescriptor);
     m_renderTarget->create();
 
-    init_pipeline();
+    const bool pipeline_ok = init_pipeline();
+    if(!pipeline_ok)
+      qWarning() << "GStreamerOutputNode: pipeline init failed; output disabled";
 
     // Create GPU encoder if a YUV target format was detected
-    if(!m_detectedFormat.isEmpty() && rhi)
+    if(pipeline_ok && !m_detectedFormat.isEmpty() && rhi)
     {
       auto makeEncoder = [&]() -> std::unique_ptr<score::gfx::GPUVideoEncoder> {
         if(m_detectedFormat == "UYVY" || m_detectedFormat == "YUY2")
@@ -538,6 +648,24 @@ struct GStreamerOutputNode : score::gfx::OutputNode
 
       if(m_encoder[0] && m_encoder[1])
       {
+        // Stride alignment: QRhi reads textures back with TIGHTLY packed rows,
+        // but GStreamer's default GstVideoInfo strides are GST_ROUND_UP_4. For
+        // the planar/semi-planar YUV formats the two only agree when each plane
+        // row is already a multiple of 4:
+        //   I420: Y stride = width, chroma stride = width/2  -> need width%8==0
+        //   NV12: Y stride = width, UV   stride = width      -> need width%4==0
+        //   UYVY: stride   = width*2 (4:2:2 macropixels)     -> need width%2==0
+        // height must be even for 4:2:0 vertical subsampling. We round DOWN so
+        // we never sample past the rendered texture, and feed the SAME aligned
+        // dimensions to both the encoder and the negotiated caps so the tight
+        // readback matches GStreamer's expected (now no-op ROUND_UP_4) strides.
+        const int enc_w = std::max(8, m_settings.width & ~7);  // mult of 8 (covers 4 & 2)
+        const int enc_h = std::max(2, m_settings.height & ~1); // mult of 2
+        if(enc_w != m_settings.width || enc_h != m_settings.height)
+          qDebug() << "GStreamer output: aligning" << m_detectedFormat
+                   << "from" << m_settings.width << "x" << m_settings.height
+                   << "to" << enc_w << "x" << enc_h << "for packed strides";
+
         auto input_trc = static_cast<AVColorTransferCharacteristic>(m_settings.input_transfer);
         auto colorShader = colorShaderFromColorimetry(m_detectedColorimetry, input_trc);
         qDebug() << "GStreamer output: GPU encoder"
@@ -546,9 +674,9 @@ struct GStreamerOutputNode : score::gfx::OutputNode
                  << "inputTrc=" << m_settings.input_transfer
                  << "shaderLen=" << colorShader.size();
         m_encoder[0]->init(*rhi, *m_renderState, m_texture,
-                           m_settings.width, m_settings.height, colorShader);
+                           enc_w, enc_h, colorShader);
         m_encoder[1]->init(*rhi, *m_renderState, m_texture,
-                           m_settings.width, m_settings.height, colorShader);
+                           enc_w, enc_h, colorShader);
 
         // Update appsrc caps to match the encoder's output format
         if(auto& gst = libgstreamer::instance();
@@ -556,8 +684,8 @@ struct GStreamerOutputNode : score::gfx::OutputNode
         {
           auto capsStr = QString("video/x-raw,format=%1,width=%2,height=%3,framerate=%4/1")
                              .arg(m_detectedFormat)
-                             .arg(m_settings.width)
-                             .arg(m_settings.height)
+                             .arg(enc_w)
+                             .arg(enc_h)
                              .arg(m_settings.rate);
           if(auto* caps = gst.caps_from_string(capsStr.toStdString().c_str()))
           {
@@ -582,6 +710,38 @@ struct GStreamerOutputNode : score::gfx::OutputNode
       }
     }
     cleanup_pipeline();
+
+    // Reset per-instance frame/encoder state so a subsequent createOutput()
+    // (re-create on settings change) starts clean instead of reusing a stale
+    // readback, ping-pong index, detected format or dangling renderer pointer.
+    m_currentReadback = &m_readback[0];
+    m_readback[0] = {};
+    m_readback[1] = {};
+    m_encoderIdx = 0;
+    m_detectedFormat.clear();
+    m_detectedColorimetry.clear();
+    m_inv_y_renderer = nullptr;
+    m_video_max_bytes = 0;
+
+    if(!m_renderState)
+      return;
+
+    // Persist-across-rebuild contract: registry survives RL teardown,
+    // so we tear down its QRhi resources here BEFORE
+    // RenderState::destroy() (called below) frees the device.
+    releaseRegistry();
+
+    delete m_renderTarget;
+    m_renderTarget = nullptr;
+
+    delete m_renderState->renderPassDescriptor;
+    m_renderState->renderPassDescriptor = nullptr;
+
+    delete m_texture;
+    m_texture = nullptr;
+
+    m_renderState->destroy();
+    m_renderState.reset();
   }
 
   std::shared_ptr<score::gfx::RenderState> renderState() const override
diff --git a/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp
index 4624286cd9..001e1ac064 100644
--- a/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp
@@ -324,8 +324,17 @@ void Model::setupIsf(const isf::descriptor& desc)
         alternatives.emplace_back("2", 2);
       }
 
+      // ComboBox::init expects the VALUE that should be initially selected,
+      // not an index. libisf stores `v.def` as the INDEX into values.
+      // Pass the alternative's value at v.def so the widget initialises
+      // to the author-intended entry instead of falling back to
+      // alternatives[0]. Same fix as CSF/Process.cpp.
+      const std::size_t def_idx
+          = std::min<std::size_t>(v.def, alternatives.size() - 1);
+      const ossia::value& init_value = alternatives[def_idx].second;
+
       auto port = new Process::ComboBox(
-          std::move(alternatives), (int)v.def, QString::fromStdString(input.name),
+          std::move(alternatives), init_value, QString::fromStdString(input.name),
           Id<Process::Port>(i), &self);
 
       self.m_inlets.push_back(port);
@@ -456,7 +465,9 @@ void Model::setupIsf(const isf::descriptor& desc)
       // They're managed by the system, so we don't create a UI control
       return nullptr;
     }
-    
+
+    Process::Inlet* operator()(const uniform_input& v) { return nullptr; }
+
     Process::Inlet* operator()(const texture_input& v)
     {
       auto port = new Gfx::TextureInlet(
diff --git a/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp b/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp
index 5fc416fffe..86c728aeb5 100644
--- a/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp
@@ -10,6 +10,8 @@
 #include <score/tools/Timers.hpp>
 
 #include <ossia/detail/flicks.hpp>
+
+#include <algorithm>
 #include <ossia/detail/logger.hpp>
 #include <ossia/detail/thread.hpp>
 
@@ -36,6 +38,10 @@ GfxContext::GfxContext(const score::DocumentContext& ctx)
       &GfxContext::recompute_graph);
 
   m_graph = new score::gfx::Graph;
+  // Hand the session-wide AssetTable down to the Graph so every
+  // RenderList it creates can participate in content-hash decode
+  // dedup. Plan 09 S1: one decode per asset per session, N uploads.
+  m_graph->setAssetTable(&m_assets);
 
   double rate = m_context.app.settings<Gfx::Settings::Model>().getRate();
   rate = qBound(1.0, rate, 1000.);
@@ -61,6 +67,14 @@ GfxContext::~GfxContext()
   m_thread.wait();
 #endif
 
+  // Stop all timers before destroying the graph and nodes,
+  // to prevent timer callbacks from accessing stale pointers.
+  m_manualTimers.clear();
+  m_no_vsync_timer = nullptr;
+  m_watchdog_timer = nullptr;
+  std::destroy_at(&m_timers);
+  std::construct_at(&m_timers);
+
   delete m_graph;
 }
 
@@ -122,63 +136,79 @@ void GfxContext::disconnect_preview_node(EdgeSpec e)
 void GfxContext::add_edge(EdgeSpec edge)
 {
   auto source_node_it = this->nodes.find(edge.first.node);
-  if(source_node_it != this->nodes.end())
-  {
-    auto sink_node_it = this->nodes.find(edge.second.node);
-    if(sink_node_it != this->nodes.end())
-    {
-      assert(source_node_it->second);
-      assert(sink_node_it->second);
-
-      auto& source_ports = source_node_it->second->output;
-      auto& sink_ports = sink_node_it->second->input;
-
-      SCORE_ASSERT(source_ports.size() > 0);
-      SCORE_ASSERT(sink_ports.size() > 0);
-      SCORE_ASSERT(source_ports.size() > edge.first.port);
-      SCORE_ASSERT(sink_ports.size() > edge.second.port);
-      auto source_port = source_ports[edge.first.port];
-      auto sink_port = sink_ports[edge.second.port];
-
-      m_graph->addEdge(source_port, sink_port, edge.type);
-    }
-  }
+  if(source_node_it == this->nodes.end())
+    return;
+  auto sink_node_it = this->nodes.find(edge.second.node);
+  if(sink_node_it == this->nodes.end())
+    return;
+  if(!source_node_it->second || !sink_node_it->second)
+    return;
+
+  auto& source_ports = source_node_it->second->output;
+  auto& sink_ports = sink_node_it->second->input;
+
+  // Silently drop malformed edges. A live-coded or half-wired patch can
+  // produce an edge whose declared port index doesn't exist on either side
+  // (e.g. a shader that parses to zero input ports but the script still
+  // issued a `connect(..., 0, consumer, 0)`). Aborting the whole renderer
+  // on a script-level wiring mistake is not an option — drop the edge and
+  // keep rendering.
+  if(edge.first.port >= source_ports.size()
+     || edge.second.port >= sink_ports.size())
+    return;
+
+  m_graph->addEdge(source_ports[edge.first.port], sink_ports[edge.second.port],
+                   edge.type);
 }
 
 void GfxContext::remove_edge(EdgeSpec edge)
 {
   auto source_node_it = this->nodes.find(edge.first.node);
-  if(source_node_it != this->nodes.end())
-  {
-    auto sink_node_it = this->nodes.find(edge.second.node);
-    if(sink_node_it != this->nodes.end())
-    {
-      assert(source_node_it->second);
-      assert(sink_node_it->second);
-
-      auto source_port = source_node_it->second->output[edge.first.port];
-      auto sink_port = sink_node_it->second->input[edge.second.port];
-
-      m_graph->removeEdge(source_port, sink_port);
-    }
-  }
+  if(source_node_it == this->nodes.end())
+    return;
+  auto sink_node_it = this->nodes.find(edge.second.node);
+  if(sink_node_it == this->nodes.end())
+    return;
+  if(!source_node_it->second || !sink_node_it->second)
+    return;
+
+  auto& source_ports = source_node_it->second->output;
+  auto& sink_ports = sink_node_it->second->input;
+  if(edge.first.port >= source_ports.size()
+     || edge.second.port >= sink_ports.size())
+    return;
+
+  m_graph->removeEdge(source_ports[edge.first.port],
+                      sink_ports[edge.second.port]);
 }
 
 void GfxContext::recompute_edges()
 {
   m_graph->clearEdges();
 
-  for(auto edge : edges)
+  // Snapshot under lock: writer in updateGraph reassigns `edges` under
+  // edges_lock on the render-driving thread, while this can be invoked from
+  // settings-change signals on the UI thread. Iterating the live container
+  // would race with that reassignment.
+  ossia::flat_set<EdgeSpec> edges_snapshot;
+  ossia::flat_set<EdgeSpec> preview_snapshot;
+  {
+    std::lock_guard l{edges_lock};
+    edges_snapshot = edges;
+    preview_snapshot = preview_edges;
+  }
+
+  for(auto edge : edges_snapshot)
   {
     add_edge(edge);
   }
-  for(auto edge : preview_edges)
+  for(auto edge : preview_snapshot)
   {
     add_edge(edge);
   }
 }
 
-void GfxContext::recompute_graph()
+void GfxContext::recomputeTimers()
 {
   // Clear previous timers
   std::destroy_at(&m_timers);
@@ -195,15 +225,10 @@ void GfxContext::recompute_graph()
     output->setVSyncCallback({});
   }
 
-  // Recreate the graph
-  recompute_edges();
-
   auto& settings = m_context.app.settings<Gfx::Settings::Model>();
-  const double settings_rate = m_context.app.settings<Gfx::Settings::Model>().getRate();
+  const double settings_rate = settings.getRate();
   const auto api = settings.graphicsApiEnum();
 
-  m_graph->createAllRenderLists(api);
-
   // Recreate new timers
   const bool vsync = settings.getVSync() && m_graph->canDoVSync();
 
@@ -274,6 +299,24 @@ void GfxContext::recompute_graph()
   }
 }
 
+void GfxContext::recomputeGraphTopology()
+{
+  // Re-wire the edges, then rebuild every render list for the configured API.
+  recompute_edges();
+
+  const auto api
+      = m_context.app.settings<Gfx::Settings::Model>().graphicsApiEnum();
+  m_graph->createAllRenderLists(api);
+}
+
+void GfxContext::recompute_graph()
+{
+  // Full rebuild = topology pass, then timer pass. The order is load-bearing:
+  // recomputeTimers iterates m_graph->outputs(), refreshed by the topology pass.
+  recomputeGraphTopology();
+  recomputeTimers();
+}
+
 void GfxContext::add_preview_output(score::gfx::OutputNode& node)
 {
   auto& settings = m_context.app.settings<Gfx::Settings::Model>();
@@ -296,12 +339,131 @@ void GfxContext::add_preview_output(score::gfx::OutputNode& node)
 void GfxContext::recompute_connections()
 {
   recompute_graph();
-  // FIXME for more performance
-  /*
-  recompute_edges();
-  // m_graph->setupOutputs(m_api);
-  m_graph->relinkGraph();
-  */
+}
+
+void GfxContext::incrementalEdgeUpdate(
+    const ossia::flat_set<EdgeSpec>& old_edges,
+    const ossia::flat_set<EdgeSpec>& cur_edges)
+{
+  // Applies the difference between two edge sets to the live graph instead
+  // of rebuilding it:
+  //   1. diff old_edges / cur_edges into `removed` and `added`,
+  //   2. collect the sink ports the added edges will feed,
+  //   3. notify the graph about, then destroy, every removed edge,
+  //   4. create every added edge,
+  //   5. reconcile render lists, passes and samplers.
+  // A spec that does not resolve to two live ports is skipped in every step.
+  //
+  // old_edges: the edge set the graph currently reflects.
+  // cur_edges: the edge set the graph must reflect on return.
+
+  // Compute diff
+  std::vector<EdgeSpec> removed;
+  std::vector<EdgeSpec> added;
+
+  std::set_difference(
+      old_edges.begin(), old_edges.end(),
+      cur_edges.begin(), cur_edges.end(),
+      std::back_inserter(removed));
+
+  std::set_difference(
+      cur_edges.begin(), cur_edges.end(),
+      old_edges.begin(), old_edges.end(),
+      std::back_inserter(added));
+
+  // Single place where an EdgeSpec is turned into ports. EdgeSpecs are
+  // script-supplied: guard against unknown or null nodes and out-of-range
+  // port indices before indexing, exactly as add_edge/remove_edge do. An
+  // OOB std::vector access is UB, not a catchable exception, so the
+  // try/catch around the caller cannot save us here. `fn` is invoked with
+  // (source_port, sink_port) only when the whole spec is valid.
+  const auto withPorts = [this](const EdgeSpec& spec, auto&& fn) {
+    auto source_it = nodes.find(spec.first.node);
+    auto sink_it = nodes.find(spec.second.node);
+    if(source_it == nodes.end() || sink_it == nodes.end())
+      return;
+    if(!source_it->second || !sink_it->second)
+      return;
+
+    auto& source_ports = source_it->second->output;
+    auto& sink_ports = sink_it->second->input;
+    if(spec.first.port >= source_ports.size()
+       || spec.second.port >= sink_ports.size())
+      return;
+
+    fn(source_ports[spec.first.port], sink_ports[spec.second.port]);
+  };
+
+  // Pre-compute the set of sink ports that will be fed by an incoming edge
+  // in this same batch. Handing that set to onEdgeRemoved prevents the
+  // "remove A→B, add F→B" sequence from destroying B's input RT in the
+  // gap between the two, which was pure churn when the old and new feeds
+  // share a sink port (classic filter insertion). Reconcile reallocates
+  // RTs only when the slot is empty, so preserving the existing RT lets
+  // the new pass slot straight into place. Source: Graph.cpp
+  // createPassForEdgeIfMissing already treats a present RT as valid
+  // regardless of the edge that produced it.
+  //
+  // Only edges that fully resolve are counted: an added spec whose source
+  // side is invalid is dropped by the addition loop below, so its sink will
+  // NOT be fed and must not have its RT preserved on that edge's behalf.
+  ossia::hash_set<const score::gfx::Port*> preserveSinks;
+  preserveSinks.reserve(added.size());
+  for(auto& spec : added)
+  {
+    withPorts(spec, [&](auto* /*source_port*/, auto* sink_port) {
+      preserveSinks.insert(sink_port);
+    });
+  }
+
+  // Process removals first (while edge objects still exist).
+  for(auto& spec : removed)
+  {
+    withPorts(spec, [&](auto* source_port, auto* sink_port) {
+      // Find the actual Edge object
+      score::gfx::Edge* edge = nullptr;
+      for(auto* e : source_port->edges)
+      {
+        if(e->sink == sink_port)
+        {
+          edge = e;
+          break;
+        }
+      }
+
+      if(edge)
+      {
+        // Notify graph BEFORE destroying the edge
+        m_graph->onEdgeRemoved(*edge, &preserveSinks);
+        m_graph->removeEdge(source_port, sink_port);
+      }
+    });
+  }
+
+  // Process additions: first create all edge objects in the graph,
+  // then reconcile render lists in one pass. Processing edges one
+  // at a time doesn't work because edge ordering creates dependencies
+  // (e.g. edge A->B is skipped because B isn't in the RL yet, then
+  // edge B->C brings B into the RL, but A never gets a renderer).
+  for(auto& spec : added)
+  {
+    withPorts(spec, [&](auto* source_port, auto* sink_port) {
+      m_graph->addEdge(source_port, sink_port, spec.type);
+    });
+  }
+
+  // Reconcile: ensure all reachable nodes have renderers and passes.
+  // This handles NEW nodes (creates renderers + passes for all their edges).
+  if(!added.empty() || !removed.empty())
+    m_graph->reconcileAllRenderLists();
+
+  // Create missing passes and update samplers for ALL edges in the graph,
+  // not just the newly-added ones. When a node becomes reachable through a
+  // new edge (e.g. filter→Grid makes filter reachable), pre-existing edges
+  // TO that node (e.g. A→filter) also need passes created. Checking only
+  // the diff misses these.
+  m_graph->createAllMissingPasses();
+  m_graph->updateAllSinkSamplers();
 }
 
 void GfxContext::update_inputs()
@@ -328,13 +490,17 @@ void GfxContext::update_inputs()
 void GfxContext::remove_node(
     std::vector<std::unique_ptr<score::gfx::Node>>& nursery, int32_t index)
 {
-  // Remove all edges involving that node
-  for(auto it = this->edges.begin(); it != this->edges.end();)
+  // Remove all edges involving that node. recompute_edges snapshots
+  // `edges` under edges_lock, so take it here too while mutating.
   {
-    if(it->first.node == index || it->second.node == index)
-      it = this->edges.erase(it);
-    else
-      ++it;
+    std::lock_guard l{edges_lock};
+    for(auto it = this->edges.begin(); it != this->edges.end();)
+    {
+      if(it->first.node == index || it->second.node == index)
+        it = this->edges.erase(it);
+      else
+        ++it;
+    }
   }
 
   if(auto node_it = nodes.find(index); node_it != nodes.end())
@@ -392,7 +558,11 @@ void GfxContext::run_commands()
         case NodeCommand::ADD_NODE: {
           m_graph->addNode(cmd.node.get());
           nodes[cmd.index] = {std::move(cmd.node)};
-          recompute = true;
+          // Only output nodes require a full rebuild (new window/timer).
+          // Non-output nodes just wait for edges — the incremental
+          // reconciliation path creates their renderers when connected.
+          if(dynamic_cast<score::gfx::OutputNode*>(nodes[cmd.index].get()))
+            recompute = true;
           break;
         }
         case NodeCommand::REMOVE_PREVIEW_NODE: {
@@ -400,13 +570,27 @@ void GfxContext::run_commands()
           auto n = dynamic_cast<score::gfx::OutputNode*>(node.get());
           SCORE_ASSERT(n);
           {
-            auto it = ossia::find_if(this->preview_edges, [idx = cmd.index](EdgeSpec e) {
-              return e.second.node == idx;
-            });
-            if(it != this->preview_edges.end())
+            // recompute_edges snapshots preview_edges under edges_lock,
+            // so guard reads/mutations of it here too. remove_edge only
+            // touches m_graph, so keep it outside the lock.
+            EdgeSpec to_remove;
+            bool found = false;
+            {
+              std::lock_guard l{edges_lock};
+              auto it = ossia::find_if(this->preview_edges, [idx = cmd.index](EdgeSpec e) {
+                return e.second.node == idx;
+              });
+              if(it != this->preview_edges.end())
+              {
+                to_remove = *it;
+                found = true;
+              }
+            }
+            if(found)
             {
-              this->remove_edge(*it);
-              this->preview_edges.erase(*it);
+              this->remove_edge(to_remove);
+              std::lock_guard l{edges_lock};
+              this->preview_edges.erase(to_remove);
             }
           }
           m_graph->destroyOutputRenderList(*n);
@@ -414,8 +598,27 @@ void GfxContext::run_commands()
           break;
         }
         case NodeCommand::REMOVE_NODE: {
-          remove_node(nursery, cmd.index);
-          recompute = true;
+          if(auto node_it = nodes.find(cmd.index); node_it != nodes.end())
+          {
+            bool is_output = dynamic_cast<score::gfx::OutputNode*>(node_it->second.get());
+            if(!is_output)
+            {
+              // Incremental removal: clean up edges, renderers, retopo sort.
+              // Must happen BEFORE remove_node deletes the node.
+              m_graph->removeNodeAndEdges(node_it->second.get());
+            }
+            remove_node(nursery, cmd.index);
+            if(is_output)
+            {
+              // Recompute immediately so subsequent commands in this tick
+              // see a consistent graph state. Deferring until the end of
+              // the loop leaves the graph half-broken (node gone from
+              // m_nodes but renderer/output still wired) for any further
+              // commands or render frames that fire in this window.
+              recompute_graph();
+              m_fullRebuildThisFrame = true;
+            }
+          }
           break;
         }
         case NodeCommand::RELINK: {
@@ -430,12 +633,18 @@ void GfxContext::run_commands()
       switch(cmd.cmd)
       {
         case EdgeCommand::CONNECT_PREVIEW_NODE: {
-          this->preview_edges.emplace(cmd.edge);
+          {
+            std::lock_guard l{edges_lock};
+            this->preview_edges.emplace(cmd.edge);
+          }
           add_edge(cmd.edge);
           break;
         }
         case EdgeCommand::DISCONNECT_PREVIEW_NODE: {
-          this->preview_edges.erase(cmd.edge);
+          {
+            std::lock_guard l{edges_lock};
+            this->preview_edges.erase(cmd.edge);
+          }
           remove_edge(cmd.edge);
           break;
         }
@@ -452,6 +661,11 @@ void GfxContext::run_commands()
   if(recompute)
   {
     recompute_graph();
+    // Signal to updateGraph() that a full rebuild happened this frame.
+    // The incremental edge path should NOT run after a full rebuild,
+    // because the graph was just rebuilt with the old edge set and
+    // applying an incremental diff would result in a half-built state.
+    m_fullRebuildThisFrame = true;
   }
 
   // This will force the nodes to be deleted in the main thread a bit later
@@ -470,14 +684,49 @@ void GfxContext::updateGraph()
 
   update_inputs();
 
-  if(edges_changed)
+  // Clear the flag BEFORE copying new_edges so a producer that publishes a
+  // fresh edge set after our copy (and re-sets the flag) cannot have its
+  // signal lost: the worst case is one redundant reprocess next tick, never
+  // a dropped update. Clearing it after the copy (the previous behaviour)
+  // could clobber a set-after-copy and, with prev_edges dedup on the
+  // producer side, that update would never be re-sent.
+  if(edges_changed.exchange(false))
   {
+    ossia::flat_set<EdgeSpec> old_edges;
+    ossia::flat_set<EdgeSpec> cur_edges;
     {
       std::lock_guard l{edges_lock};
-      std::swap(edges, new_edges);
+      old_edges = edges;
+      edges = new_edges;
+      cur_edges = edges;
+    }
+
+    // If a full rebuild happened this frame (nodes added/removed),
+    // use the nuclear path for edges too. The incremental path
+    // doesn't work correctly after a full rebuild because the graph
+    // was rebuilt with the old edge set.
+    if(m_fullRebuildThisFrame)
+    {
+      m_fullRebuildThisFrame = false;
+      recompute_connections();
+      return;
+    }
+    // Incremental edge update: apply the diff between old and new edges.
+    try
+    {
+      incrementalEdgeUpdate(old_edges, cur_edges);
+    }
+    catch(const std::exception& e)
+    {
+      qWarning("Incremental edge update failed (%s), falling back to full rebuild",
+               e.what());
+      recompute_connections();
+    }
+    catch(...)
+    {
+      qWarning("Incremental edge update failed, falling back to full rebuild");
+      recompute_connections();
     }
-    recompute_connections();
-    edges_changed = false;
   }
 }
 
@@ -497,7 +746,8 @@ void GfxContext::on_manual_timer(score::HighResolutionTimer* self)
   if(auto ptr = m_manualTimers.find(self); ptr != m_manualTimers.end())
   {
     for(auto output : ptr->second) {
-      output->render();
+      if(output && output->canRender())
+        output->render();
     }
   }
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp b/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp
index 7422bd1212..d2d104b795 100644
--- a/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp
@@ -1,6 +1,7 @@
 #pragma once
 #include <Process/Dataflow/CableData.hpp>
 
+#include <Gfx/AssetTable.hpp>
 #include <Gfx/Graph/Node.hpp>
 
 #include <score/tools/Timers.hpp>
@@ -75,6 +76,11 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject
   void recompute_edges();
   void recompute_graph();
   void recompute_connections();
+  void recomputeTimers();
+  void recomputeGraphTopology();
+  void incrementalEdgeUpdate(
+      const ossia::flat_set<EdgeSpec>& old_edges,
+      const ossia::flat_set<EdgeSpec>& cur_edges);
 
   void update_inputs();
   void updateGraph();
@@ -84,6 +90,18 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject
     tick_messages.enqueue(std::move(msg));
   }
 
+  /**
+   * @brief Session-wide content-hash decode cache.
+   *
+   * Shared across all RenderLists in this GfxContext. Loaders stage
+   * decoded bytes here on their worker thread; downstream consumers
+   * (texture upload, mesh VB/IB assembly) acquire by content hash,
+   * avoiding re-decoding the same source asset across multiple outputs
+   * or reloads. See Gfx/AssetTable.hpp.
+   */
+  AssetTable& assets() noexcept { return m_assets; }
+  const AssetTable& assets() const noexcept { return m_assets; }
+
 private:
   void run_commands();
   void add_preview_output(score::gfx::OutputNode& out);
@@ -132,9 +150,10 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject
 
   std::mutex edges_lock;
   ossia::flat_set<EdgeSpec> new_edges TS_GUARDED_BY(edges_lock);
-  ossia::flat_set<EdgeSpec> edges;
-  ossia::flat_set<EdgeSpec> preview_edges;
+  ossia::flat_set<EdgeSpec> edges TS_GUARDED_BY(edges_lock);
+  ossia::flat_set<EdgeSpec> preview_edges TS_GUARDED_BY(edges_lock);
   std::atomic_bool edges_changed{};
+  bool m_fullRebuildThisFrame{};
 
   score::HighResolutionTimer* m_no_vsync_timer{};
   score::HighResolutionTimer* m_watchdog_timer{};
@@ -143,6 +162,8 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject
 
   ossia::object_pool<std::vector<score::gfx::gfx_input>> m_buffers;
 
+  AssetTable m_assets;
+
   score::Timers m_timers;
 };
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp
index 8a152e0a8c..744dbdec13 100644
--- a/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp
@@ -2,6 +2,7 @@
 
 #include "GfxParameter.hpp"
 
+#include <State/Message.hpp>
 #include <State/MessageListSerialization.hpp>
 
 #include <ossia/network/base/device.hpp>
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp
index 24dd9c4675..51b9f9e787 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp
@@ -5,6 +5,10 @@
 #include <Gfx/Graph/Window.hpp>
 #include <Gfx/InvertYRenderer.hpp>
 #include <Gfx/Settings/Model.hpp>
+#include <Gfx/Window/WindowSettings.hpp>
+
+#include <score/application/GUIApplicationContext.hpp>
+#include <score/tools/Debug.hpp>
 
 namespace score::gfx
 {
@@ -21,7 +25,7 @@ struct BackgroundNode : OutputNode
     m_conf = {.manualRenderingRate = 1000. / settings_rate};
   }
 
-  virtual ~BackgroundNode() { }
+  virtual ~BackgroundNode() { destroyOutput(); }
 
   void startRendering() override { }
   void render() override
@@ -56,6 +60,12 @@ struct BackgroundNode : OutputNode
   void createOutput(score::gfx::OutputConfiguration conf) override
   {
     m_onResize = conf.onResize;
+    // Cache the requested graphics API so setSwapchainFormat can rebuild
+    // through createOutput when the format actually changes (live HDR↔SDR
+    // toggle). Without this the format setter was inert: m_swapchainFormat
+    // was updated but the underlying QRhiTexture stayed in its original
+    // format, silently downgrading HDR to SDR.
+    m_lastGraphicsApi = conf.graphicsApi;
 
     QSize newSz = m_renderSize;
     if(newSz.width() <= 0 || newSz.height() <= 0)
@@ -64,22 +74,38 @@ struct BackgroundNode : OutputNode
       newSz = QSize{1024, 1024};
 
     m_renderState = score::gfx::createRenderState(conf.graphicsApi, newSz, nullptr);
+    if(!m_renderState || !m_renderState->rhi)
+    {
+      qWarning() << "BackgroundNode: failed to create QRhi";
+      m_renderState.reset();
+      return;
+    }
     m_renderState->outputSize = m_renderState->renderSize;
+    m_renderState->renderFormat
+        = (m_swapchainFormat != Gfx::SwapchainFormat::SDR)
+              ? QRhiTexture::RGBA32F
+              : QRhiTexture::RGBA8;
 
     auto rhi = m_renderState->rhi;
     m_texture = rhi->newTexture(
-        QRhiTexture::RGBA8, m_renderState->renderSize, 1,
+        m_renderState->renderFormat, m_renderState->renderSize, 1,
         QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource);
     m_texture->create();
 
-    m_depthBuffer = rhi->newRenderBuffer(
-        QRhiRenderBuffer::DepthStencil, m_renderState->renderSize, 1);
-    m_depthBuffer->create();
+    // Reverse-Z project rule: depth attachment is D32F (float). Fixed-point
+    // D24 combined with reverse-Z gives strictly worse precision than
+    // standard-Z, so we must allocate a float texture here. RenderTarget
+    // flag is required for attaching as a depth target.
+    m_depthTexture = rhi->newTexture(
+        QRhiTexture::D32F, m_renderState->renderSize, 1,
+        QRhiTexture::RenderTarget);
+    m_depthTexture->setName("BackgroundNode::m_depthTexture");
+    m_depthTexture->create();
 
     QRhiTextureRenderTargetDescription desc;
     desc.setColorAttachments({QRhiColorAttachment(m_texture)});
 
-    desc.setDepthStencilBuffer(m_depthBuffer);
+    desc.setDepthTexture(m_depthTexture);
     m_renderTarget = rhi->newTextureRenderTarget(desc);
     m_renderState->renderPassDescriptor
         = m_renderTarget->newCompatibleRenderPassDescriptor();
@@ -93,11 +119,33 @@ struct BackgroundNode : OutputNode
   {
     if(m_renderState)
     {
+      // Drain the GPU before tearing resources down. Same rationale as
+      // ScreenNode::destroyOutput: when setSwapchainFormat invokes
+      // destroyOutput synchronously (C-16 / commit e2afe7874), an
+      // unfinished cbWrapper from a prior offscreen frame can still be
+      // referenced by ScenePreprocessor's per-frame copyBuffer
+      // (C-01 / commit fe146c8de). Recording into that CB after we've
+      // freed the rhi triggers VUID-vkCmdCopyBuffer-commandBuffer-
+      // recording and a device loss. Mirrors MultiWindowNode.cpp:1068.
+      if(m_renderState->rhi)
+      {
+        // Pre-condition: destroyOutput must not be called inside a
+        // frame. Mirrors ScreenNode::destroyOutput.
+        SCORE_ASSERT(!m_renderState->rhi->isRecordingFrame());
+        m_renderState->rhi->finish();
+      }
+
+      // Persist-across-rebuild contract: the registry survives RL
+      // teardown, so we must release its QRhi resources here BEFORE
+      // RenderState::destroy() tears down the QRhi. destroyOwned()
+      // `delete`s the wrappers directly while the device is alive.
+      releaseRegistry();
+
       delete m_renderTarget;
       m_renderTarget = nullptr;
 
-      delete m_depthBuffer;
-      m_depthBuffer = nullptr;
+      delete m_depthTexture;
+      m_depthTexture = nullptr;
 
       delete m_texture;
       m_texture = nullptr;
@@ -109,7 +157,39 @@ struct BackgroundNode : OutputNode
       m_renderState.reset();
     }
   }
-  void updateGraphicsAPI(GraphicsApi) override { }
+  void updateGraphicsAPI(GraphicsApi api) override
+  {
+    if(!m_renderState)
+      return;
+    if(m_renderState->api != api)
+      destroyOutput();
+  }
+
+  void setSwapchainFormat(Gfx::SwapchainFormat format)
+  {
+    if(format == m_swapchainFormat)
+      return;
+    m_swapchainFormat = format;
+
+    // No output yet: nothing to rebuild. The next createOutput() picks the
+    // texture format from m_swapchainFormat.
+    if(!m_renderState)
+      return;
+
+    // Live change: the colour texture, depth texture, render target and
+    // render pass descriptor were created by createOutput() for the old
+    // format. Rebuild the whole output with the graphics API cached by the
+    // last createOutput() so that they all match the new format.
+    score::gfx::OutputConfiguration rebuild;
+    rebuild.onResize = m_onResize;
+    rebuild.graphicsApi = m_lastGraphicsApi;
+    destroyOutput();
+    createOutput(std::move(rebuild));
+
+    // Notify through the resize callback, if any.
+    if(m_onResize)
+      m_onResize();
+  }
 
   void setSize(QSize newSz)
   {
@@ -143,24 +223,38 @@ struct BackgroundNode : OutputNode
 
       auto rhi = m_renderState->rhi;
 
+      // Drain the GPU before destroying m_renderTarget / m_texture /
+      // m_depthTexture. Same anti-pattern that destroyOutput already
+      // avoids via FIX-A: the current frame's offscreen CB (or a
+      // queued one) may still reference these resources, and Qt's
+      // setPixelSize+create dance below does not internally drain.
+      // Without this, validation fires on the next vkCmd*-recording
+      // (-recording / -commandBuffer-recording / -in-use) and may
+      // device-loss.
+      rhi->finish();
+
       m_renderTarget->destroy();
       m_texture->destroy();
       m_texture->setPixelSize(newSz);
       m_texture->create();
 
-      if(m_depthBuffer)
-        m_depthBuffer->destroy();
+      if(m_depthTexture)
+        m_depthTexture->destroy();
       else
-        m_depthBuffer = rhi->newRenderBuffer(QRhiRenderBuffer::DepthStencil, newSz);
-      m_depthBuffer->setPixelSize(newSz);
-      m_depthBuffer->create();
-
-      delete m_renderTarget;
-      delete m_renderState->renderPassDescriptor;
+        m_depthTexture = rhi->newTexture(
+            QRhiTexture::D32F, newSz, 1, QRhiTexture::RenderTarget);
+      m_depthTexture->setPixelSize(newSz);
+      m_depthTexture->create();
+
+      m_renderTarget->deleteLater();
+      if(auto* rpd = m_renderState->renderPassDescriptor)
+        rpd->deleteLater();
+      m_renderState->renderPassDescriptor = nullptr;
+      m_renderTarget = nullptr;
 
       QRhiTextureRenderTargetDescription desc;
       desc.setColorAttachments({QRhiColorAttachment(m_texture)});
-      desc.setDepthStencilBuffer(m_depthBuffer);
+      desc.setDepthTexture(m_depthTexture);
       m_renderTarget = rhi->newTextureRenderTarget(desc);
       m_renderState->renderPassDescriptor
           = m_renderTarget->newCompatibleRenderPassDescriptor();
@@ -179,7 +273,8 @@ struct BackgroundNode : OutputNode
     score::gfx::TextureRenderTarget rt{
         .texture = m_texture,
         .renderPass = m_renderState->renderPassDescriptor,
-        .renderTarget = m_renderTarget};
+        .renderTarget = m_renderTarget,
+        .depthTexture = m_depthTexture};
     return new Gfx::InvertYRenderer{
         *this, rt, const_cast<QRhiReadbackResult&>(*shared_readback)};
   }
@@ -193,12 +288,17 @@ struct BackgroundNode : OutputNode
 
   std::weak_ptr<score::gfx::RenderList> m_renderer{};
   QRhiTexture* m_texture{};
-  QRhiRenderBuffer* m_depthBuffer{};
+  QRhiTexture* m_depthTexture{};
   QRhiTextureRenderTarget* m_renderTarget{};
   std::shared_ptr<score::gfx::RenderState> m_renderState{};
 
   std::function<void()> m_onResize;
   QSize m_size{1024, 1024};
   QSize m_renderSize{};
+  Gfx::SwapchainFormat m_swapchainFormat{};
+  // Cached graphics API from the last createOutput so setSwapchainFormat
+  // can route a live format change through destroyOutput + createOutput
+  // without having to re-derive it from the host.
+  GraphicsApi m_lastGraphicsApi{};
 };
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.cpp
new file mode 100644
index 0000000000..fe9fc89c5e
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.cpp
@@ -0,0 +1,66 @@
+#include <Gfx/Graph/CameraMath.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+namespace score::gfx
+{
+
+/**
+ * @brief Fill @p out with the camera data derived from @p cam.
+ *
+ * The view matrix is the inverse of @p worldTransform and the eye position
+ * is its translation column. The projection comes from
+ * setReverseZPerspective(), fed with cam.yfov scaled from radians to
+ * degrees, cam.znear and cam.zfar.
+ *
+ * The aspect ratio is the first usable value among:
+ *  - @p aspectOverride, when > 0;
+ *  - renderSize width / height, when both are > 0;
+ *  - cam.aspect_ratio, when > 0;
+ *  - 1.
+ */
+void packCameraUBO(
+    CameraUBOData& out, const ossia::camera_component& cam,
+    const QMatrix4x4& worldTransform, QSize renderSize, float timeSeconds,
+    float aspectOverride)
+{
+  const QVector3D eye = worldTransform.column(3).toVector3D();
+
+  QMatrix4x4 view = worldTransform.inverted();
+
+  const float fovYDeg = cam.yfov * (180.f / float(M_PI));
+  float aspect = aspectOverride;
+  if(aspect <= 0.f)
+  {
+    // Only trust renderSize when both dimensions are positive: a zero width
+    // would give aspect == 0 (projection left at identity by
+    // setReverseZPerspective) and a zero height a division by zero.
+    const bool sizeUsable = renderSize.width() > 0 && renderSize.height() > 0;
+    aspect = sizeUsable
+        ? (float(renderSize.width()) / float(renderSize.height()))
+        : (cam.aspect_ratio > 0.f ? cam.aspect_ratio : 1.f);
+  }
+
+  QMatrix4x4 proj;
+  setReverseZPerspective(proj, fovYDeg, aspect, cam.znear, cam.zfar);
+
+  QMatrix4x4 vp = proj * view;
+
+  writeMat4(out.view, view);
+  writeMat4(out.projection, proj);
+  writeMat4(out.viewProjection, vp);
+  out.cameraPosition[0] = eye.x();
+  out.cameraPosition[1] = eye.y();
+  out.cameraPosition[2] = eye.z();
+  out.cameraPosition[3] = 0.f;
+  out.renderSize[0] = float(renderSize.width());
+  out.renderSize[1] = float(renderSize.height());
+  out.renderSize[2] = 0.f;
+  out.renderSize[3] = 0.f;
+  out.params[0] = timeSeconds;
+  out.params[1] = cam.znear;
+  out.params[2] = cam.zfar;
+  out.params[3] = 0.f;
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.hpp
new file mode 100644
index 0000000000..5196c94107
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.hpp
@@ -0,0 +1,82 @@
+#pragma once
+#include <QMatrix4x4>
+#include <QSize>
+#include <QVector3D>
+
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+
+namespace ossia
+{
+struct camera_component;
+}
+
+namespace score::gfx
+{
+
+// std140 layout; must byte-for-byte match every shader's `uniform camera_t`.
+// Packed into ScenePreprocessor's per-camera Camera UBO aux buffer (attached
+// to Geometry Out and auto-bound in consuming shaders by name).
+struct CameraUBOData
+{
+  float view[16]{};
+  float projection[16]{};
+  float viewProjection[16]{};
+  float cameraPosition[4]{};
+  float renderSize[4]{};
+  float params[4]{};
+};
+static_assert(sizeof(CameraUBOData) == 240, "CameraUBO layout must match shader");
+
+inline void writeMat4(float dst[16], const QMatrix4x4& src)
+{
+  std::memcpy(dst, src.constData(), 16 * sizeof(float));
+}
+
+// Reverse-Z perspective projection in OpenGL NDC convention.
+//
+// Standard OpenGL perspective: view_z ∈ [-far, -near] → NDC z ∈ [-1, +1],
+//   with near → -1 and far → +1.
+// Reverse-Z (this function): same ranges, INVERTED: near → +1, far → -1.
+//
+// QRhi's clipSpaceCorrMatrix on Vulkan/Metal/D3D remaps the output NDC z ∈
+// [-1, +1] down to the backend-native [0, 1] without further flipping:
+// near → 1.0, far → 0.0 in the depth buffer.
+//
+// This is paired project-wide with a float (D32F) depth attachment, a
+// GREATER depth compare and a clear-depth of 0.0. Mixing conventions on a
+// single depth buffer produces garbage.
+inline void setReverseZPerspective(
+    QMatrix4x4& out, float fovYDeg, float aspect, float nearPlane,
+    float farPlane)
+{
+  out.setToIdentity();
+  if(nearPlane == farPlane || aspect == 0.f)
+    return;
+
+  const float radians = (fovYDeg * 0.5f) * float(M_PI / 180.0);
+  const float sine = std::sin(radians);
+  if(sine == 0.f)
+    return;
+  const float cotan = std::cos(radians) / sine;
+  const float clip = farPlane - nearPlane;
+
+  out(0, 0) = cotan / aspect;
+  out(1, 1) = cotan;
+  out(2, 2) = (farPlane + nearPlane) / clip;
+  out(2, 3) = (2.f * farPlane * nearPlane) / clip;
+  out(3, 2) = -1.f;
+  out(3, 3) = 0.f;
+}
+
+// Pack a camera_component's view/projection/position into a CameraUBOData.
+// `worldTransform` is the camera node's accumulated world matrix (column 3 is
+// the eye position, its inverse is the view matrix). `aspectOverride` <= 0
+// falls back to renderSize width/height, then `cam.aspect_ratio`, then 1.
+void packCameraUBO(
+    CameraUBOData& out, const ossia::camera_component& cam,
+    const QMatrix4x4& worldTransform, QSize renderSize, float timeSeconds,
+    float aspectOverride = -1.f);
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp
index 45b121e877..420e464f7d 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp
@@ -20,6 +20,15 @@ struct ProcessUBO
 
   float renderSize[2]{2048, 2048};
   float date[4]{0.f, 0.f, 0.f, 0.f};
+
+  // Mirrors gl_NumWorkGroups for CSF compute shaders. Populated by
+  // RenderedCSFNode just before dispatch so the libisf-injected
+  // `#define gl_NumWorkGroups isf_process_uniforms.NUMWORKGROUPS_`
+  // resolves to real dispatch counts on every backend (especially D3D
+  // where SPIRV-Cross refuses to emit the built-in directly).
+  // std140 packs uvec3 into a vec4 slot — the trailing word is padding.
+  uint32_t numWorkgroups[3]{};
+  uint32_t _numWorkgroups_pad{};
 };
 
 /**
@@ -40,12 +49,18 @@ struct ModelCameraUBO
   float projection[16]{};
   float modelNormal[9]{};
   float padding[3]; // Needed as a mat3 needs a bit more space...
-  float fov = 90.;
+  float fov = 90.f;
+  // NB: must NOT be named `near`/`far` — those are legacy macros defined by
+  // <windows.h>; naming members after them forces an #undef that then breaks
+  // any Windows system header (mmeapi.h, combaseapi.h) included afterwards.
+  float znear = 0.001f;  //!< Used by non-matrix projections (fulldome, …) for reverse-Z depth.
+  float zfar = 10000.f;  //!< idem.
   // clang-format on
 };
 
 static_assert(
-    sizeof(ModelCameraUBO) == sizeof(float) * (16 + 16 + 16 + 16 + 16 + 9 + 3 + 1));
+    sizeof(ModelCameraUBO)
+    == sizeof(float) * (16 + 16 + 16 + 16 + 16 + 9 + 3 + 1 + 1 + 1));
 
 /**
  * @brief UBO shared across all entities shown on the same output.
@@ -55,6 +70,13 @@ struct OutputUBO
   float clipSpaceCorrMatrix[16]{};
 
   float renderSize[2]{};
+
+  // MSAA sample count of the bound output target. Mirrors
+  // RenderList::samples(); shaders need it because gl_NumSamples is
+  // stripped by glslang under SPIR-V. The trailing pad keeps the UBO
+  // aligned to a vec4 boundary (std140-friendly).
+  int32_t sampleCount{1};
+  int32_t _pad0{0};
 };
 
 /**
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp
index cfb926a829..53439f861c 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp
@@ -2,10 +2,25 @@
 #include <score/tools/Debug.hpp>
 #include <Gfx/Graph/Utils.hpp>
 
+#include <QDebug>
+
+#include <cstdlib>
+
 // TODO: extend MeshBufs to hold multiple buffers
 // TODO: check that rendering e.g. sponza still works
 namespace score::gfx{
 
+// Reports whether [BUFTRACE] logging is active (see CustomMesh.hpp). Enabled
+// unless the SCORE_BUFTRACE environment variable starts with '0'; read once.
+bool buftrace_enabled()
+{
+  static const bool enabled = [] {
+    const char* env = std::getenv("SCORE_BUFTRACE");
+    return env == nullptr || *env != '0';
+  }();
+  return enabled;
+}
+
 CustomMesh::CustomMesh(const ossia::mesh_list &g, const ossia::geometry_filter_list_ptr &f)
 {
   reload(g, f);
@@ -19,7 +34,9 @@ QRhiBuffer *CustomMesh::init_vbo(const ossia::geometry::cpu_buffer &buf, QRhi &r
       QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer,
       vtx_buf_size);
   mesh_buf->setName(
-      QString("Mesh::vtx_buf.%1").arg(idx.load(std::memory_order_relaxed)).toLatin1());
+      QString("Mesh::vtx_buf.%1")
+          .arg(idx.fetch_add(1, std::memory_order_relaxed))
+          .toLatin1());
   mesh_buf->create();
 
   return mesh_buf;
@@ -32,11 +49,15 @@ QRhiBuffer *CustomMesh::init_vbo(const ossia::geometry::gpu_buffer &buf, QRhi &r
 
 QRhiBuffer *CustomMesh::init_index(const ossia::geometry::cpu_buffer &buf, QRhi &rhi) const noexcept
 {
+  static std::atomic_int idx = 0;
   QRhiBuffer* idx_buf{};
   if(const auto idx_buf_size = buf.byte_size; idx_buf_size > 0)
   {
     idx_buf = rhi.newBuffer(QRhiBuffer::Static, QRhiBuffer::IndexBuffer, idx_buf_size);
-    idx_buf->setName("Mesh::idx_buf");
+    idx_buf->setName(
+        QString("Mesh::idx_buf.%1")
+            .arg(idx.fetch_add(1, std::memory_order_relaxed))
+            .toLatin1());
     idx_buf->create();
   }
 
@@ -54,132 +75,232 @@ MeshBuffers CustomMesh::init(QRhi &rhi) const noexcept
   {
     return {};
   }
-  if(geom.meshes[0].buffers.empty())
-  {
-    return {};
-  }
 
   MeshBuffers ret;
-  // FIXME multi-mesh
-  auto& mesh = geom.meshes[0];
-
-  // 1. Null check
-  bool any_is_null = false;
-  for(const auto& buf : mesh.buffers)
-  {
-    any_is_null |= ossia::visit([&]<typename Buffer>(Buffer& buf) {
-      if constexpr(std::is_same_v<Buffer, ossia::geometry::cpu_buffer>)
-      {
-        return buf.byte_size == 0 || buf.data == nullptr;
-      }
-      else if constexpr(std::is_same_v<Buffer, ossia::geometry::gpu_buffer>)
-      {
-        return buf.handle == nullptr;
-      }
-      return false;
-    }, buf.data);
-  }
 
-  if(any_is_null)
+  // Multi-mesh: concatenate every mesh's buffers into ret.buffers in order.
+  // Each sub-mesh's local `input[].buffer` / `index.buffer` indices are
+  // remapped at draw time by adding the sub-mesh's starting offset in
+  // ret.buffers. The first sub-mesh's layout drives the pipeline
+  // (vertex bindings / attributes) in reload() — sub-meshes with a
+  // different layout are not supported today and will draw incorrectly.
+  for(std::size_t mi = 0; mi < geom.meshes.size(); ++mi)
   {
-    return {};
-  }
-
-  int i = 0;
-  int index_i = mesh.index.buffer;
+    const auto& mesh = geom.meshes[mi];
+    if(mesh.buffers.empty())
+      continue;
 
-  for(const auto& buf : mesh.buffers)
-  {
-    if(i != index_i)
+    // Null check — skip a sub-mesh whose data isn't ready yet.
+    bool any_is_null = false;
+    for(const auto& buf : mesh.buffers)
     {
-      auto rhi_buf
-          = ossia::visit([&](auto& buf) { return init_vbo(buf, rhi); }, buf.data);
-      ret.buffers.emplace_back(rhi_buf, 0, 0);
+      any_is_null |= ossia::visit([&]<typename Buffer>(Buffer& buf) {
+        if constexpr(std::is_same_v<Buffer, ossia::geometry::cpu_buffer>)
+          return buf.byte_size == 0 || buf.data == nullptr;
+        else if constexpr(std::is_same_v<Buffer, ossia::geometry::gpu_buffer>)
+          return buf.handle == nullptr;
+        return false;
+      }, buf.data);
     }
-    else
+    if(any_is_null)
+    {
+      // Emit null placeholders so indexing stays aligned with geom.meshes.
+      for(std::size_t k = 0; k < mesh.buffers.size(); ++k)
+        ret.buffers.emplace_back(nullptr, 0, 0);
+      continue;
+    }
+
+    int i = 0;
+    const int index_i = mesh.index.buffer;
+    for(const auto& buf : mesh.buffers)
     {
-      auto rhi_buf
-          = ossia::visit([&](auto& buf) { return init_index(buf, rhi); }, buf.data);
-      ret.buffers.emplace_back(rhi_buf, 0, 0);
+      QRhiBuffer* rhi_buf = (i != index_i)
+          ? ossia::visit([&](auto& b) { return init_vbo(b, rhi); }, buf.data)
+          : ossia::visit([&](auto& b) { return init_index(b, rhi); }, buf.data);
+      // Ownership follows the source variant: cpu_buffer paths allocate
+      // fresh QRhiBuffers (owned), gpu_buffer paths borrow an upstream
+      // handle (unowned — the original producer still owns it).
+      const bool owned = ossia::visit(
+          []<typename Buffer>(const Buffer&) {
+            return std::is_same_v<Buffer, ossia::geometry::cpu_buffer>;
+          }, buf.data);
+      BufferView bv{};
+      bv.handle = rhi_buf;
+      bv.owned = owned;
+      ret.buffers.emplace_back(bv);
+      i++;
     }
-    i++;
   }
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-  // Populate indirect draw buffer from geometry's indirect_count
-  if(mesh.indirect_count.handle)
+  if(ret.buffers.empty())
+    return {};
+
+  // Indirect draw / cpu_draw_commands: only meaningful when a single output
+  // mesh carries them (ScenePreprocessor's MDI mode). Pick them up from mesh[0].
+  const auto& first_mesh = geom.meshes[0];
+  if(first_mesh.indirect_count.handle)
   {
-    ret.indirectDrawBuffer = static_cast<QRhiBuffer*>(mesh.indirect_count.handle);
+    ret.indirectDrawBuffer = static_cast<QRhiBuffer*>(first_mesh.indirect_count.handle);
     ret.useIndirectDraw = true;
-    ret.indirectDrawIndexed = (mesh.index.buffer >= 0);
+    ret.indirectDrawIndexed = (first_mesh.index.buffer >= 0);
+    ret.indirectDrawCount
+        = first_mesh.indirect_count.byte_size / (5 * sizeof(uint32_t));
+    ret.indirectDrawStride = 5 * sizeof(uint32_t);
+    if(ret.indirectDrawCount == 0)
+      ret.indirectDrawCount = 1;
   }
-#endif
+  if(!first_mesh.cpu_draw_commands.empty())
+    ret.cpuDrawCommands.assign(
+        first_mesh.cpu_draw_commands.begin(), first_mesh.cpu_draw_commands.end());
 
   return ret;
 }
 
 void CustomMesh::update_vbo(
     int buffer_index, const ossia::geometry::cpu_buffer& vtx_buf, MeshBuffers& meshbuf,
-    QRhiResourceUpdateBatch& rb) const noexcept
+    QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept
 {
   if(meshbuf.buffers.size() <= buffer_index)
     return;
 
-  auto buffer = meshbuf.buffers[buffer_index].handle; // FIXME use offset here?
-  if(auto sz = vtx_buf.byte_size; sz != buffer->size())
+  auto& slot = meshbuf.buffers[buffer_index];
+  // Diag 009 — guard the cpu→over-unowned-slot UAF: when the slot was last
+  // populated by an upstream gpu_buffer producer (owned=false), calling
+  // setSize/create on the upstream's QRhiBuffer destroys the underlying
+  // VkBuffer through QRhi's deferred-release queue and bumps the generation,
+  // silently clobbering every downstream consumer of that handle. Allocate
+  // a fresh owned buffer instead and leave the upstream wrapper untouched.
+  if(!slot.handle || !slot.owned)
   {
-    buffer->destroy();
-    buffer->setSize(sz);
-    buffer->create();
+    static std::atomic_int idx = 0;
+    auto* fresh = rhi.newBuffer(
+        QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer,
+        vtx_buf.byte_size);
+    fresh->setName(
+        QString("Mesh::vtx_buf.%1")
+            .arg(idx.fetch_add(1, std::memory_order_relaxed))
+            .toLatin1());
+    if(!fresh->create())
+    {
+      qWarning() << "CustomMesh::update_vbo: fresh buffer->create() FAILED";
+      delete fresh;
+      return;
+    }
+    BUFTRACE() << "update_vbo(cpu) mesh=" << (void*)this
+               << " slot=" << buffer_index
+               << " allocating fresh owned buffer (was "
+               << (slot.handle ? "unowned upstream" : "empty") << ")"
+               << " new=" << (void*)fresh
+               << " size=" << (qint64)vtx_buf.byte_size;
+    slot.handle = fresh;
+    slot.owned = true;
+  }
+  else if(auto sz = vtx_buf.byte_size; sz != slot.handle->size())
+  {
+    BUFTRACE() << "update_vbo(cpu) resize" << (void*)slot.handle << "to" << (qint64)sz;
+    slot.handle->setSize(sz);
+    if(!slot.handle->create())
+    {
+      qWarning() << "CustomMesh::update_vbo: buffer->create() FAILED after resize!";
+      return; // same policy as the fresh-allocation path: never upload into a buffer that failed to create
+    }
   }
   // FIXME support offset
   uploadStaticBufferWithStoredData(
-      &rb, buffer, 0, buffer->size(), (const char*)vtx_buf.raw_data.get());
+      &rb, slot.handle, 0, slot.handle->size(), (const char*)vtx_buf.raw_data.get());
 }
 
 void CustomMesh::update_vbo(
     int buffer_index, const ossia::geometry::gpu_buffer& vtx_buf, MeshBuffers& meshbuf,
-    QRhiResourceUpdateBatch& rb) const noexcept
+    QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept
 {
   if(meshbuf.buffers.size() <= buffer_index)
     return;
 
   // FIXME offset, size ?
-  // FIXME check if memory of previous buffer gets freed?
-  meshbuf.buffers[buffer_index] = {static_cast<QRhiBuffer*>(vtx_buf.handle), 0, 0};
+  auto& slot = meshbuf.buffers[buffer_index];
+  auto* old_buf = slot.handle;
+  auto* new_buf = static_cast<QRhiBuffer*>(vtx_buf.handle);
+  if(old_buf != new_buf)
+  {
+    // Diag 009 — when the slot previously held an owned cpu-fed buffer,
+    // route it through deleteLater so QRhi's release queue tears it
+    // down (and any SRBs auto-rebind via m_id generation tracking on
+    // their next setShaderResources). Without this we leak both the
+    // QRhiBuffer wrapper and its underlying VkBuffer.
+    if(slot.owned && old_buf)
+    {
+      BUFTRACE() << "update_vbo(gpu) mesh=" << (void*)this
+                 << " slot=" << buffer_index
+                 << " deleteLater old owned=" << (void*)old_buf
+                 << " new=" << (void*)new_buf
+                 << " size=" << (qint64)vtx_buf.byte_size;
+      old_buf->deleteLater();
+    }
+    else
+    {
+      BUFTRACE() << "update_vbo(gpu) mesh=" << (void*)this
+                 << " slot=" << buffer_index
+                 << " old(unowned)=" << (void*)old_buf
+                 << " new=" << (void*)new_buf
+                 << " size=" << (qint64)vtx_buf.byte_size;
+    }
+  }
+  // Replacement entry must carry owned=false: the handle belongs to the
+  // upstream gpu_buffer producer. Default-constructed BufferView has
+  // owned=true → RenderList::release would `delete` a borrowed handle.
+  BufferView bv{};
+  bv.handle = new_buf;
+  bv.owned = false;
+  slot = bv;
 }
 
 void CustomMesh::update_index(
     int buffer_index, const ossia::geometry::cpu_buffer& idx_buf, MeshBuffers& meshbuf,
-    QRhiResourceUpdateBatch& rb) const noexcept
+    QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept
 {
   if(meshbuf.buffers.size() <= buffer_index)
     return;
 
+  auto& slot = meshbuf.buffers[buffer_index];
   void* idx_buf_data = nullptr;
-  auto buffer = meshbuf.buffers[buffer_index].handle; // FIXME use offset here?
-  if(buffer)
+  if(geom.meshes[0].buffers.size() > 1)
   {
-    if(geom.meshes[0].buffers.size() > 1)
+    if(const auto idx_buf_size = idx_buf.byte_size; idx_buf_size > 0)
     {
-      if(const auto idx_buf_size = idx_buf.byte_size; idx_buf_size > 0)
+      idx_buf_data = idx_buf.raw_data.get();
+      // Diag 009 — same UAF guard as update_vbo(cpu): if the slot is
+      // empty or holds an upstream-owned (unowned) handle, do NOT
+      // setSize/create on it; allocate a fresh owned index buffer.
+      if(!slot.handle || !slot.owned)
       {
-        idx_buf_data = idx_buf.raw_data.get();
-        // FIXME what if index disappears
-        if(auto sz = idx_buf.byte_size; sz != buffer->size())
-        {
-          buffer->destroy();
-          buffer->setSize(sz);
-          buffer->create();
-        }
-        else
+        static std::atomic_int idx = 0;
+        auto* fresh = rhi.newBuffer(
+            QRhiBuffer::Static, QRhiBuffer::IndexBuffer, idx_buf_size);
+        fresh->setName(
+            QString("Mesh::idx_buf.%1")
+                .arg(idx.fetch_add(1, std::memory_order_relaxed))
+                .toLatin1());
+        if(!fresh->create())
         {
+          qWarning() << "CustomMesh::update_index: fresh buffer->create() FAILED";
+          delete fresh;
+          return;
         }
+        BUFTRACE() << "update_index(cpu) mesh=" << (void*)this
+                   << " slot=" << buffer_index
+                   << " allocating fresh owned index buffer (was "
+                   << (slot.handle ? "unowned upstream" : "empty") << ")"
+                   << " new=" << (void*)fresh
+                   << " size=" << (qint64)idx_buf_size;
+        slot.handle = fresh;
+        slot.owned = true;
+      }
+      else if(auto sz = idx_buf.byte_size; sz != slot.handle->size())
+      {
+        slot.handle->setSize(sz);
+        slot.handle->create();
       }
-    }
-    else
-    {
-      // FIXME what if index appears
     }
   }
   else
@@ -187,19 +308,49 @@ void CustomMesh::update_index(
     // FIXME what if index appears
   }
 
-  if(buffer && idx_buf_data)
+  if(slot.handle && idx_buf_data)
   {
     // FIXME support offset
     uploadStaticBufferWithStoredData(
-        &rb, buffer, 0, buffer->size(), (const char*)idx_buf_data);
+        &rb, slot.handle, 0, slot.handle->size(), (const char*)idx_buf_data);
   }
 }
 
 void CustomMesh::update_index(
     int buffer_index, const ossia::geometry::gpu_buffer& idx_buf, MeshBuffers& meshbuf,
-    QRhiResourceUpdateBatch& rb) const noexcept
+    QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept
 {
   SCORE_ASSERT(meshbuf.buffers.size() > buffer_index);
+  auto& slot = meshbuf.buffers[buffer_index];
+  auto* const previous = slot.handle;
+  auto* const incoming = static_cast<QRhiBuffer*>(idx_buf.handle);
+  if(previous == incoming)
+    return; // same handle as before: nothing to rebind
+
+  // Diag 009 — leak-fix: a handle this slot owned (cpu→gpu transition) is
+  // handed to deleteLater() instead of being dropped on the floor.
+  if(previous && slot.owned)
+  {
+    BUFTRACE() << "update_index(gpu) mesh=" << (void*)this
+               << " slot=" << buffer_index
+               << " deleteLater old owned=" << (void*)previous
+               << " new=" << (void*)incoming
+               << " size=" << (qint64)idx_buf.byte_size;
+    previous->deleteLater();
+  }
+  else
+  {
+    BUFTRACE() << "update_index(gpu) mesh=" << (void*)this
+               << " slot=" << buffer_index
+               << " old(unowned)=" << (void*)previous
+               << " new=" << (void*)incoming
+               << " size=" << (qint64)idx_buf.byte_size;
+  }
+  // The slot now only references the upstream buffer; it is marked unowned.
+  BufferView replacement{};
+  replacement.handle = incoming;
+  replacement.owned = false;
+  slot = replacement;
 }
 
 void CustomMesh::update(
@@ -208,47 +359,87 @@ void CustomMesh::update(
   if(geom.meshes.empty())
     return;
 
-  // FIXME multi-mesh
-  auto& input_mesh = geom.meshes[0];
-  if(input_mesh.buffers.empty())
+  // Grow output_meshbuf.buffers when the geometry has added more
+  // buffers than mb has slots for (e.g. a model swap from Box.gltf →
+  // Duck.gltf where Duck has more vertex buffers, or
+  // ScenePreprocessor appending instance + scene-aux entries beyond
+  // the existing slot count). Without this, update_vbo's
+  // `if(meshbuf.buffers.size() <= buffer_index) return;` silently
+  // drops writes for new high-index buffers, stale handles persist,
+  // and the next setVertexInput binds them as vertex inputs —
+  // validation flags `pBuffers[N] is INDEX_BUFFER / STORAGE_BUFFER,
+  // requires VERTEX_BUFFER`.
+  //
+  // We *grow* rather than re-init: re-initialising forces init()
+  // through its any-buffer-null bail-out (which emits null placeholders
+  // for the WHOLE sub-mesh whenever any single buffer is null), which
+  // breaks scenes where a conditional aux buffer transiently goes
+  // null. Growing preserves the live handles already bound to
+  // populated slots; new slots get null placeholders and the
+  // update_vbo / update_index loop below fills them in.
+  //
+  // Shrinking is intentionally not done: extra trailing slots beyond
+  // what g.input / g.index reference are harmless (the draw path
+  // never indexes into them), and shrinking would require explicit
+  // release of the truncated owned buffers.
+  std::size_t total_geom_buffers = 0;
+  for(const auto& m : geom.meshes)
+    total_geom_buffers += m.buffers.size();
+  if(output_meshbuf.buffers.size() < total_geom_buffers)
   {
-    return;
+    BUFTRACE() << "CustomMesh::update: growing MeshBuffers from "
+               << (qsizetype)output_meshbuf.buffers.size()
+               << " to " << (qsizetype)total_geom_buffers
+               << " slots (preserving existing handles)";
+    output_meshbuf.buffers.resize(
+        total_geom_buffers, BufferView{nullptr, 0, 0});
   }
+
   if(output_meshbuf.buffers.empty())
-  {
     output_meshbuf = init(rhi);
-  }
   if(output_meshbuf.buffers.empty())
-  {
     return;
-  }
 
-  int i = 0;
-  int index_i = input_mesh.index.buffer;
-
-  for(const auto& buf : input_mesh.buffers)
+  // Upload each sub-mesh's buffers, remapping local indices to the flat
+  // offset in output_meshbuf.buffers built by init().
+  std::size_t base = 0;
+  for(const auto& input_mesh : geom.meshes)
   {
-    if(i != index_i)
-    {
-      ossia::visit(
-          [&](auto& buf) { return update_vbo(i, buf, output_meshbuf, rb); }, buf.data);
-    }
-    else
+    if(input_mesh.buffers.empty())
+      continue;
+    if(base + input_mesh.buffers.size() > output_meshbuf.buffers.size())
+      break;
+
+    int i = 0;
+    const int index_i = input_mesh.index.buffer;
+    for(const auto& buf : input_mesh.buffers)
     {
-      ossia::visit(
-          [&](auto& buf) { return update_index(i, buf, output_meshbuf, rb); }, buf.data);
+      const int flat = int(base) + i;
+      if(i != index_i)
+      {
+        ossia::visit(
+            [&](auto& buf) { return update_vbo(flat, buf, output_meshbuf, rhi, rb); },
+            buf.data);
+      }
+      else
+      {
+        ossia::visit(
+            [&](auto& buf) { return update_index(flat, buf, output_meshbuf, rhi, rb); },
+            buf.data);
+      }
+      i++;
     }
-    i++;
+    base += input_mesh.buffers.size();
   }
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-  // Update indirect draw buffer reference
-  if(input_mesh.indirect_count.handle)
+  // Indirect draw / cpu_draw_commands: same single-mesh scoping as init().
+  const auto& first_mesh = geom.meshes[0];
+  if(first_mesh.indirect_count.handle)
   {
     output_meshbuf.indirectDrawBuffer
-        = static_cast<QRhiBuffer*>(input_mesh.indirect_count.handle);
+        = static_cast<QRhiBuffer*>(first_mesh.indirect_count.handle);
     output_meshbuf.useIndirectDraw = true;
-    output_meshbuf.indirectDrawIndexed = (input_mesh.index.buffer >= 0);
+    output_meshbuf.indirectDrawIndexed = (first_mesh.index.buffer >= 0);
   }
   else
   {
@@ -256,7 +447,16 @@ void CustomMesh::update(
     output_meshbuf.useIndirectDraw = false;
     output_meshbuf.indirectDrawIndexed = false;
   }
-#endif
+
+  if(!first_mesh.cpu_draw_commands.empty())
+  {
+    output_meshbuf.cpuDrawCommands.assign(
+        first_mesh.cpu_draw_commands.begin(), first_mesh.cpu_draw_commands.end());
+  }
+
+  // Note: GPU readback for the indirect draw fallback is handled
+  // synchronously in RenderedRawRasterPipelineNode::runInitialPasses,
+  // which has access to both the command buffer and QRhi::finish().
 }
 
 Mesh::Flags CustomMesh::flags() const noexcept
@@ -306,6 +506,8 @@ void CustomMesh::preparePipeline(QRhiGraphicsPipeline &pip) const noexcept
   {
     pip.setDepthTest(true);
     pip.setDepthWrite(true);
+    // Reverse-Z project rule.
+    pip.setDepthOp(QRhiGraphicsPipeline::Greater);
   }
 
   pip.setTopology(this->topology);
@@ -321,6 +523,11 @@ void CustomMesh::preparePipeline(QRhiGraphicsPipeline &pip) const noexcept
 
 void CustomMesh::reload(const ossia::mesh_list &ml, const ossia::geometry_filter_list_ptr &f)
 {
+  BUFTRACE() << "CustomMesh::reload mesh=" << (void*)this
+             << " meshes=" << (qsizetype)ml.meshes.size()
+             << " first_buf_count="
+             << (ml.meshes.empty() ? (qsizetype)-1
+                                    : (qsizetype)ml.meshes[0].buffers.size());
   this->geom = ml;
   this->filters = f;
 
@@ -368,59 +575,174 @@ void CustomMesh::reload(const ossia::mesh_list &ml, const ossia::geometry_filter
   frontFace = (QRhiGraphicsPipeline::FrontFace)g.front_face;
 }
 
-void CustomMesh::draw(const MeshBuffers &bufs, QRhiCommandBuffer &cb) const noexcept
+bool CustomMesh::drawSingleMesh(
+    std::size_t mesh_index, std::size_t base, const MeshBuffers& bufs,
+    QRhiCommandBuffer& cb,
+    std::span<const FallbackBindingPlan::Slot> fallback_slots) const noexcept
 {
-  for(auto& g : this->geom.meshes)
+  if(mesh_index >= geom.meshes.size())
+    return false;
+  const auto& g = geom.meshes[mesh_index];
+
+  // Total vertex-input count = mesh bindings + fallback bindings. The
+  // fallback slots' binding_index values were allocated sequentially
+  // past the mesh's own bindings when the pipeline was built
+  // (remapPipelineVertexInputs); they land at indices sz, sz+1, ... here.
+  const auto mesh_input_count = g.input.size();
+  const auto total = mesh_input_count + fallback_slots.size();
+  QVarLengthArray<QRhiCommandBuffer::VertexInput> draw_inputs(total);
+
+  int i = 0;
+  for(auto& in : g.input)
   {
-    const auto sz = g.input.size();
+    const std::size_t flat = base + (std::size_t)in.buffer;
+    if(flat >= bufs.buffers.size())
+      return false;
+    auto buf = bufs.buffers[flat].handle;
+    if(!buf)
+      return false;
+    draw_inputs[i++] = {buf, in.byte_offset};
+  }
 
-    QVarLengthArray<QRhiCommandBuffer::VertexInput> draw_inputs(sz);
+  // Fallback slots. Each Slot::binding_index is expressed in the global
+  // binding-index space; for a single-sub-mesh raw-raster draw it's
+  // always `mesh_input_count + k` for the k'th slot. A slot that cannot
+  // be placed leaves a null entry behind, so the draw is refused instead.
+  for(const auto& slot : fallback_slots)
+  {
+    const std::size_t idx = (std::size_t)slot.binding_index;
+    if(idx >= total || !slot.buffer)
+      return false; // malformed plan: skipping it would bind a null vertex buffer
+    draw_inputs[idx] = {slot.buffer, 0};
+  }
 
-    int i = 0;
-    for(auto& in : g.input)
-    {
-      // FIXME buffer offset? input offset?
-      if(bufs.buffers.size() <= in.buffer)
-        return;
-
-      auto buf = bufs.buffers[in.buffer].handle;
-      if(!buf)
-        return;
-      draw_inputs[i++] = {buf, in.byte_offset};
-    }
+  if(g.index.buffer >= 0)
+  {
+    const std::size_t flat_idx = base + (std::size_t)g.index.buffer;
+    if(flat_idx >= bufs.buffers.size() || !bufs.buffers[flat_idx].handle)
+      return false; // an indexed draw needs a live index buffer, same rule as the VBOs
+    auto buf = bufs.buffers[flat_idx].handle;
+    const auto idxFmt = g.index.format == decltype(g.index)::uint16
+                            ? QRhiCommandBuffer::IndexUInt16
+                            : QRhiCommandBuffer::IndexUInt32;
+    // If this bind crashes with a dangling buffer, the `buf` pointer
+    // logged here will match ASan's freed-at report. The mesh= and
+    // slot= fields tell us which CustomMesh and which MeshBuffers
+    // entry retained it.
+    BUFTRACE() << "bindIndexBuffer mesh=" << (void*)this
+               << " sub=" << mesh_index << " slot=" << flat_idx
+               << " buf=" << (void*)buf
+               << " offset=" << (qint64)g.index.byte_offset
+               << " bufs.size=" << (qsizetype)bufs.buffers.size();
+    cb.setVertexInput(
+        0, (int)total, draw_inputs.data(), buf, g.index.byte_offset, idxFmt);
+  }
+  else
+  {
+    cb.setVertexInput(0, (int)total, draw_inputs.data());
+  }
 
-    if(g.index.buffer >= 0)
-    {
-      auto buf = bufs.buffers[g.index.buffer].handle;
-      const auto idxFmt = g.index.format == decltype(g.index)::uint16
-                              ? QRhiCommandBuffer::IndexUInt16
-                              : QRhiCommandBuffer::IndexUInt32;
-      cb.setVertexInput(0, sz, draw_inputs.data(), buf, g.index.byte_offset, idxFmt);
-    }
-    else
-    {
-      cb.setVertexInput(0, sz, draw_inputs.data());
-    }
+  // Per-mesh indirect override: when THIS submesh carries its own
+  // `indirect_count` handle (different from bufs.indirectDrawBuffer),
+  // use it instead. Required for multi-batch MDI (opaque + transparent
+  // split emitted by ScenePreprocessor) where each sub-mesh drives a
+  // separate indirect-cmd list. Same rule for `cpu_draw_commands`.
+  QRhiBuffer* effIndirectBuf = bufs.indirectDrawBuffer;
+  quint32     effIndirectCount = bufs.indirectDrawCount;
+  const auto* effCpuCmds = &bufs.cpuDrawCommands;
+  std::decay_t<decltype(bufs.cpuDrawCommands)> perMeshCmds;
+  if(auto* h = static_cast<QRhiBuffer*>(g.indirect_count.handle))
+  {
+    effIndirectBuf = h;
+    effIndirectCount
+        = (quint32)(g.indirect_count.byte_size / (5 * sizeof(uint32_t)));
+    if(effIndirectCount == 0)
+      effIndirectCount = 1;
+  }
+  if(!g.cpu_draw_commands.empty())
+  {
+    perMeshCmds.assign(g.cpu_draw_commands.begin(), g.cpu_draw_commands.end());
+    effCpuCmds = &perMeshCmds;
+  }
 
+  // Multi-draw indirect: runtime capability check, not compile-time.
+  // Only meaningful for single-sub-mesh MDI-mode geometries.
+  if(bufs.useIndirectDraw && effIndirectBuf)
+  {
 #if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-    if(bufs.useIndirectDraw && bufs.indirectDrawBuffer)
+    if(bufs.gpuIndirectSupported)
     {
       if(bufs.indirectDrawIndexed)
-        cb.drawIndexedIndirect(bufs.indirectDrawBuffer, 0, 1);
+        cb.drawIndexedIndirect(
+            effIndirectBuf, bufs.indirectDrawOffset,
+            effIndirectCount, bufs.indirectDrawStride);
       else
-        cb.drawIndirect(bufs.indirectDrawBuffer, 0, 1);
-      continue;
+        cb.drawIndirect(
+            effIndirectBuf, bufs.indirectDrawOffset,
+            effIndirectCount, bufs.indirectDrawStride);
+      return true;
     }
 #endif
 
-    if(g.index.buffer > -1)
-    {
-      cb.drawIndexed(g.indices, g.instances);
-    }
-    else
+    // CPU fallback: iterate draw commands with correct firstInstance /
+    // baseVertex so each sub-draw gets its own per-draw data via
+    // gl_BaseInstance. Commands come from either the producer
+    // (ScenePreprocessor) or GPU readback (CSF).
+    if(!effCpuCmds->empty())
     {
-      cb.draw(g.vertices, g.instances);
+      const bool indexed = (g.index.buffer >= 0);
+      for(const auto& cmd : *effCpuCmds)
+      {
+        if(indexed)
+          cb.drawIndexed(
+              cmd.index_or_vertex_count, cmd.instance_count,
+              cmd.first_index_or_vertex, cmd.base_vertex, cmd.first_instance);
+        else
+          cb.draw(
+              cmd.index_or_vertex_count, cmd.instance_count,
+              cmd.first_index_or_vertex, cmd.first_instance);
+      }
+      return true;
     }
+    // No CPU commands yet (readback pending or first frame) — skip.
+    return false;
+  }
+
+  if(g.index.buffer > -1)
+    cb.drawIndexed(g.indices, g.instances);
+  else
+    cb.draw(g.vertices, g.instances);
+  return true;
+}
+
+void CustomMesh::draw(const MeshBuffers &bufs, QRhiCommandBuffer &cb) const noexcept
+{
+  // Plain draw: every sub-mesh is issued back-to-back with no state swap in
+  // between. That suits single-mesh geometries and MDI mode (one sub-mesh
+  // carrying an indirect buffer). Callers that need per-sub-mesh SRB auxes
+  // should loop over drawSingleMesh() themselves and rebind in between.
+  std::size_t buffer_offset = 0;
+  std::size_t mesh_index = 0;
+  for(const auto& sub_mesh : geom.meshes)
+  {
+    drawSingleMesh(mesh_index++, buffer_offset, bufs, cb);
+    buffer_offset += sub_mesh.buffers.size();
+  }
+}
+
+void CustomMesh::drawWithFallbackBindings(
+    const MeshBuffers& bufs, QRhiCommandBuffer& cb,
+    std::span<const FallbackBindingPlan::Slot> fallback_slots) const noexcept
+{
+  // Mirrors draw(), except that the caller's fallback-binding plan is
+  // forwarded to drawSingleMesh so the extra PerInstance identity buffers
+  // end up at the vertex-input indices the pipeline reserved for them.
+  std::size_t buffer_offset = 0;
+  std::size_t mesh_index = 0;
+  for(const auto& sub_mesh : geom.meshes)
+  {
+    drawSingleMesh(mesh_index++, buffer_offset, bufs, cb, fallback_slots);
+    buffer_offset += sub_mesh.buffers.size();
   }
 }
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp
index 5f8e977839..4dcfc7de72 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp
@@ -1,13 +1,25 @@
 #pragma once
 #include <Gfx/Graph/Mesh.hpp>
+#include <Gfx/Graph/VertexFallbackPlan.hpp>
 
 #include <ossia/dataflow/geometry_port.hpp>
 
+#include <QDebug>
 #include <QtGui/private/qrhi_p.h>
 
+#include <span>
+
 namespace score::gfx
 {
 
+// [BUFTRACE] — diagnostic logging around QRhiBuffer lifetime during
+// live graph edits (buftrace_enabled() is defined in CustomMesh.cpp).
+// Exposed so other TUs share the same gating. The macro is a single-pass
+// `for` rather than a bare `if`, so it can never capture a caller's `else`.
+SCORE_PLUGIN_GFX_EXPORT bool buftrace_enabled();
+#define BUFTRACE() for(bool buftrace_once = ::score::gfx::buftrace_enabled(); buftrace_once; buftrace_once = false) qDebug().nospace() << "[BUFTRACE] "
+
+
 class CustomMesh : public score::gfx::Mesh
 {
   ossia::mesh_list geom;
@@ -47,19 +59,19 @@ class CustomMesh : public score::gfx::Mesh
 
   void update_vbo(
       int buffer_index, const ossia::geometry::cpu_buffer& vtx_buf, MeshBuffers& meshbuf,
-      QRhiResourceUpdateBatch& rb) const noexcept;
+      QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept;
 
   void update_vbo(
       int buffer_index, const ossia::geometry::gpu_buffer& vtx_buf, MeshBuffers& meshbuf,
-      QRhiResourceUpdateBatch& rb) const noexcept;
+      QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept;
 
   void update_index(
       int buffer_index, const ossia::geometry::cpu_buffer& idx_buf, MeshBuffers& meshbuf,
-      QRhiResourceUpdateBatch& rb) const noexcept;
+      QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept;
 
   void update_index(
       int buffer_index, const ossia::geometry::gpu_buffer& idx_buf, MeshBuffers& meshbuf,
-      QRhiResourceUpdateBatch& rb) const noexcept;
+      QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept;
   void update(QRhi& rhi, MeshBuffers& output_meshbuf, QRhiResourceUpdateBatch& rb)
       const noexcept override;
   Flags flags() const noexcept override;
@@ -72,6 +84,33 @@ class CustomMesh : public score::gfx::Mesh
 
   void draw(const MeshBuffers& bufs, QRhiCommandBuffer& cb) const noexcept override;
 
+  // Fallback-aware variant: appends each `FallbackBindingPlan::Slot`
+  // buffer to the vertex-input array before issuing the draw. Used by
+  // raw-raster pipelines whose shaders declared "REQUIRED: false"
+  // VERTEX_INPUTS the upstream geometry doesn't provide. Non-virtual on
+  // purpose — only CustomMesh participates in the fallback path.
+  void drawWithFallbackBindings(
+      const MeshBuffers& bufs, QRhiCommandBuffer& cb,
+      std::span<const FallbackBindingPlan::Slot> fallback_slots) const noexcept;
+
+  // Draw a single sub-mesh (geom.meshes[mesh_index]) using the portion of
+  // `bufs.buffers` starting at `buffer_offset`. `buffer_offset` must match
+  // init()'s flat-concat layout: sum of geom.meshes[0..mesh_index-1].buffers.size().
+  // Returns true if a draw call was issued.
+  //
+  // Exposed so consumers that need per-sub-mesh state (e.g. RawRaster
+  // swapping the per_draw SSBO between meshes) can iterate sub-meshes
+  // themselves instead of invoking the fire-and-forget `draw()` above.
+  //
+  // `fallback_slots` (default empty) is merged into the vertex-input
+  // array at each slot's binding_index — bindings appended by the
+  // fallback-aware remap land past the mesh's own bindings, so slot
+  // indices are always contiguous after geom.meshes[mesh_index].input.
+  bool drawSingleMesh(
+      std::size_t mesh_index, std::size_t buffer_offset,
+      const MeshBuffers& bufs, QRhiCommandBuffer& cb,
+      std::span<const FallbackBindingPlan::Slot> fallback_slots = {}) const noexcept;
+
   const char* defaultVertexShader() const noexcept override;
 
   const ossia::geometry* semanticGeometry() const noexcept override
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.cpp
deleted file mode 100644
index 78277bae17..0000000000
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.cpp
+++ /dev/null
@@ -1,506 +0,0 @@
-#include "depthnode.hpp"
-
-#include <ossia/detail/ssize.hpp>
-
-DepthNode::DepthNode(const QShader& compute)
-{
-  m_computeS = compute;
-
-  input.push_back(new Port{this, {}, Types::Image, {}});
-  output.push_back(new Port{this, {}, Types::Image, {}});
-}
-
-struct RenderedDepthNode : score::gfx::NodeRenderer
-{
-  struct Pass
-  {
-    QRhiSampler* sampler{};
-    TextureRenderTarget renderTarget;
-    Pipeline p;
-    QRhiBuffer* processUBO{};
-  };
-  std::vector<Pass> m_passes;
-
-  DepthNode& n;
-
-  TextureRenderTarget m_lastPassRT;
-
-  std::vector<Sampler> m_samplers;
-
-  // Pipeline
-  Pipeline m_p;
-
-  QRhiBuffer* m_meshBuffer{};
-  QRhiBuffer* m_idxBuffer{};
-
-  QRhiBuffer* m_materialUBO{};
-  int m_materialSize{};
-  int64_t materialChangedIndex{-1};
-
-  RenderedDepthNode(const DepthNode& node) noexcept
-      : score::gfx::NodeRenderer{}
-      , n{const_cast<DepthNode&>(node)}
-  {
-  }
-
-  std::optional<QSize> renderTargetSize() const noexcept override { return {}; }
-
-  TextureRenderTarget createRenderTarget(const RenderState& state) override
-  {
-    auto sz = state.size;
-    if(auto true_sz = renderTargetSize())
-    {
-      sz = *true_sz;
-    }
-
-    m_lastPassRT = score::gfx::createRenderTarget(state, sz);
-    return m_lastPassRT;
-  }
-
-  QSize computeTextureSize(const isf::pass& pass)
-  {
-    QSize res = m_lastPassRT.renderTarget->pixelSize();
-
-    exprtk::symbol_table<float> syms;
-
-    syms.add_constant("var_WIDTH", res.width());
-    syms.add_constant("var_HEIGHT", res.height());
-    int port_k = 0;
-    for(const isf::input& input : n.m_descriptor.inputs)
-    {
-      auto port = n.input[port_k];
-      if(ossia::get_if<isf::float_input>(&input.data))
-      {
-        syms.add_constant("var_" + input.name, *(float*)port->value);
-      }
-      else
-      {
-        // TODO exprtk only handles the expression type...
-      }
-
-      port_k++;
-    }
-
-    if(auto expr = pass.width_expression; !expr.empty())
-    {
-      boost::algorithm::replace_all(expr, "$", "var_");
-      exprtk::expression<float> e;
-      e.register_symbol_table(syms);
-      exprtk::parser<float> parser;
-      bool ok = parser.compile(expr, e);
-      if(ok)
-        res.setWidth(e());
-      else
-        qDebug() << parser.error().c_str() << expr.c_str();
-    }
-    if(auto expr = pass.height_expression; !expr.empty())
-    {
-      boost::algorithm::replace_all(expr, "$", "var_");
-      exprtk::expression<float> e;
-      e.register_symbol_table(syms);
-      exprtk::parser<float> parser;
-      bool ok = parser.compile(expr, e);
-      if(ok)
-        res.setHeight(e());
-      else
-        qDebug() << parser.error().c_str() << expr.c_str();
-    }
-
-    return res;
-  }
-
-  int initShaderSamplers(Renderer& renderer)
-  {
-    QRhi& rhi = *renderer.state.rhi;
-    auto& input = n.input;
-    int cur_pos = 0;
-    for(auto in : input)
-    {
-      switch(in->type)
-      {
-        case Types::Empty:
-          break;
-        case Types::Int:
-        case Types::Float:
-          cur_pos += 4;
-          break;
-        case Types::Vec2:
-          cur_pos += 8;
-          if(cur_pos % 8 != 0)
-            cur_pos += 4;
-          break;
-        case Types::Vec3:
-          while(cur_pos % 16 != 0)
-          {
-            cur_pos += 4;
-          }
-          cur_pos += 12;
-          break;
-        case Types::Vec4:
-          while(cur_pos % 16 != 0)
-          {
-            cur_pos += 4;
-          }
-          cur_pos += 16;
-          break;
-        case Types::Image: {
-          auto sampler = rhi.newSampler(
-              QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
-              QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
-          SCORE_ASSERT(sampler->create());
-
-          auto texture = renderer.textureTargetForInputPort(*in);
-          m_samplers.push_back({sampler, texture});
-
-          if(cur_pos % 8 != 0)
-            cur_pos += 4;
-
-          *(float*)(n.m_materialData.get() + cur_pos) = texture->pixelSize().width();
-          *(float*)(n.m_materialData.get() + cur_pos + 4)
-              = texture->pixelSize().height();
-
-          cur_pos += 8;
-          break;
-        }
-        default:
-          break;
-      }
-    }
-    return cur_pos;
-  }
-
-  void initAudioTextures(Renderer& renderer)
-  {
-    QRhi& rhi = *renderer.state.rhi;
-    for(auto& texture : n.audio_textures)
-    {
-      auto sampler = rhi.newSampler(
-          QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
-          QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
-      sampler->create();
-
-      m_samplers.push_back({sampler, renderer.m_emptyTexture});
-      texture.samplers[&renderer] = {sampler, nullptr};
-    }
-  }
-
-  void initPassSamplers(Renderer& renderer, int& cur_pos)
-  {
-    QRhi& rhi = *renderer.state.rhi;
-    auto& model_passes = n.m_descriptor.passes;
-    for(int i = 0, N = model_passes.size(); i < N - 1; i++)
-    {
-      auto sampler = rhi.newSampler(
-          QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
-          QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
-      sampler->create();
-
-      const QSize texSize = computeTextureSize(model_passes[i]);
-
-      const auto fmt
-          = (model_passes[i].float_storage) ? QRhiTexture::RGBA32F : QRhiTexture::RGBA8;
-
-      auto tex = rhi.newTexture(
-          fmt, texSize, 1, QRhiTexture::Flag{QRhiTexture::RenderTarget});
-      tex->create();
-
-      m_samplers.push_back({sampler, tex});
-
-      if(cur_pos % 8 != 0)
-        cur_pos += 4;
-
-      *(float*)(n.m_materialData.get() + cur_pos) = texSize.width();
-      *(float*)(n.m_materialData.get() + cur_pos + 4) = texSize.height();
-
-      cur_pos += 8;
-    }
-  }
-
-  Pipeline
-  buildPassPipeline(Renderer& renderer, TextureRenderTarget tgt, QRhiBuffer* processUBO)
-  {
-    return score::gfx::buildPipeline(
-        renderer, n.mesh(), n.m_vertexS, n.m_fragmentS, tgt, processUBO, m_materialUBO,
-        m_samplers);
-  };
-
-  Pass createPass(Renderer& renderer, Sampler target)
-  {
-    QRhi& rhi = *renderer.state.rhi;
-    auto [sampler, tex] = target;
-
-    auto rt = rhi.newTextureRenderTarget({tex});
-    auto rp = rt->newCompatibleRenderPassDescriptor();
-    SCORE_ASSERT(rp);
-    rt->setRenderPassDescriptor(rp);
-    SCORE_ASSERT(rt->create());
-
-    QRhiBuffer* pubo{};
-    pubo = rhi.newBuffer(
-        QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO));
-    pubo->create();
-
-    auto pip = buildPassPipeline(renderer, TextureRenderTarget{.texture = tex, .renderPass = rp, .renderTarget = rt}, pubo);
-    auto srb = pip.srb;
-
-    // We have to replace the rendered-to texture by an empty one in each pass,
-    // as RHI does not support both reading and writing to a texture in the same pass.
-    {
-      QVarLengthArray<QRhiShaderResourceBinding> bindings;
-      for(auto it = srb->cbeginBindings(); it != srb->cendBindings(); ++it)
-      {
-        bindings.push_back(*it);
-
-        if(it->data()->type == QRhiShaderResourceBinding::SampledTexture)
-        {
-          if(it->data()->u.stex.texSamplers->tex == tex)
-          {
-            bindings.back().data()->u.stex.texSamplers->tex = renderer.m_emptyTexture;
-          }
-        }
-      }
-      srb->setBindings(bindings.begin(), bindings.end());
-      srb->create();
-    }
-    return Pass{sampler, {tex, rp, rt}, pip, pubo};
-  }
-
-  void init(Renderer& renderer) override
-  {
-    // init()
-    {
-      const auto& mesh = n.mesh();
-      if(!m_meshBuffer)
-      {
-        auto [mbuffer, ibuffer] = renderer.initMeshBuffer(mesh);
-        m_meshBuffer = mbuffer;
-        m_idxBuffer = ibuffer;
-      }
-    }
-
-    QRhi& rhi = *renderer.state.rhi;
-
-    m_materialSize = n.m_materialSize;
-    if(m_materialSize > 0)
-    {
-      m_materialUBO = rhi.newBuffer(
-          QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
-      SCORE_ASSERT(m_materialUBO->create());
-    }
-
-    int cur_pos = initShaderSamplers(renderer);
-
-    initAudioTextures(renderer);
-
-    auto& model_passes = n.m_descriptor.passes;
-    if(!model_passes.empty())
-    {
-      int first_pass_sampler_idx = std::ssize(m_samplers);
-
-      // First create all the samplers / textures
-      initPassSamplers(renderer, cur_pos);
-
-      // Then create the passes
-      for(int i = 0, N = model_passes.size(); i < N - 1; i++)
-      {
-        auto target = m_samplers[first_pass_sampler_idx + i];
-        auto pass = createPass(renderer, target);
-        m_passes.push_back(pass);
-      }
-    }
-
-    // Last pass is the main write
-    {
-      QRhiBuffer* pubo{};
-      pubo = rhi.newBuffer(
-          QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO));
-      pubo->create();
-
-      auto p = buildPassPipeline(renderer, m_lastPassRT, pubo);
-      m_passes.push_back(Pass{nullptr, m_lastPassRT, p, pubo});
-    }
-  }
-
-  void update(Renderer& renderer, QRhiResourceUpdateBatch& res) override
-  {
-    {
-      if(m_materialUBO && m_materialSize > 0
-         && materialChangedIndex != n.materialChanged)
-      {
-        char* data = n.m_materialData.get();
-        res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
-        materialChangedIndex = n.materialChanged;
-      }
-    }
-
-    QRhi& rhi = *renderer.state.rhi;
-    for(auto& audio : n.audio_textures)
-    {
-      bool textureChanged = false;
-      auto& [rhiSampler, rhiTexture] = audio.samplers[&renderer];
-      const auto curSz = (rhiTexture) ? rhiTexture->pixelSize() : QSize{};
-      int numSamples = curSz.width() * curSz.height();
-      if(numSamples != audio.data.size())
-      {
-        delete rhiTexture;
-        rhiTexture = nullptr;
-        textureChanged = true;
-      }
-
-      if(!rhiTexture)
-      {
-        if(audio.channels > 0)
-        {
-          int samples = audio.data.size() / audio.channels;
-          rhiTexture = rhi.newTexture(
-              QRhiTexture::D32F, {samples, audio.channels}, 1, QRhiTexture::Flag{});
-          rhiTexture->create();
-          textureChanged = true;
-        }
-        else
-        {
-          rhiTexture = nullptr;
-          textureChanged = true;
-        }
-      }
-
-      if(textureChanged)
-      {
-        score::gfx::replaceTexture(
-            *m_p.srb, rhiSampler, rhiTexture ? rhiTexture : renderer.m_emptyTexture);
-      }
-
-      if(rhiTexture)
-      {
-        QRhiTextureSubresourceUploadDescription subdesc(
-            audio.data.data(), audio.data.size() * 4);
-        QRhiTextureUploadEntry entry{0, 0, subdesc};
-        QRhiTextureUploadDescription desc{entry};
-        res.uploadTexture(rhiTexture, desc);
-      }
-    }
-
-    {
-      // Update all the process UBOs
-      for(int i = 0, N = m_passes.size(); i < N; i++)
-      {
-        n.standardUBO.passIndex = i;
-        res.updateDynamicBuffer(
-            m_passes[i].processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO);
-      }
-    }
-  }
-
-  void releaseWithoutRenderTarget(Renderer& r) override
-  {
-    // customRelease
-    {
-      for(auto& texture : n.audio_textures)
-      {
-        auto it = texture.samplers.find(&r);
-        if(it != texture.samplers.end())
-        {
-          if(auto tex = it->second.second)
-          {
-            if(tex != r.m_emptyTexture)
-              tex->deleteLater();
-          }
-        }
-      }
-
-      for(auto& pass : m_passes)
-      {
-        // TODO do we also want to remove the last pass texture here ?!
-        pass.p.release();
-        pass.renderTarget.release();
-        pass.processUBO->deleteLater();
-      }
-
-      m_passes.clear();
-    }
-
-    for(auto sampler : m_samplers)
-    {
-      delete sampler.sampler;
-      // texture isdeleted elsewxheree
-    }
-    m_samplers.clear();
-
-    delete m_materialUBO;
-    m_materialUBO = nullptr;
-
-    m_p.release();
-
-    m_meshBuffer = nullptr;
-  }
-
-  void release(Renderer& r) override { releaseWithoutRenderTarget(r); }
-
-  void runPass(
-      Renderer& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch& res) override
-  {
-    // if(m_passes.empty())
-    //   return RenderedNode::runPass(renderer, cb, res);
-
-    // Update a first time everything
-
-    // PASSINDEX must be set to the last index
-    // FIXME
-    n.standardUBO.passIndex = m_passes.size() - 1;
-
-    update(renderer, res);
-
-    auto updateBatch = &res;
-
-    // Draw the passes
-    for(const auto& pass : m_passes)
-    {
-      SCORE_ASSERT(pass.renderTarget.renderTarget);
-      SCORE_ASSERT(pass.p.pipeline);
-      SCORE_ASSERT(pass.p.srb);
-      // TODO : combine all the uniforms..
-
-      auto rt = pass.renderTarget.renderTarget;
-      auto pipeline = pass.p.pipeline;
-      auto srb = pass.p.srb;
-      auto texture = pass.renderTarget.texture;
-
-      // TODO need to free stuff
-      cb.beginPass(rt, Qt::black, {1.0f, 0}, updateBatch);
-      {
-        cb.setGraphicsPipeline(pipeline);
-        cb.setShaderResources(srb);
-
-        if(texture)
-        {
-          cb.setViewport(QRhiViewport(
-              0, 0, texture->pixelSize().width(), texture->pixelSize().height()));
-        }
-        else
-        {
-          const auto sz = renderer.state.size;
-          cb.setViewport(QRhiViewport(0, 0, sz.width(), sz.height()));
-        }
-
-        assert(this->m_meshBuffer);
-        assert(this->m_meshBuffer->usage().testFlag(QRhiBuffer::VertexBuffer));
-        n.mesh().setupBindings(*this->m_meshBuffer, this->m_idxBuffer, cb);
-
-        cb.draw(n.mesh().vertexCount);
-      }
-
-      cb.endPass();
-
-      if(pass.p.pipeline != m_passes.back().p.pipeline)
-      {
-        // Not the last pass: we have to use another resource batch
-        updateBatch = renderer.state.rhi->nextResourceUpdateBatch();
-      }
-    }
-  }
-};
-
-score::gfx::NodeRenderer* DepthNode::createRenderer(Renderer& r) const noexcept
-{
-  return new RenderedDepthNode{*this};
-}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.hpp
deleted file mode 100644
index 5ced2459a7..0000000000
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-#include "mesh.hpp"
-#include "node.hpp"
-#include "renderer.hpp"
-
-namespace score::gfx
-{
-struct RenderedDepthNode;
-struct DepthNode : score::gfx::ProcessNode
-{
-  DepthNode(const QShader& compute);
-
-  virtual ~DepthNode();
-
-  score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept;
-
-private:
-  friend struct RenderedISFNode;
-  QShader m_computeS;
-};
-}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp
index feb3d30c62..ca9254f3bb 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp
@@ -1114,6 +1114,8 @@ void DirectVideoNodeRenderer::createPipelines(RenderList& r)
   if(m_gpu)
   {
     auto shaders = m_gpu->init(r);
+    m_cachedVertexShader = shaders.first;
+    m_cachedFragmentShader = shaders.second;
     SCORE_ASSERT(m_p.empty());
     score::gfx::defaultPassesInit(
         m_p, this->node().output[0]->edges, r, r.defaultQuad(), shaders.first,
@@ -1122,6 +1124,15 @@ void DirectVideoNodeRenderer::createPipelines(RenderList& r)
 }
 
 void DirectVideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
+
+  for(Edge* edge : this->node().output[0]->edges)
+    addOutputPass(renderer, *edge, res);
+}
+
+void DirectVideoNodeRenderer::initState(
+    RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   auto& rhi = *renderer.state.rhi;
 
@@ -1148,7 +1159,15 @@ void DirectVideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch
   }
 
   createGpuDecoder(rhi);
-  createPipelines(renderer);
+
+  // Cache shaders from the GPU decoder so addOutputPass() can use them
+  if(m_gpu)
+  {
+    auto shaders = m_gpu->init(renderer);
+    m_cachedVertexShader = shaders.first;
+    m_cachedFragmentShader = shaders.second;
+  }
+
   m_recomputeScale = true;
 }
 
@@ -1289,6 +1308,48 @@ void DirectVideoNodeRenderer::update(
 }
 
 void DirectVideoNodeRenderer::release(RenderList& r)
+{
+  releaseState(r);
+}
+
+void DirectVideoNodeRenderer::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  if(!m_gpu)
+    return;
+  if(!m_cachedVertexShader.isValid() || !m_cachedFragmentShader.isValid())
+    return;
+
+  auto rt = renderer.renderTargetForOutput(edge);
+  if(rt.renderTarget)
+  {
+    auto pip = score::gfx::buildPipeline(
+        renderer, renderer.defaultQuad(), m_cachedVertexShader, m_cachedFragmentShader,
+        rt, m_processUBO, m_materialUBO, m_gpu->samplers);
+    if(pip.pipeline)
+      m_p.emplace_back(&edge, Pass{rt, pip, nullptr});
+  }
+}
+
+void DirectVideoNodeRenderer::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  auto it = ossia::find_if(m_p, [&](auto& p) { return p.first == &edge; });
+  if(it != m_p.end())
+  {
+    it->second.p.release();
+    if(it->second.processUBO)
+      it->second.processUBO->deleteLater();
+    m_p.erase(it);
+  }
+}
+
+bool DirectVideoNodeRenderer::hasOutputPassForEdge(Edge& edge) const
+{
+  return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; })
+         != m_p.end();
+}
+
+void DirectVideoNodeRenderer::releaseState(RenderList& r)
 {
   // Destroy GPU decoder BEFORE closeFile() frees m_hwDeviceCtx.
   // HW decoders (CUDA, Vulkan) hold references to the HW device context
@@ -1299,6 +1360,9 @@ void DirectVideoNodeRenderer::release(RenderList& r)
     m_gpu.reset();
   }
 
+  m_cachedVertexShader = {};
+  m_cachedFragmentShader = {};
+
   delete m_processUBO;
   m_processUBO = nullptr;
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp
index 3c0e766f5c..bdee9ccd9f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp
@@ -63,6 +63,13 @@ class DirectVideoNodeRenderer : public NodeRenderer
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+
 private:
   const VideoNodeBase& node() const noexcept
   {
@@ -131,6 +138,8 @@ class DirectVideoNodeRenderer : public NodeRenderer
   };
 
   std::unique_ptr<GPUVideoDecoder> m_gpu;
+  QShader m_cachedVertexShader;
+  QShader m_cachedFragmentShader;
   score::gfx::ScaleMode m_currentScaleMode{};
 
   int64_t m_lastRequestedFlicks{-1};
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.cpp
new file mode 100644
index 0000000000..145ae597dd
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.cpp
@@ -0,0 +1,186 @@
+#include <Gfx/Graph/FlattenedSceneFilterNode.hpp>
+#include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/RenderList.hpp>
+
+#include <ossia/detail/hash.hpp>
+#include <ossia/network/value/value_conversion.hpp>
+
+#include <algorithm>
+
+namespace score::gfx
+{
+
+struct RenderedFlattenedSceneFilterNode final : NodeRenderer
+{
+  const FlattenedSceneFilterNode& m_node;
+  ossia::geometry_spec m_outputSpec;
+  ossia::geometry_spec m_lastInput;
+  int m_lastMode{-1};
+  int m_lastMatch{0};
+  std::string m_lastMatchStr;
+
+  RenderedFlattenedSceneFilterNode(const FlattenedSceneFilterNode& n)
+      : NodeRenderer{n}
+      , m_node{n}
+  {
+  }
+
+  void init(RenderList&, QRhiResourceUpdateBatch&) override { m_initialized = true; }
+  void release(RenderList&) override
+  {
+    m_outputSpec = {};
+    m_lastInput = {};
+    m_lastMode = -1;
+    m_lastMatchStr.clear();
+    m_initialized = false;
+  }
+
+  bool predicate(
+      const ossia::geometry& g, int mode, uint32_t match,
+      uint32_t match_str_hash) const noexcept
+  {
+    switch(mode)
+    {
+      case 0:  return g.filter_tag == match;
+      case 1:  return g.filter_tag != match;
+      case 2:  return g.filter_material_index == match;
+      case 3:  return g.filter_material_index != match;
+      case 4:  return (uint32_t)g.blend == match;
+      case 5:  return (uint32_t)g.blend != match;
+      case 6:  return g.depth_write == (match != 0);
+      case 7:  return g.depth_write != (match != 0);
+      case 8:  return (uint32_t)g.cull_mode == match;
+      case 9:  return (uint32_t)g.cull_mode != match;
+      case 10: return (uint32_t)g.topology == match;
+      case 11: return (uint32_t)g.topology != match;
+      case 12: return g.filter_tag == match_str_hash;
+      case 13: return g.filter_tag != match_str_hash;
+      default: return true;
+    }
+  }
+
+  void rebuild()
+  {
+    m_outputSpec.meshes = std::make_shared<ossia::mesh_list>();
+    m_outputSpec.filters
+        = this->geometry.filters
+              ? this->geometry.filters
+              : std::make_shared<ossia::geometry_filter_list>();
+
+    if(!this->geometry.meshes)
+      return;
+
+    const uint32_t matchU = (uint32_t)m_node.m_match;
+    // Same hash producers stamp on filter_tag (rapidhash truncated to 32
+    // bits). Empty match_str short-circuits to 0u so it matches the
+    // "untagged" sentinel rather than rapidhash-of-empty (a non-zero
+    // value that would never match anything in practice).
+    const uint32_t matchStrHash
+        = m_node.m_match_str.empty()
+              ? 0u
+              : (uint32_t)ossia::hash_string(m_node.m_match_str);
+    for(const auto& g : this->geometry.meshes->meshes)
+    {
+      if(predicate(g, m_node.m_mode, matchU, matchStrHash))
+        m_outputSpec.meshes->meshes.push_back(g);
+    }
+    m_outputSpec.meshes->dirty_index = this->geometry.meshes->dirty_index;
+  }
+
+  void update(RenderList&, QRhiResourceUpdateBatch&, Edge*) override
+  {
+    const bool geomChanged = (this->geometry != m_lastInput) || this->geometryChanged;
+    const bool paramsChanged
+        = (m_node.m_mode != m_lastMode) || (m_node.m_match != m_lastMatch)
+          || (m_node.m_match_str != m_lastMatchStr);
+    if(!geomChanged && !paramsChanged && m_outputSpec.meshes)
+      return;
+
+    rebuild();
+    m_lastInput = this->geometry;
+    m_lastMode = m_node.m_mode;
+    m_lastMatch = m_node.m_match;
+    m_lastMatchStr = m_node.m_match_str;
+    this->geometryChanged = false;
+  }
+
+  void runInitialPasses(
+      RenderList& renderer, QRhiCommandBuffer&, QRhiResourceUpdateBatch*&,
+      Edge& edge) override
+  {
+    if(!m_outputSpec.meshes)
+      return;
+    auto* sink = edge.sink;
+    if(!sink || !sink->node)
+      return;
+    auto rn_it = sink->node->renderedNodes.find(&renderer);
+    if(rn_it == sink->node->renderedNodes.end())
+      return;
+    auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink);
+    if(it == sink->node->input.end())
+      return;
+    int port_idx = (int)(it - sink->node->input.begin());
+    rn_it->second->process(port_idx, m_outputSpec, edge.source);
+  }
+
+  void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { }
+
+  // Data-only renderer — no per-edge GPU pass state to release.
+  void removeOutputPass(RenderList&, Edge&) override { }
+};
+
+FlattenedSceneFilterNode::FlattenedSceneFilterNode()
+{
+  // Port 0: geometry input
+  input.push_back(new Port{this, {}, Types::Geometry, {}});
+  // Port 1: filter mode
+  {
+    auto* data = new int{0};
+    input.push_back(new Port{this, data, Types::Int, {}});
+  }
+  // Port 2: match value (int, modes 0..11)
+  {
+    auto* data = new int{0};
+    input.push_back(new Port{this, data, Types::Int, {}});
+  }
+  // Port 3: match string (modes 12/13). Carried as a control-only port
+  // (no GPU edge type — strings flow through ossia::value via process()
+  // rather than as a GPU resource handle).
+  {
+    auto* data = new std::string{};
+    input.push_back(new Port{this, data, Types::Empty, {}});
+  }
+  output.push_back(new Port{this, {}, Types::Geometry, {}});
+}
+
+FlattenedSceneFilterNode::~FlattenedSceneFilterNode() = default;
+
+void FlattenedSceneFilterNode::process(int32_t port, const ossia::value& v)
+{
+  switch(port)
+  {
+    case 1:
+      m_mode = ossia::convert<int>(v);
+      materialChange();
+      break;
+    case 2:
+      m_match = ossia::convert<int>(v);
+      materialChange();
+      break;
+    case 3:
+      m_match_str = ossia::convert<std::string>(v);
+      materialChange();
+      break;
+    default:
+      ProcessNode::process(port, v);
+      break;
+  }
+}
+
+NodeRenderer*
+FlattenedSceneFilterNode::createRenderer(RenderList&) const noexcept
+{
+  return new RenderedFlattenedSceneFilterNode{*this};
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.hpp
new file mode 100644
index 0000000000..fb0bb5a2cd
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.hpp
@@ -0,0 +1,63 @@
+#pragma once
+#include <Gfx/Graph/Node.hpp>
+
+namespace score::gfx
+{
+
+/**
+ * @brief Per-pass filter on a flattened scene: geometry_spec → geometry_spec.
+ *
+ * Reads the `filter_tag` and `filter_material_index` metadata fields that
+ * ScenePreprocessorNode writes onto every output geometry, and emits a new
+ * geometry_spec containing only the draws that match the configured
+ * predicate. All underlying GPU buffers are shared via `shared_ptr` — the
+ * filter only rewrites the mesh_list; no GPU data is copied.
+ *
+ * Inputs:
+ *   - Port 0: Geometry (Types::Geometry)
+ *   - Port 1: Filter mode (Types::Int):
+ *       0  = tag equals match value
+ *       1  = tag differs from match value
+ *       2  = material index equals match value
+ *       3  = material index differs from match value
+ *       4  = blend_mode equals match (0 = opaque, 1 = premul-alpha)
+ *       5  = blend_mode differs from match
+ *       6  = depth_write equals (match != 0)
+ *       7  = depth_write differs from (match != 0)
+ *       8  = cull_mode equals match (0 = none, 1 = front, 2 = back)
+ *       9  = cull_mode differs from match
+ *       10 = topology equals match (0 = triangles, 1 = tri strip, …)
+ *       11 = topology differs from match
+ *       12 = format_id equals match_str (rapidhash of match_str truncated
+ *            to 32 bits compared with filter_tag; an empty match_str
+ *            short-circuits to 0u so it matches the "untagged" sentinel
+ *            rather than the rapidhash of the empty string)
+ *       13 = format_id differs from match_str
+ *   - Port 2: Match value (Types::Int) — user-supplied, interpreted per mode
+ *   - Port 3: Match string (Types::Empty control) — used by modes 12/13
+ *
+ * Per-draw filtering (e.g. "alphaMode=BLEND draws inside a single MDI
+ * batch") is NOT handled here — ScenePreprocessor emits one geometry
+ * per MDI batch so mesh-level fields collapse to 0. Use a Tier-3
+ * CSF compute filter for per-draw cases; this node is for multi-mesh
+ * inputs (per-object producers, pre-MDI composition).
+ *
+ * Outputs:
+ *   - Port 0: Geometry (Types::Geometry)
+ */
+class SCORE_PLUGIN_GFX_EXPORT FlattenedSceneFilterNode : public ProcessNode
+{
+public:
+  FlattenedSceneFilterNode();
+  ~FlattenedSceneFilterNode() override;
+
+  score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override;
+
+  void process(int32_t port, const ossia::value& v) override;
+
+  int m_mode{0};
+  int m_match{0};
+  std::string m_match_str;
+};
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp
index 07b080f381..b7172e5c11 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp
@@ -28,7 +28,23 @@ struct geometry_input_port_vis
 
   void operator()(const isf::long_input& in) noexcept
   {
-    *reinterpret_cast<int*>(data) = in.def;
+    // Enum mode: in.def is the *index* into VALUES, but the shader and the
+    // downstream ComboBox-driven port both consume the numeric VALUE at that
+    // index. Resolve here so the initial UBO matches post-interaction state.
+    // String-valued VALUES fall back to the index (GLSL can't receive strings).
+    int initial = (int)in.def;
+    if(!in.values.empty())
+    {
+      auto idx = std::min<std::size_t>(in.def, in.values.size() - 1);
+      const auto& v = in.values[idx];
+      if(auto i = ossia::get_if<int64_t>(&v))
+        initial = (int)*i;
+      else if(auto d = ossia::get_if<double>(&v))
+        initial = (int)*d;
+      else
+        initial = (int)idx;
+    }
+    *reinterpret_cast<int*>(data) = initial;
     self.input.push_back(new Port{&self, data, Types::Int, {}});
     data += 4;
     sz += 4;
@@ -136,6 +152,12 @@ struct geometry_input_port_vis
     // Storage buffers are typically managed by the system
     // No UI controls or uniform buffer data needed
   }
+
+  void operator()(const isf::uniform_input& in) noexcept
+  {
+    // UBO inputs are sourced from upstream Buffer ports; no material-UBO
+    // storage needed here.
+  }
   
   void operator()(const isf::texture_input& in) noexcept
   {
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp
index 10c644d73f..0091d3882e 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp
@@ -20,6 +20,11 @@ TextureRenderTarget GeometryFilterNodeRenderer::renderTargetForInput(const Port&
 }
 
 void GeometryFilterNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
+}
+
+void GeometryFilterNodeRenderer::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   QRhi& rhi = *renderer.state.rhi;
 
@@ -30,7 +35,10 @@ void GeometryFilterNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBa
         = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
     m_materialUBO->setName("GeometryFilterNodeRenderer.ubo");
     SCORE_ASSERT(m_materialUBO->create());
+    if(node().m_material_data)
+      res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, node().m_material_data.get());
   }
+  m_initialized = true;
 }
 
 void GeometryFilterNodeRenderer::update(
@@ -47,8 +55,17 @@ void GeometryFilterNodeRenderer::update(
 
 void GeometryFilterNodeRenderer::release(RenderList& r)
 {
-  delete m_materialUBO;
+  releaseState(r);
+}
+
+void GeometryFilterNodeRenderer::releaseState(RenderList& r)
+{
+  if(!m_initialized)
+    return;
+  if(m_materialUBO)
+    m_materialUBO->deleteLater();
   m_materialUBO = nullptr;
+  m_initialized = false;
 }
 
 void GeometryFilterNodeRenderer::runInitialPasses(
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp
index 48242c10b3..64868f5823 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp
@@ -11,8 +11,10 @@ struct SCORE_PLUGIN_GFX_EXPORT GeometryFilterNodeRenderer : score::gfx::NodeRend
 
   TextureRenderTarget renderTargetForInput(const Port& p) override;
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   void release(RenderList& r) override;
+  void releaseState(RenderList& r) override;
 
   void runInitialPasses(
       RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
@@ -20,6 +22,9 @@ struct SCORE_PLUGIN_GFX_EXPORT GeometryFilterNodeRenderer : score::gfx::NodeRend
 
   void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override;
 
+  // Data-only renderer — no per-edge GPU pass state to release.
+  void removeOutputPass(RenderList&, Edge&) override { }
+
   QRhiBuffer* material() const noexcept { return m_materialUBO; }
 
 private:
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.cpp
new file mode 100644
index 0000000000..82757ebbef
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.cpp
@@ -0,0 +1,983 @@
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <Gfx/Graph/CustomMesh.hpp>  // BUFTRACE
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RhiClearBuffer.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>  // MaterialGPU layout
+
+#include <score/tools/Debug.hpp>
+
+#include <QDebug>
+#include <QImage>
+
+namespace score::gfx
+{
+namespace
+{
+// Per-arena configuration: fixed-stride layout. Buffer capacity is
+// stride × slot_count. Consumer shaders index arena.entries[slot_index]
+// with std430 stride == slot_stride.
+// Capacities are the Phase 1 hard cap; growth is a Phase 2 concern
+// (allocate() logs + returns invalid Slot on overflow). Sizes are
+// deliberately conservative — a typical scene has 1-6 cameras, 1-16
+// lights, 10-50 materials, 50-1000 draws. The caps below allow ~50×
+// headroom before we need grow-in-place.
+struct ArenaConfig
+{
+  uint32_t slot_stride;   // byte stride per slot
+  uint32_t slot_count;    // number of slots
+  QRhiBuffer::UsageFlags usage;
+  QRhiBuffer::Type type;
+  const char* name;
+};
+
+// Entry order MUST match the Arena enum in GpuResourceRegistry.hpp.
+constexpr ArenaConfig kArenaConfigs[(std::size_t)GpuResourceRegistry::Arena::Count_]
+    = {
+        // RawCamera — 64 B stride × 32 slots = 2 KiB. UBO dynamic.
+        {64, 32, QRhiBuffer::UniformBuffer, QRhiBuffer::Dynamic,
+         "GpuResourceRegistry::raw_camera"},
+
+        // RawLight — 64 B stride × 4096 slots = 256 KiB. SSBO static
+        //   (QRhi forbids StorageBuffer + Dynamic). Sized for VJ /
+        //   particle-driven workflows that emit thousands of procedural
+        //   lights via pack_lights_from_points / wander_lights_inline /
+        //   grid_lights_inline. Typical 3D-file scenes (a handful of
+        //   scene-node lights) pay only for the first N used slots —
+        //   the rest is dormant device-local memory, no per-frame
+        //   upload cost. Keep in sync with ScenePreprocessor's
+        //   lightIdxBytes floor (must be slot_count * 4 bytes).
+        {64, 4096, QRhiBuffer::StorageBuffer, QRhiBuffer::Static,
+         "GpuResourceRegistry::raw_light"},
+
+        // RawTransform — 64 B stride × 16384 slots = 1 MiB. Sized for
+        //   heavy glTF / FBX scenes with 5-10k nodes.
+        {64, 16384, QRhiBuffer::StorageBuffer, QRhiBuffer::Static,
+         "GpuResourceRegistry::raw_transform"},
+
+        // Material — 80 B stride × 32768 slots = 2.5 MiB. Shader indexes
+        //   this arena directly as scene_materials.entries[material_index].
+        //   Sized for enterprise / architectural-scale USD content (city
+        //   assemblies, CAD exports, Pixar Kitchen_set-class scenes) —
+        //   those routinely pack 1k–20k unique materials across all their
+        //   per-prop references. Small scenes pay only for the first N
+        //   used slots; the rest is dormant SSBO space.
+        {80, 32768, QRhiBuffer::StorageBuffer, QRhiBuffer::Static,
+         "GpuResourceRegistry::material"},
+
+        // Env — 64 B stride × 8 slots = 512 B. UBO dynamic.
+        {64, 8, QRhiBuffer::UniformBuffer, QRhiBuffer::Dynamic,
+         "GpuResourceRegistry::env"},
+};
+
+} // namespace
+
+GpuResourceRegistry::~GpuResourceRegistry()
+{
+  destroy();
+}
+
+void GpuResourceRegistry::init(QRhi& rhi, QRhiResourceUpdateBatch& batch)
+{
+  SCORE_ASSERT(!m_rhi);
+  m_rhi = &rhi;
+
+  for(std::size_t i = 0; i < m_arenas.size(); ++i)
+  {
+    auto& a = m_arenas[i];
+    const auto& cfg = kArenaConfigs[i];
+    const uint32_t bytes = cfg.slot_stride * cfg.slot_count;
+
+    a.buffer = rhi.newBuffer(cfg.type, cfg.usage, bytes);
+    a.buffer->setName(cfg.name);
+    if(!a.buffer->create())
+    {
+      qWarning() << "GpuResourceRegistry: failed to create arena buffer"
+                 << cfg.name
+                 << "— falling back to null (allocations will fail)";
+      delete a.buffer;
+      a.buffer = nullptr;
+      continue;
+    }
+    // Zero-fill the arena. Vulkan does NOT initialise VkBuffer memory
+    // — the underlying device-memory page contains whatever was there
+    // before. Arenas are sparse-uploaded by producers (each Light /
+    // Material / Transform / Camera node writes only its own slot);
+    // unused slots stay at their initial value. After a fresh
+    // RenderList (resize), every consumer indexing past the populated
+    // range reads device-memory garbage. Especially visible for lights:
+    // shaders compose world-space light positions via
+    // world_transforms.data[L.transform_slot], and L.color/range read
+    // from the RawLight arena — both arenas garbage on the resize
+    // frame produces the user's "wildly different lighting per
+    // resize" symptom (saturated colours, blown-out highlights, very
+    // dark, varying per attempt).
+    //
+    // Cost: ~4 MiB total upload per RenderList init across all arenas
+    // (RawCamera 2 KiB + RawLight 256 KiB + RawTransform 1 MiB +
+    // Material 2.5 MiB + Env 512 B). One-time per resize, negligible.
+    // RhiClearBuffer routes Dynamic buffers via chunked
+    // updateDynamicBuffer and Static buffers via uploadStaticBuffer
+    // — both fed from a thread-local zero pool so we don't pay a
+    // per-arena std::vector<char>(bytes, 0) allocation on every
+    // RenderList init.
+    RhiClearBuffer::clearBuffer(rhi, batch, a.buffer, 0, bytes);
+
+    a.slot_stride = cfg.slot_stride;
+    a.slot_count = cfg.slot_count;
+    a.usage = cfg.usage;
+    a.type = cfg.type;
+    // Generation table sized to slot_count. Start at 1 so a freshly-
+    // default gpu_slot_ref (generation=0) never matches a real slot.
+    a.slot_generations.assign(cfg.slot_count, 1u);
+    // Free-list stack: push slots in reverse order so pop yields slot
+    // index 0, 1, 2, ... in allocation order. Keeps the arena buffer
+    // densely packed at the front, which downstream tooling may assume.
+    a.free_slots.clear();
+    a.free_slots.reserve(cfg.slot_count);
+    for(uint32_t s = cfg.slot_count; s-- > 0;)
+      a.free_slots.push_back(s);
+  }
+
+  // Reserve Material arena slot 0 as the "default material" sentinel.
+  // arenaSlotForMaterial(nullptr) returns 0; seedDefaults() writes a
+  // white-dielectric MaterialGPU into that slot once a resource-update
+  // batch is available. Pop from the free-list now so no producer can
+  // claim it. (Other arenas keep slot 0 available — only Material has
+  // the "null fallback" semantics.)
+  {
+    auto& mat = m_arenas[(std::size_t)Arena::Material];
+    if(!mat.free_slots.empty() && mat.free_slots.back() == 0u)
+      mat.free_slots.pop_back();
+  }
+
+  // Mesh arena — one QRhiBuffer per attribute stream, plus TWO shared
+  // OffsetAllocators (vertex-units and index-units). See the
+  // "CRITICAL invariant" block in GpuResourceRegistry.hpp for why the
+  // allocators are NOT per-stream: a single baseVertex applies to all
+  // vertex bindings, so per-mesh byte offsets across streams must be
+  // proportional to per-stream stride. One allocator → one logical
+  // vertex slot → guaranteed lockstep.
+  for(std::size_t i = 0; i < m_meshStreams.size(); ++i)
+  {
+    auto& s = m_meshStreams[i];
+    const uint32_t bytes = kMeshCapBytes[i];
+
+    using UF = QRhiBuffer::UsageFlags;
+    UF usage;
+    if(i == (std::size_t)MeshStream::Indices)
+      usage = UF(QRhiBuffer::IndexBuffer);
+    else
+      usage = UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer);
+
+    s.buffer = rhi.newBuffer(QRhiBuffer::Static, usage, bytes);
+    const char* names[(std::size_t)MeshStream::Count_] = {
+        "MeshArena::positions", "MeshArena::normals",
+        "MeshArena::texcoords", "MeshArena::tangents",
+        "MeshArena::colors",    "MeshArena::texcoords1",
+        "MeshArena::indices"};
+    s.buffer->setName(names[i]);
+    if(!s.buffer->create())
+    {
+      qWarning() << "GpuResourceRegistry: failed to create mesh arena stream"
+                 << names[i] << "— acquireMeshSlab will return null.";
+      delete s.buffer;
+      s.buffer = nullptr;
+      continue;
+    }
+    s.capacity_bytes = bytes;
+    s.usage = usage;
+  }
+
+  // Shared vertex/index allocators. Capacity in SLOTS, not bytes.
+  // For vertex slots: every vertex stream must accommodate
+  // capacity_slots × its_stride bytes. The min over all vertex streams
+  // (every stream before Indices) determines the safe cap.
+  uint32_t vertSlotCap = 0xFFFFFFFFu;
+  for(std::size_t i = 0; i < (std::size_t)MeshStream::Indices; ++i)
+  {
+    if(!m_meshStreams[i].buffer)
+    {
+      vertSlotCap = 0;
+      break;
+    }
+    vertSlotCap = std::min(
+        vertSlotCap, m_meshStreams[i].capacity_bytes / kMeshStride[i]);
+  }
+  m_vertexSlotsCapacity = vertSlotCap;
+  if(vertSlotCap > 0)
+  {
+    m_vertexAllocator = std::make_unique<OffsetAllocator::Allocator>(
+        vertSlotCap, 128u * 1024u);
+  }
+
+  const auto& idxStream = m_meshStreams[(std::size_t)MeshStream::Indices];
+  m_indexSlotsCapacity = idxStream.buffer
+      ? idxStream.capacity_bytes
+            / kMeshStride[(std::size_t)MeshStream::Indices]
+      : 0u;
+  if(m_indexSlotsCapacity > 0)
+  {
+    m_indexAllocator = std::make_unique<OffsetAllocator::Allocator>(
+        m_indexSlotsCapacity, 128u * 1024u);
+  }
+
+  m_vertexSlotsUsed = 0;
+  m_indexSlotsUsed = 0;
+}
+
+void GpuResourceRegistry::seedDefaults(QRhiResourceUpdateBatch& batch)
+{
+  if(m_defaults_seeded)
+    return;
+
+  // Material arena slot 0 — the default material returned by
+  // arenaSlotForMaterial(nullptr). MaterialGPU's in-class initializers
+  // are exactly the right defaults (white baseColor, metallic=0,
+  // roughness=0.5, occlusion=1, no emissive, all texture refs null), so
+  // a default-constructed instance is the byte payload we want.
+  auto& mat = m_arenas[(std::size_t)Arena::Material];
+  if(mat.buffer && mat.slot_stride >= sizeof(MaterialGPU))
+  {
+    MaterialGPU defaultMat{};
+    batch.uploadStaticBuffer(
+        mat.buffer, /*offset=*/0,
+        (quint32)sizeof(MaterialGPU), &defaultMat);
+  }
+
+  m_defaults_seeded = true;
+}
+
+void GpuResourceRegistry::destroy(RenderList& renderer)
+{
+  // Route every arena buffer release through RenderList::releaseBuffer
+  // so the RenderList's bookkeeping sees the release and the buffer is
+  // destroyed through the same code path as every other QRhiBuffer in
+  // the pipeline.
+  for(auto& a : m_arenas)
+  {
+    if(a.buffer)
+    {
+      renderer.releaseBuffer(a.buffer);
+      a.buffer = nullptr;
+    }
+    a.slot_stride = 0;
+    a.slot_count = 0;
+    for(auto& g : a.slot_generations)
+      ++g;
+    a.slot_generations.clear();
+    a.free_slots.clear();
+  }
+  m_defaults_seeded = false;
+  for(auto& ch : m_textureChannels)
+  {
+    for(auto& b : ch.buckets)
+    {
+      if(b.array)
+      {
+        b.array->deleteLater();
+        b.array = nullptr;
+      }
+      if(b.sampler)
+      {
+        b.sampler->deleteLater();
+        b.sampler = nullptr;
+      }
+      b.layers = 0;
+      b.layerMap.clear();
+    }
+    ch.buckets.clear();
+    ch.dynamicSlotMap.clear();
+    ch.dynamicTextures.clear();
+    ch.dynamicSlotLastUse.clear();
+    ch.dynamicSlotCounter = 0;
+  }
+  // Mesh arena teardown. Route through releaseBuffer (same invariant
+  // as the component arenas) so downstream MeshBuffers that still
+  // reference one of our slab offsets don't hit use-after-free.
+  for(auto& s : m_meshStreams)
+  {
+    if(s.buffer)
+    {
+      renderer.releaseBuffer(s.buffer);
+      s.buffer = nullptr;
+    }
+    s.capacity_bytes = 0;
+  }
+  m_vertexAllocator.reset();
+  m_indexAllocator.reset();
+  m_vertexSlotsCapacity = 0;
+  m_indexSlotsCapacity = 0;
+  m_vertexSlotsUsed = 0;
+  m_indexSlotsUsed = 0;
+  m_meshSlabs.clear();
+  m_pendingReleases.clear();
+  m_rhi = nullptr;
+}
+
+void GpuResourceRegistry::destroyOwned()
+{
+  // Direct teardown driven by the owning OutputNode right before its
+  // QRhi goes away (destroyOutput, setSwapchainFormat, ~OutputNode).
+  // The registry persists across RenderList rebuilds (resize fast
+  // path), so there is no live RenderList to route releaseBuffer()
+  // through as destroy(RenderList&) does.
+  //
+  // Precondition: the QRhi is still alive — callers MUST invoke this
+  // BEFORE RenderState::destroy(). `delete` on a QRhiBuffer /
+  // QRhiTexture / QRhiSampler destroys the underlying GPU resource and
+  // frees the wrapper; RenderList::release relies on the same contract
+  // for its direct deletes of m_outputUBO / m_emptyTexture*.
+  // Fixed-stride component arenas.
+  for(auto& arena : m_arenas)
+  {
+    delete arena.buffer;
+    arena.buffer = nullptr;
+    arena.slot_count = 0;
+    arena.slot_stride = 0;
+    for(auto& generation : arena.slot_generations)
+      ++generation;
+    arena.free_slots.clear();
+    arena.slot_generations.clear();
+  }
+  m_defaults_seeded = false;
+  // Bucket arrays and samplers are deleted; slot tables are only cleared.
+  for(auto& channel : m_textureChannels)
+  {
+    for(auto& bucket : channel.buckets)
+    {
+      delete bucket.array;
+      delete bucket.sampler;
+      bucket.array = nullptr;
+      bucket.sampler = nullptr;
+      bucket.layerMap.clear();
+      bucket.layers = 0;
+    }
+    channel.buckets.clear();
+    channel.dynamicSlotCounter = 0;
+    channel.dynamicSlotLastUse.clear();
+    channel.dynamicTextures.clear();
+    channel.dynamicSlotMap.clear();
+  }
+  // Mesh stream buffers, then the CPU-side mesh bookkeeping.
+  for(auto& stream : m_meshStreams)
+  {
+    delete stream.buffer;
+    stream.buffer = nullptr;
+    stream.capacity_bytes = 0;
+  }
+  m_meshSlabs.clear();
+  m_pendingReleases.clear();
+  m_vertexAllocator.reset();
+  m_indexAllocator.reset();
+  m_vertexSlotsUsed = 0;
+  m_indexSlotsUsed = 0;
+  m_vertexSlotsCapacity = 0;
+  m_indexSlotsCapacity = 0;
+  m_rhi = nullptr;
+}
+
+void GpuResourceRegistry::destroy()
+{
+  // Destructor fallback: forgets every QRhi wrapper without deleting it
+  // and without touching the QRhi. Harmless after destroy(RenderList&)
+  // has already run; otherwise the wrappers leak on purpose — if the
+  // QRhi is already torn down, a deleteLater on a dangling buffer would
+  // crash, and leaking is the lesser evil.
+  for(auto& arena : m_arenas)
+  {
+    arena.buffer = nullptr;
+    arena.slot_count = 0;
+    arena.slot_stride = 0;
+    for(auto& generation : arena.slot_generations)
+      ++generation;
+    arena.free_slots.clear();
+    arena.slot_generations.clear();
+  }
+  m_defaults_seeded = false;
+  for(auto& channel : m_textureChannels)
+  {
+    // Same rule for texture arrays and samplers: no deleteLater here,
+    // their storage is gone if the QRhi has already been torn down.
+    // The wrappers are leaked and only the bookkeeping is reset.
+    for(auto& bucket : channel.buckets)
+    {
+      bucket.array = nullptr;
+      bucket.sampler = nullptr;
+      bucket.layerMap.clear();
+      bucket.layers = 0;
+    }
+    channel.buckets.clear();
+    channel.dynamicSlotCounter = 0;
+    channel.dynamicSlotLastUse.clear();
+    channel.dynamicTextures.clear();
+    channel.dynamicSlotMap.clear();
+  }
+  // Mesh streams follow the same leak-the-wrapper rule; the allocators
+  // and slab tables are pure CPU-side state and are torn down normally.
+  for(auto& stream : m_meshStreams)
+  {
+    stream.buffer = nullptr;
+    stream.capacity_bytes = 0;
+  }
+  m_meshSlabs.clear();
+  m_pendingReleases.clear();
+  m_vertexAllocator.reset();
+  m_indexAllocator.reset();
+  m_vertexSlotsUsed = 0;
+  m_indexSlotsUsed = 0;
+  m_vertexSlotsCapacity = 0;
+  m_indexSlotsCapacity = 0;
+  m_rhi = nullptr;
+}
+
+const char* GpuResourceRegistry::textureChannelArrayName(TextureChannel ch) noexcept
+{
+  // Maps each known channel to its "<channel>Array" name string. A
+  // value outside the five listed channels yields the empty string
+  // rather than a null pointer.
+  if(ch == TextureChannel::BaseColor) return "baseColorArray";
+  if(ch == TextureChannel::MetalRough) return "metalRoughArray";
+  if(ch == TextureChannel::Normal) return "normalArray";
+  if(ch == TextureChannel::Emissive) return "emissiveArray";
+  if(ch == TextureChannel::Occlusion) return "occlusionArray";
+  return "";
+}
+
+const char* GpuResourceRegistry::textureChannelDynBaseName(TextureChannel ch) noexcept
+{
+  // Maps each known channel to its "<channel>Dyn" base-name string. A
+  // value outside the five listed channels yields the empty string
+  // rather than a null pointer.
+  if(ch == TextureChannel::BaseColor) return "baseColorDyn";
+  if(ch == TextureChannel::MetalRough) return "metalRoughDyn";
+  if(ch == TextureChannel::Normal) return "normalDyn";
+  if(ch == TextureChannel::Emissive) return "emissiveDyn";
+  if(ch == TextureChannel::Occlusion) return "occlusionDyn";
+  return "";
+}
+
+QRhiTexture::Flags GpuResourceRegistry::textureChannelFlags(TextureChannel ch) noexcept
+{
+  // BaseColor and Emissive get the sRGB flag. Every other channel gets
+  // no flags (linear): that includes Occlusion, a single-channel data
+  // texture (R = occlusion) stored as RGBA8 for now with only the R
+  // channel used — a future optimisation could route it to R8 to save
+  // VRAM.
+
+  const bool wantsSrgb
+      = ch == TextureChannel::BaseColor || ch == TextureChannel::Emissive;
+  if(!wantsSrgb)
+    return {};
+  return QRhiTexture::sRGB;
+}
+
+
+int GpuResourceRegistry::resolveDynamicSlot(
+    TextureChannel channel, void* native_handle) noexcept
+{
+  if(native_handle == nullptr)
+    return -1;
+
+  // Slots are keyed by QRhi's monotonic globalResourceId, not by the
+  // raw pointer: the heap allocator may hand the same address to a new
+  // QRhiTexture once the previous one is destroyed (qrhivulkan.cpp:
+  // 5909-5912 documents this hazard for QRhi's own SRB tracking, which
+  // pairs the pointer with `m_id`). With the id as key a stale entry
+  // simply fails to match instead of aliasing a fresh resource.
+  auto* const texture = static_cast<QRhiTexture*>(native_handle);
+  const quint64 resourceId = texture->globalResourceId();
+
+  auto& state = textureChannel(channel);
+  auto& slotOf = state.dynamicSlotMap;
+  auto& textures = state.dynamicTextures;
+  auto& lastUse = state.dynamicSlotLastUse;
+  // One tick per call; doubles as the access stamp for LRU ordering.
+  const uint64_t stamp = ++state.dynamicSlotCounter;
+
+  // Case 1 — known texture: refresh its stamp, return its slot.
+  if(const auto known = slotOf.find(resourceId); known != slotOf.end())
+  {
+    const int slot = known->second;
+    const bool tracked = slot >= 0 && slot < (int)lastUse.size();
+    if(tracked)
+      lastUse[slot] = stamp;
+    return slot;
+  }
+
+  // Case 2 — new texture and the table has room: append a slot.
+  const int occupied = (int)textures.size();
+  if(occupied < kMaxDynamicSlots)
+  {
+    slotOf[resourceId] = occupied;
+    textures.push_back(texture);
+    lastUse.push_back(stamp);
+    return occupied;
+  }
+
+  // Case 3 — new texture, table full: recycle the least-recently-used
+  // slot (first one wins on ties). Without eviction, a session that
+  // swaps capture sources or resizes a video texture more than
+  // kMaxDynamicSlots times would stay pinned to its first entries, every
+  // later texture would get -1 and its materials would silently blank.
+  int victim = 0;
+  for(int i = 1, n = (int)lastUse.size(); i < n; ++i)
+  {
+    if(lastUse[i] < lastUse[victim])
+      victim = i;
+  }
+
+  // Forget whichever id currently owns the victim slot. The map is keyed
+  // by id, hence the linear scan; it is bounded by kMaxDynamicSlots.
+  auto owner = slotOf.begin();
+  while(owner != slotOf.end() && owner->second != victim)
+    ++owner;
+  if(owner != slotOf.end())
+    slotOf.erase(owner);
+
+  slotOf[resourceId] = victim;
+  textures[victim] = texture;
+  lastUse[victim] = stamp;
+  return victim;
+}
+
+
+GpuResourceRegistry::Slot GpuResourceRegistry::allocate(Arena arena, uint32_t size)
+{
+  // Pops one fixed-stride slot off the arena's free list. Every failure
+  // logs a warning and returns `result` untouched: its slot_index is
+  // still kInvalidIndex, so Slot::valid() reports false to the caller.
+  Slot result{arena, Slot::kInvalidIndex, size};
+  const auto arenaIdx = (std::size_t)arena;
+  auto& store = m_arenas[arenaIdx];
+  if(store.slot_stride == 0 || !store.buffer)
+  {
+    qWarning() << "GpuResourceRegistry::allocate: arena"
+               << (int)arena << "is not initialised";
+    return result;
+  }
+  if(store.slot_stride < size)
+  {
+    qWarning() << "GpuResourceRegistry::allocate: requested size"
+               << size << "exceeds arena"
+               << kArenaConfigs[arenaIdx].name << "stride"
+               << store.slot_stride;
+    return result;
+  }
+  if(store.free_slots.empty())
+  {
+    qWarning() << "GpuResourceRegistry::allocate: arena"
+               << kArenaConfigs[arenaIdx].name
+               << "is full — all" << store.slot_count << "slots in use";
+    return result;
+  }
+  // Fresh generation: refs still holding the previous one fail isLive().
+  result.slot_index = store.free_slots.back();
+  store.free_slots.pop_back();
+  result.generation = ++store.slot_generations[result.slot_index];
+  return result;
+}
+
+void GpuResourceRegistry::free(Slot& slot)
+{
+  if(!slot.valid())
+    return;
+  auto& a = m_arenas[(std::size_t)slot.arena];
+  const uint32_t i = slot.slot_index;
+  // A stale copy of an already-freed Slot must not re-queue the index.
+  if(i < a.slot_generations.size() && a.slot_generations[i] == slot.generation)
+  {
+    ++a.slot_generations[i]; // outstanding refs to this slot fail isLive()
+    a.free_slots.push_back(i);
+  }
+  slot.slot_index = Slot::kInvalidIndex; // the caller's handle is always reset
+  slot.generation = 0;
+}
+
+QRhiBuffer* GpuResourceRegistry::buffer(Arena arena) const noexcept
+{
+  return m_arenas[static_cast<std::size_t>(arena)].buffer; // arena's backing buffer
+}
+
+uint32_t GpuResourceRegistry::slotOffset(const Slot& slot) const noexcept
+{
+  if(slot.valid()) // byte offset = arena stride × slot index
+    return m_arenas[static_cast<std::size_t>(slot.arena)].slot_stride * slot.slot_index;
+  return 0u; // invalid slots map to offset 0
+}
+
+uint32_t GpuResourceRegistry::arenaSlotStride(Arena arena) const noexcept
+{
+  return m_arenas[static_cast<std::size_t>(arena)].slot_stride; // bytes per slot
+}
+
+uint32_t GpuResourceRegistry::arenaSlotCount(Arena arena) const noexcept
+{
+  return m_arenas[static_cast<std::size_t>(arena)].slot_count; // capacity in slots
+}
+
+void GpuResourceRegistry::updateSlot(
+    QRhiResourceUpdateBatch& res, const Slot& slot, const void* data,
+    uint32_t size) noexcept
+{
+  // Queues a write into `slot`; no-op on invalid input or a missing buffer.
+  if(!slot.valid() || !data || size == 0)
+    return;
+  auto& a = m_arenas[(std::size_t)slot.arena];
+  if(!a.buffer)
+    return;
+  // The write must stay inside its own slot; bounding it only by the
+  // whole buffer let an oversized payload overwrite neighbouring slots.
+  SCORE_ASSERT(slot.slot_index < a.slot_count && size <= a.slot_stride);
+  if(a.type == QRhiBuffer::Dynamic) // Dynamic-usage arena
+    res.updateDynamicBuffer(a.buffer, slotOffset(slot), size, data);
+  else // any other usage goes through the static upload path
+    res.uploadStaticBuffer(a.buffer, slotOffset(slot), size, data);
+}
+
+// ─── Mesh arena manager ──────────────────────────────────────────
+
+GpuResourceRegistry::MeshSlab* GpuResourceRegistry::acquireMeshSlab(
+    uint64_t stable_id, uint32_t vertex_count, uint32_t index_count,
+    uint32_t current_frame) noexcept
+{
+  // Returns the slab cached under `stable_id`, or allocates one vertex
+  // range and one index range for it when none exists or when the
+  // requested counts differ. `freshly_allocated` is true on a new slab
+  // and false on a cache hit. Returns nullptr when stable_id is 0, when
+  // an allocator is missing, or when a pool is exhausted (logged).
+  // NOTE(review): last_seen_frame is not written here — presumably the
+  // caller follows up with markMeshSlabSeen(); confirm.
+  if(stable_id == 0)
+    return nullptr;  // caller without stable_id — skip slab caching
+
+  // Fast path: an existing slab with the same counts is returned as-is.
+  auto it = m_meshSlabs.find(stable_id);
+  if(it != m_meshSlabs.end())
+  {
+    auto& slab = it->second;
+    if(slab.vertex_count == vertex_count && slab.index_count == index_count)
+    {
+      slab.freshly_allocated = false;
+      return &slab;
+    }
+    // Count mismatch — the same id re-emits with different counts. The
+    // old ranges go to the grace queue instead of being freed, so an
+    // in-flight draw referencing the old offset cannot read
+    // freed-and-reused bytes.
+    //
+    // `released_frame` is stamped with current_frame so the wait runs
+    // from this enqueue. Stamping 0 would make the entry eligible on the
+    // very next sweep as soon as current_frame >= grace.
+    //
+    // The *Used trackers drop here, once, at logical-release time; the
+    // later OffsetAllocator::free in the drain does not decrement again.
+    if(m_vertexAllocator
+       && slab.vertex_slot.metadata != OffsetAllocator::Allocation::NO_SPACE)
+    {
+      const auto sz = m_vertexAllocator->allocationSize(slab.vertex_slot);
+      if(m_vertexSlotsUsed >= sz)
+        m_vertexSlotsUsed -= sz;
+    }
+    if(m_indexAllocator
+       && slab.index_slot.metadata != OffsetAllocator::Allocation::NO_SPACE)
+    {
+      const auto sz = m_indexAllocator->allocationSize(slab.index_slot);
+      if(m_indexSlotsUsed >= sz)
+        m_indexSlotsUsed -= sz;
+    }
+    PendingRelease pr;
+    pr.stable_id = stable_id;
+    pr.released_frame = current_frame;
+    pr.vertex_slot = slab.vertex_slot;
+    pr.index_slot = slab.index_slot;
+    m_pendingReleases.push_back(pr);
+    m_meshSlabs.erase(it);
+  }
+
+  // Drain releases whose grace has elapsed BEFORE the fresh allocate, so
+  // their ranges are reusable by this very request. Otherwise a
+  // near-capacity scene under live-edit needs room for `new + old` and
+  // logs a spurious "vertex/index pool exhausted" warning.
+  // NOTE(review): grace is hard-coded to 2 here while sweepMeshSlabs()
+  // takes it as a parameter; if a caller sweeps with a larger grace this
+  // drain releases earlier than that caller expects — confirm the two
+  // values are meant to agree.
+  drainExpiredPendingReleases(current_frame, /*grace=*/2u);
+
+  if(!m_vertexAllocator || !m_indexAllocator)
+    return nullptr;
+
+  // Fresh allocation: ONE vertex slot (in vertex units) shared by
+  // positions/normals/texcoords/tangents, and ONE index slot.
+  MeshSlab slab;
+  slab.stable_id = stable_id;
+  slab.vertex_count = vertex_count;
+  slab.index_count = index_count;
+  slab.freshly_allocated = true;
+
+  if(vertex_count > 0)
+  {
+    slab.vertex_slot = m_vertexAllocator->allocate(vertex_count);
+    if(slab.vertex_slot.offset == OffsetAllocator::Allocation::NO_SPACE)
+    {
+      qWarning() << "GpuResourceRegistry::acquireMeshSlab: vertex pool "
+                    "exhausted (requested"
+                 << vertex_count << "verts; free"
+                 << m_vertexAllocator->storageReport().totalFreeSpace
+                 << "vertex slots). Skipping mesh stable_id="
+                 << qulonglong(stable_id);
+      return nullptr;
+    }
+    m_vertexSlotsUsed += vertex_count;
+  }
+  BUFTRACE() << "[MeshSlab] alloc id=" << qulonglong(stable_id)
+             << " vc=" << vertex_count << " ic=" << index_count
+             << " vSlot=" << slab.vertex_slot.offset
+             << " (used=" << m_vertexSlotsUsed << "/" << m_vertexSlotsCapacity
+             << ")";
+
+  if(index_count > 0)
+  {
+    slab.index_slot = m_indexAllocator->allocate(index_count);
+    if(slab.index_slot.offset == OffsetAllocator::Allocation::NO_SPACE)
+    {
+      qWarning() << "GpuResourceRegistry::acquireMeshSlab: index pool "
+                    "exhausted (requested"
+                 << index_count << "indices; free"
+                 << m_indexAllocator->storageReport().totalFreeSpace
+                 << "index slots). Skipping mesh stable_id="
+                 << qulonglong(stable_id);
+      // Undo the vertex allocation made above so it does not leak.
+      if(vertex_count > 0
+         && slab.vertex_slot.metadata != OffsetAllocator::Allocation::NO_SPACE)
+      {
+        m_vertexAllocator->free(slab.vertex_slot);
+        if(m_vertexSlotsUsed >= vertex_count)
+          m_vertexSlotsUsed -= vertex_count;
+      }
+      return nullptr;
+    }
+    m_indexSlotsUsed += index_count;
+  }
+
+  const auto [inserted_it, ok] = m_meshSlabs.emplace(stable_id, slab);
+  return ok ? &inserted_it->second : nullptr;
+}
+
+void GpuResourceRegistry::markMeshSlabSeen(
+    uint64_t stable_id, uint32_t current_frame) noexcept
+{
+  // Stamp the slab with the current frame; unknown ids are ignored.
+  if(const auto found = m_meshSlabs.find(stable_id); found != m_meshSlabs.end())
+    found->second.last_seen_frame = current_frame;
+}
+
+void GpuResourceRegistry::drainExpiredPendingReleases(
+    uint32_t current_frame, uint32_t grace) noexcept
+{
+  // Second half of the deferred release: entries enqueued at least
+  // `grace` frames ago have their ranges returned to the OffsetAllocators
+  // and leave the queue. The *Used trackers are deliberately left alone
+  // here: every enqueue site (releaseMeshSlab, sweepMeshSlabs,
+  // acquireMeshSlab's count-mismatch path) already decremented them, so
+  // they describe live slabs only, not "live + grace-pending".
+  if(current_frame < grace)
+    return; // nothing can have aged `grace` frames yet
+
+  auto pending = m_pendingReleases.begin();
+  while(pending != m_pendingReleases.end())
+  {
+    const bool expired = pending->released_frame + grace <= current_frame;
+    if(!expired)
+    {
+      ++pending;
+      continue;
+    }
+    BUFTRACE() << "[MeshSlab] free  id=" << qulonglong(pending->stable_id)
+               << " vSlot=" << pending->vertex_slot.offset
+               << " iSlot=" << pending->index_slot.offset
+               << " released_at=" << pending->released_frame
+               << " current=" << current_frame;
+    const bool hasVertexRange
+        = pending->vertex_slot.metadata != OffsetAllocator::Allocation::NO_SPACE;
+    const bool hasIndexRange
+        = pending->index_slot.metadata != OffsetAllocator::Allocation::NO_SPACE;
+    if(m_vertexAllocator && hasVertexRange)
+      m_vertexAllocator->free(pending->vertex_slot);
+    if(m_indexAllocator && hasIndexRange)
+      m_indexAllocator->free(pending->index_slot);
+    pending = m_pendingReleases.erase(pending);
+  }
+}
+
+void GpuResourceRegistry::sweepMeshSlabs(
+    uint32_t current_frame, uint32_t grace) noexcept
+{
+  // Phase 1 retires every slab that has not been seen for `grace`
+  // frames: its vertex + index Allocations move to m_pendingReleases.
+  // Phase 2 (drainExpiredPendingReleases) then returns to the
+  // OffsetAllocators the queued ranges whose own grace has elapsed.
+  //
+  // The delay guards against use-after-free: an indirect_draw_cmds
+  // entry issued last frame may still reference the slab's byte offset
+  // through an in-flight draw on the GPU. Waiting
+  // `grace >= FramesInFlight + 1` frames ensures the GPU is done.
+  constexpr auto kNoSpace = OffsetAllocator::Allocation::NO_SPACE;
+
+  // While current_frame < grace nothing is old enough yet, so phase 1
+  // is skipped as a whole; the loop condition carries that guard so the
+  // age test below never has to deal with it.
+  auto cursor = m_meshSlabs.begin();
+  while(current_frame >= grace && cursor != m_meshSlabs.end())
+  {
+    auto& slab = cursor->second;
+    const bool stillFresh = slab.last_seen_frame + grace > current_frame;
+    if(stillFresh)
+    {
+      ++cursor;
+      continue;
+    }
+
+    // The *Used trackers drop now, at logical-release time, so the
+    // per-frame live footprint counts active slabs only; phase 2 frees
+    // the ranges without decrementing again.
+    if(m_vertexAllocator && slab.vertex_slot.metadata != kNoSpace)
+    {
+      const auto verts = m_vertexAllocator->allocationSize(slab.vertex_slot);
+      if(m_vertexSlotsUsed >= verts)
+        m_vertexSlotsUsed -= verts;
+    }
+    if(m_indexAllocator && slab.index_slot.metadata != kNoSpace)
+    {
+      const auto indices = m_indexAllocator->allocationSize(slab.index_slot);
+      if(m_indexSlotsUsed >= indices)
+        m_indexSlotsUsed -= indices;
+    }
+
+    PendingRelease retired;
+    retired.stable_id = cursor->first;
+    retired.released_frame = current_frame;
+    retired.vertex_slot = slab.vertex_slot;
+    retired.index_slot = slab.index_slot;
+    m_pendingReleases.push_back(retired);
+    cursor = m_meshSlabs.erase(cursor);
+  }
+
+  // Phase 2: free whatever has served its grace.
+  drainExpiredPendingReleases(current_frame, grace);
+}
+
+void GpuResourceRegistry::releaseMeshSlab(
+    uint64_t stable_id, uint32_t current_frame) noexcept
+{
+  const auto found = m_meshSlabs.find(stable_id);
+  if(found == m_meshSlabs.end())
+    return; // unknown id: nothing to release
+
+  // The sub-allocations are queued in m_pendingReleases instead of
+  // being freed on the spot. The backing QRhiBuffer is long-lived; only
+  // the offsets need guarding: an immediate free would let the
+  // allocator hand the same offset out again this frame, a UAF for any
+  // in-flight GPU draw that still references it. sweepMeshSlabs()
+  // drains the queue once released_frame + grace <= current_frame,
+  // matching QRhi's own deferred-release contract.
+  constexpr auto kNoSpace = OffsetAllocator::Allocation::NO_SPACE;
+  auto& slab = found->second;
+
+  // Single eager decrement of the *Used trackers, at logical-release
+  // time; the phase-2 drain does not decrement again.
+  if(m_vertexAllocator && slab.vertex_slot.metadata != kNoSpace)
+  {
+    const auto verts = m_vertexAllocator->allocationSize(slab.vertex_slot);
+    if(m_vertexSlotsUsed >= verts)
+      m_vertexSlotsUsed -= verts;
+  }
+  if(m_indexAllocator && slab.index_slot.metadata != kNoSpace)
+  {
+    const auto indices = m_indexAllocator->allocationSize(slab.index_slot);
+    if(m_indexSlotsUsed >= indices)
+      m_indexSlotsUsed -= indices;
+  }
+
+  PendingRelease queued;
+  queued.stable_id = stable_id;
+  queued.released_frame = current_frame;
+  queued.vertex_slot = slab.vertex_slot;
+  queued.index_slot = slab.index_slot;
+  m_pendingReleases.push_back(queued);
+  m_meshSlabs.erase(found);
+}
+
+uint32_t GpuResourceRegistry::meshSlabOffsetBytes(
+    const MeshSlab& slab, MeshStream stream) const noexcept
+{
+  // Every vertex stream derives its byte offset from the one shared
+  // vertex_slot (offset in vertex units × that stream's stride); the
+  // index stream uses index_slot (offset in index units × index stride).
+  // Sharing one allocation across the vertex streams keeps their offsets
+  // from diverging under fragmentation.
+  const bool isIndexStream = (stream == MeshStream::Indices);
+  const auto elementOffset
+      = isIndexStream ? slab.index_slot.offset : slab.vertex_slot.offset;
+  return elementOffset * kMeshStride[(std::size_t)stream];
+}
+
+QRhiBuffer* GpuResourceRegistry::meshStreamBuffer(MeshStream s) const noexcept
+{
+  return m_meshStreams[static_cast<std::size_t>(s)].buffer; // stream's backing buffer
+}
+
+void GpuResourceRegistry::uploadMeshStream(
+    QRhiResourceUpdateBatch& res, const MeshSlab& slab,
+    MeshStream s, const void* data, uint32_t size) noexcept
+{
+  // Bounds-checked upload of `data` into the slab's range of stream `s`.
+  auto& stream = m_meshStreams[(std::size_t)s];
+  if(!stream.buffer || !data || size == 0)
+    return; // nothing to upload, or no buffer to upload into
+  const uint32_t offset = meshSlabOffsetBytes(slab, s);
+  const uint32_t slot_capacity_bytes // element count × stride of the stream
+      = (s == MeshStream::Indices)
+          ? slab.index_count * kMeshStride[(std::size_t)MeshStream::Indices]
+          : slab.vertex_count * kMeshStride[(std::size_t)s];
+  if(size > slot_capacity_bytes)
+  {
+    qWarning() << "GpuResourceRegistry::uploadMeshStream: upload" << size
+               << "bytes exceeds slab capacity" << slot_capacity_bytes
+               << "(stream" << (int)s << ")";
+    return;
+  }
+  // 64-bit sum: a 32-bit `offset + size` could wrap past this check.
+  const uint64_t end = uint64_t(offset) + size;
+  if(end > stream.capacity_bytes)
+  {
+    qWarning() << "GpuResourceRegistry::uploadMeshStream: upload offset+size"
+               << qulonglong(end) << "exceeds stream capacity"
+               << stream.capacity_bytes << "(stream" << (int)s << ")";
+    return;
+  }
+  res.uploadStaticBuffer(stream.buffer, offset, size, data);
+}
+
+uint32_t GpuResourceRegistry::meshStreamUsedBytes(MeshStream s) const noexcept
+{
+  // Used element count × stride of the requested stream.
+  const auto used = (s == MeshStream::Indices) ? m_indexSlotsUsed : m_vertexSlotsUsed;
+  return used * kMeshStride[(std::size_t)s];
+}
+
+uint32_t GpuResourceRegistry::meshStreamFreeBytes(MeshStream s) const noexcept
+{
+  // Free space reported by the matching allocator (index allocator for
+  // the index stream, vertex allocator for every other stream), scaled
+  // by the stream's stride. Reports 0 while that allocator is missing.
+  const auto& allocator
+      = (s == MeshStream::Indices) ? m_indexAllocator : m_vertexAllocator;
+  if(!allocator)
+    return 0u;
+  const auto freeElements = allocator->storageReport().totalFreeSpace;
+  return freeElements * kMeshStride[(std::size_t)s];
+}
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.hpp
new file mode 100644
index 0000000000..93a6d7259d
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.hpp
@@ -0,0 +1,796 @@
+#pragma once
+
+#include <score_plugin_gfx_export.h>
+
+#include <ossia/dataflow/geometry_port.hpp>  // ossia::gpu_slot_ref
+#include <ossia/detail/flat_map.hpp>
+#include <ossia/detail/hash_map.hpp>
+
+#ifndef OFFSETALLOCATOR_HPP_2026_04_24
+#define OFFSETALLOCATOR_HPP_2026_04_24
+#include <offsetAllocator.hpp>
+#endif
+
+#include <QtGui/private/qrhi_p.h>
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace score::gfx
+{
+class RenderList;
+
+/**
+ * @brief Arena store for GPU-resident scene data, owned by the OutputNode.
+ *
+ * Owns one QRhiBuffer per well-known arena kind (camera UBO, light SSBO,
+ * material SSBO, per-draw SSBO, …) and hands out offset-based slots via
+ * a fixed-stride free-list. Source nodes (Camera, Light, PBRMesh, …) hold
+ * a slot for their lifetime and write their packed bytes into it at
+ * their own `update()`; the preprocessor binds the registry's buffers as
+ * scene auxiliaries. No CPU→GPU work happens in the preprocessor's render
+ * path — every upload is gated to a source-node message.
+ *
+ * Phase 1: scalar UBO / SSBO arenas only. Texture-array layer
+ * allocation (baseColorArray, metalRoughArray, …) stays inside the
+ * existing ScenePreprocessor::ChannelState for now; it will migrate into
+ * this registry in a later pass.
+ *
+ * Lifetime: init() once per QRhi lifetime, destroyOwned() before QRhi teardown.
+ * Not thread-safe — all calls must come from the render thread.
+ */
+class SCORE_PLUGIN_GFX_EXPORT GpuResourceRegistry
+{
+public:
+  // Well-known arenas. Size tables live in GpuResourceRegistry.cpp and
+  // match the packed GPU layouts declared in SceneGPUState.hpp +
+  // CameraMath.hpp. Extend the enum carefully — every entry implies a
+  // QRhiBuffer allocation at init time.
+  //
+  // The Raw* arenas are written by source halp nodes (Camera, Light,
+  // Transform3D, …) at their own operator()() time — view-independent,
+  // aspect-ratio-agnostic, pre-composition. The cooked counterparts
+  // (Camera, Light, PerDraw, WorldTransform) are NOT arenas of this
+  // registry: ScenePreprocessor's transform passes derive them from the
+  // Raw inputs, the current render target's aspect ratio and the
+  // scene-graph parent-slot chain, and keep them in preprocessor-private
+  // buffers (see the note inside the enum). Material and Env have no
+  // scene-composition dependency, so source nodes write their final
+  // bytes directly into the slot without a separate raw stage.
+  enum class Arena : uint8_t
+  {
+    // ── Shared / source-authored ──────────────────────────────────
+    // These arenas hold view- and filter-independent bytes: every
+    // preprocessor reads the same data regardless of its camera /
+    // render target / upstream scene filtering. The producer owns the
+    // slot; multiple preprocessors consume via gpu_slot_ref + isLive().
+    RawCamera,        // RawCameraData      — 64 B per slot, UBO
+    RawLight,         // RawLightData       — 64 B per slot, SSBO
+    RawTransform,     // RawLocalTransform  — 64 B per slot, SSBO
+    Material,         // MaterialGPU        — 64 B per slot, SSBO
+    Env,              // EnvParamsUBO       — 64 B per slot, UBO
+
+    // Cooked outputs (camera UBOs, composed world matrices, per-draw
+    // structs, LightGPU with world-direction, MaterialGPU with resolved
+    // textureRefs) are preprocessor-PRIVATE and live in each
+    // ScenePreprocessorNode's own QRhiBuffers — they're view- and
+    // filter-dependent, so a shared arena would be incorrect when two
+    // preprocessors see different filtered views of the same source.
+
+    Count_
+  };
+
+  // Fixed-stride slot. The arena buffer is laid out as a packed array of
+  // stride-byte slots: slot i lives at byte offset i * stride. The slot
+  // index is the arena-level identity that consumer shaders use to
+  // address the slot as `scene_materials.entries[slot_index]` (std430
+  // stride = sizeof(MaterialGPU)), `scene_lights.entries[slot_index]`,
+  // etc. Allocations are O(1) via a free-list stack; no bucket / bitmap
+  // fragmentation. Trades OffsetAllocator's variable-size flexibility
+  // for (a) shader-indexable layout and (b) a predictable 1:1 mapping
+  // between internal_index and byte offset — critical for direct arena
+  // reads without a per-draw offset-translation table.
+  struct Slot
+  {
+    static constexpr uint32_t kInvalidIndex = 0xFFFFFFFFu;
+
+    Arena arena{Arena::RawCamera};
+    uint32_t slot_index{kInvalidIndex};
+    uint32_t size{0};        // requested payload size (≤ arena stride)
+    uint32_t generation{};   // stamped on allocate; bumps on free
+
+    bool valid() const noexcept { return slot_index != kInvalidIndex; }
+  };
+
+  GpuResourceRegistry() = default;
+  GpuResourceRegistry(const GpuResourceRegistry&) = delete;
+  GpuResourceRegistry& operator=(const GpuResourceRegistry&) = delete;
+  ~GpuResourceRegistry();
+
+  /**
+   * @brief Create the arena buffers. Must be called before any allocate().
+   *
+   * Per-arena capacity is fixed at init time (grow-in-place reallocation
+   * is a follow-up). If an arena runs out of room, allocate() returns
+   * an invalid Slot and logs a warning.
+   *
+   * Persist-across-rebuild contract: the registry now lives on the
+   * OutputNode and survives RenderList rebuilds (e.g. viewport resize).
+   * The owning OutputNode lazy-calls init() exactly once for a given
+   * QRhi lifetime. Subsequent createRenderList calls reuse the registry
+   * as-is (texture arrays, mesh slabs, arena slot generations all
+   * preserved). Use isInitialized() to detect "registry already up".
+   */
+  void init(QRhi& rhi, QRhiResourceUpdateBatch& batch);
+
+  /**
+   * @brief True if init() has been called and destroyOwned()/destroy()
+   * has not. Used by RenderList::init to gate the (otherwise asserting)
+   * init() call when the registry is being reused across an RL rebuild.
+   */
+  bool isInitialized() const noexcept { return m_rhi != nullptr; }
+
+  /**
+   * @brief QRhi this registry was init()'d against. Null when not
+   * initialised. The owning OutputNode uses this to decide whether
+   * the registry is still bound to its QRhi (vs. a fresh QRhi created
+   * after a setSwapchainFormat-style teardown).
+   */
+  QRhi* boundRhi() const noexcept { return m_rhi; }
+
+  /**
+   * @brief Seed reserved arena slots with sensible defaults.
+   *
+   * Called by the owning RenderList after init() and after the initial
+   * resource-update batch is ready. Currently writes a default
+   * white-dielectric MaterialGPU into Material arena slot 0 — the slot
+   * `arenaSlotForMaterial(nullptr)` returns when a draw has no
+   * material assigned (e.g. a Primitive cube with the user never
+   * having dropped a Material node on it). Without this seed, slot 0
+   * carries whatever bytes the previous registered material left
+   * behind, producing the confusing "every unmaterialed mesh is red
+   * because the first registered material was red" symptom.
+   *
+   * Idempotent — second call is a no-op once @c m_defaults_seeded is
+   * set.
+   */
+  void seedDefaults(QRhiResourceUpdateBatch& batch);
+
+  /**
+   * @brief Destroy the arena buffers via the owning RenderList.
+   *
+   * Every arena QRhiBuffer is routed through @c RenderList::releaseBuffer
+   * so the RenderList's bookkeeping sees the release and any other path
+   * that still holds a pointer to the buffer can't accidentally double-
+   * free it. Prefer this overload; call it from RenderList::release()
+   * before the QRhi teardown.
+   */
+  void destroy(RenderList& renderer);
+
+  /**
+   * @brief Destructor fallback — buffers are nulled without touching the
+   * QRhi. Only safe when @ref destroy(RenderList&) has already run (or
+   * when the QRhi has already torn them down as children). Leaks the
+   * QRhiBuffer wrappers otherwise; that's the lesser evil vs. a
+   * use-after-free in the common "QRhi already dead" path.
+   */
+  void destroy();
+
+  /**
+   * @brief Tear down arena buffers + texture arrays + mesh streams
+   * directly (no RenderList plumbing). Called by the owning OutputNode
+   * when its QRhi is about to be destroyed (destroyOutput, ~OutputNode).
+   *
+   * Persist-across-rebuild contract: the registry survives across RL
+   * rebuilds (RenderList::release is a no-op for the registry now), so
+   * the QRhi-routed teardown that used to happen in destroy(RenderList&)
+   * has no live RenderList to run through any more. We `delete` the
+   * QRhiBuffer / QRhiTexture / QRhiSampler wrappers directly: the QRhi
+   * is still alive at this call site (callers MUST invoke this BEFORE
+   * RenderState::destroy() / setSwapchainFormat-style teardown), so the
+   * destructors free both the wrapper and the underlying GPU resource
+   * cleanly. After this call the registry is back to its pre-init()
+   * state and can be re-init()'d against a new QRhi.
+   */
+  void destroyOwned();
+
+  /**
+   * @brief Reserve a slot in the given arena for @p size bytes.
+   * @return invalid Slot on OOM. Caller must check Slot::valid().
+   */
+  Slot allocate(Arena arena, uint32_t size);
+
+  /**
+   * @brief Return the slot to the free list. Safe to call with invalid Slot.
+   */
+  void free(Slot& slot);
+
+  /**
+   * @brief Buffer underlying an arena. Null until init().
+   *
+   * Downstream consumers (preprocessor, rasterizer SRBs) bind this buffer
+   * with the slot offset + size from Slot.
+   */
+  QRhiBuffer* buffer(Arena arena) const noexcept;
+
+  /**
+   * @brief Byte offset of a slot inside its arena's buffer.
+   */
+  uint32_t slotOffset(const Slot& slot) const noexcept;
+
+  /**
+   * @brief Byte stride of the arena — every slot is this many bytes.
+   * Consumer shaders index `arena.entries[slot_index]` where entries[]
+   * has std430 stride equal to this value.
+   */
+  uint32_t arenaSlotStride(Arena arena) const noexcept;
+
+  /**
+   * @brief Slot capacity of the arena (number of slots, not bytes).
+   */
+  uint32_t arenaSlotCount(Arena arena) const noexcept;
+
+  /**
+   * @brief Upload @p size bytes starting at @p data into a slot.
+   *
+   * Thin wrapper around `QRhiResourceUpdateBatch::updateDynamicBuffer`
+   * (for Dynamic-usage arenas) or `uploadStaticBuffer` (Static).
+   * Called by source nodes in their `update()` when their content
+   * changes — never per frame for unchanged data.
+   */
+  void updateSlot(
+      QRhiResourceUpdateBatch& res, const Slot& slot, const void* data,
+      uint32_t size) noexcept;
+
+  /**
+   * @brief Produce an ossia::gpu_slot_ref that can be stamped on a
+   * scene-graph component for the downstream preprocessor to consume.
+   *
+   * The returned ref captures (arena tag, offset, size, internal slot
+   * index, generation). The preprocessor uses isLive() to validate it
+   * before reading GPU bytes.
+   */
+  ossia::gpu_slot_ref toOssiaRef(const Slot& slot) const noexcept
+  {
+    ossia::gpu_slot_ref ref{};
+    if(!slot.valid())
+      return ref;
+    ref.arena = static_cast<uint32_t>(slot.arena);
+    ref.offset = slotOffset(slot);
+    ref.generation = slot.generation;
+    ref.internal_index = slot.slot_index;
+    ref.size = slot.size;
+    return ref;
+  }
+
+  /**
+   * @brief Return true if the ref still points at a live allocation.
+   *
+   * O(1): one array access + one uint32 compare. The generation table
+   * is bumped on every allocate() and free(), so a ref from a prior
+   * allocation at the same slot index fails the compare.
+   */
+  bool isLive(const ossia::gpu_slot_ref& r) const noexcept
+  {
+    const bool arena_known = r.arena < static_cast<uint32_t>(Arena::Count_);
+    if(!arena_known || r.size == 0)
+      return false;
+    const auto& generations = m_arenas[r.arena].slot_generations;
+    const bool index_known = r.internal_index < generations.size();
+    return index_known && generations[r.internal_index] == r.generation;
+  }
+
+  // ─── Material texture arrays ──────────────────────────────────────
+  //
+  // Per-channel static texture arrays shared across all preprocessors
+  // in this RenderList. Static textures dedup by texture_source pointer
+  // — every producer that references the same asset gets the same
+  // layer. Dynamic handles (video textures, runtime GPU outputs) get
+  // per-slot bindings in the `dynamicTextures` vector — the bound
+  // aux-texture name is `<channel>Dyn<slot>` in consumer shaders.
+  //
+  // Source-authored by nature: the textures belong to an asset / a
+  // wired GPU handle, independent of which preprocessor is looking.
+  // Shared state avoids re-decoding + re-uploading the same JPEG for
+  // every preprocessor.
+
+  enum class TextureChannel : uint8_t
+  {
+    BaseColor  = 0,
+    MetalRough = 1,
+    Normal     = 2,
+    Emissive   = 3,
+    Occlusion  = 4,  // Separate glTF occlusionTexture (when distinct from MR).
+    Count_     = 5
+  };
+
+  // Default layer size + max dynamic slots. Matched across channels so
+  // samplers are interchangeable and consumer shaders can declare a
+  // fixed sampler count.
+  static constexpr int kTextureLayerSize = 1024;
+  // Bumped from 2 to 4: with LRU eviction in place the cap matters less
+  // (recycled slots stay fresh), but a higher floor reduces churn in
+  // scenes that legitimately use 3-4 distinct dynamic textures per
+  // channel (multi-camera capture, layered video). Stays comfortably
+  // under the 16-samplers-per-stage RHI floor at 4 channels × 4 slots
+  // + static arrays + skybox/IBL.
+  static constexpr int kMaxDynamicSlots  = 4;
+
+  // Wave 2 S2-shader: per-channel static buckets. Each bucket holds
+  // textures of ONE (format, pixelSize) tuple. Distinct tuples go into
+  // distinct buckets; consumer shaders declare N `sampler2DArray`s per
+  // channel and switch on the bucket field decoded from
+  // MaterialGPU::textureRefs (see tex_ref_static in SceneGPUState.hpp).
+  //
+  // Runtime cap is 16 (kMaxBuckets), chosen to stay within Vulkan's
+  // default VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER pool budget: 5
+  // channels × 16 buckets + ~10 dynamic slots ≈ 90 samplers per
+  // pipeline, well under 256. Real scenes typically need 1-3 buckets
+  // per channel. Shader sampler arrays in classic_pbr_full.frag MUST
+  // stay in sync (baseColorArray0..baseColorArray15 etc).
+  //
+  // The tex_ref_static encoding (SceneGPUState.hpp:74) reserves a 7-bit
+  // bucket field (0..127), giving headroom to grow kMaxBuckets up to 128
+  // without changing the packed layout or shader decode masks. Growing
+  // beyond 16 requires enlarging the shader array declarations and
+  // verifying the descriptor pool budget on the target backend.
+  //
+  // GLES 3.1 / WebGL 2 guarantee only 16 textures per stage; those
+  // targets need a reduced-bucket preset variant (follow-up).
+  //
+  // Small scenes pay nothing: buckets are allocated lazily as texture
+  // uploads discover new (format, size) combinations.
+  static constexpr int kMaxBuckets = 16;
+
+  /**
+   * @brief Channel texture state with multi-bucket support.
+   *
+   * The MaterialGPU::textureRefs[] encoding is
+   * `source:2 | bucket:7 | layer:23` — the 7-bit bucket field
+   * addresses up to 128 distinct (format, pixelSize) tuples in the
+   * encoding; the runtime cap is kMaxBuckets (currently 16). Wave 1 of the rollout (Plan 09 S2-infra) keeps exactly
+   * ONE bucket live per channel: same shipping behaviour as the
+   * pre-refactor single-array path, shaders unchanged. Wave 2
+   * (S2-shader) lifts the cap — the preprocessor allocates a new
+   * bucket when a texture of a new (format, pixelSize) appears, and
+   * shipped shaders grow a bucket-switch ladder in sample_slot().
+   *
+   * The Bucket struct holds everything that used to be at channel
+   * scope (QRhiTexture*, layers, layerMap) plus the discriminating
+   * (format, pixelSize) tuple. Dynamic (runtime-GPU) slots stay at
+   * channel scope — they carry opaque QRhiTexture*s with no
+   * canonical format/size, so no sensible bucket to live in.
+   */
+  struct TextureChannelState
+  {
+    struct Bucket
+    {
+      QRhiTexture* array{};          // QRhiTexture::TextureArray + channel flags
+      QRhiTexture::Format format{QRhiTexture::RGBA8};
+      QSize pixelSize;               // all layers in a bucket share this size
+      int layers{};                  // current layer count
+
+      // Per-bucket sampler config. Bucket key extended to include this:
+      // distinct (format, size, sampler_config) tuples land in distinct
+      // buckets so per-glTF-texture wrap/filter modes are honoured even
+      // when multiple materials share a channel array.
+      ossia::texture_sampler_config sampler_config{};
+      QRhiSampler* sampler{};        // created on first allocation; owned
+
+      // Dedup: texture_source shared_ptr pointer → layer index in
+      // this bucket's `array`. Append-only within a materials list;
+      // cleared when the list changes.
+      ossia::flat_map<const ossia::texture_source*, int> layerMap;
+    };
+
+    // Wave 1 invariant: buckets.size() <= 1. Wave 2: up to kMaxBuckets (16).
+    std::vector<Bucket> buckets;
+
+    // Dynamic (runtime-GPU) slot map. Keyed by the QRhi-assigned
+    // globally-unique resource id (`QRhiResource::globalResourceId()`,
+    // monotonic uint64) rather than the raw `QRhiTexture*` pointer.
+    // The system allocator is allowed to recycle freed pointer values,
+    // and qrhivulkan.cpp:5909-5912 explicitly documents the same hazard
+    // for SRB tracking — keying by the stable id makes a recycled
+    // address always look like a fresh resource here too.
+    //
+    // Slots are recycled via LRU eviction: when the map fills up and a
+    // new texture id arrives, the slot with the smallest dynamicSlotLastUse
+    // counter is evicted to make room. Without the eviction path, a long
+    // session with any resolution-changing producer (window-capture, NDI
+    // source-switch, video file resolution change mid-stream) hit the
+    // 2-slot cap after two distinct globalResourceIds and every subsequent
+    // texture returned -1 → tex_ref_none() (material's dynamic texture
+    // silently blanks). LRU bumps lastUse on every access so the evicted
+    // slot is always the one no longer referenced by any active material.
+    ossia::flat_map<quint64, int> dynamicSlotMap;
+    std::vector<QRhiTexture*>     dynamicTextures;       // slot idx → texture
+    std::vector<uint64_t>         dynamicSlotLastUse;    // slot idx → access counter at last lookup
+    uint64_t                      dynamicSlotCounter{0}; // monotonic, bumped on each resolve
+
+    // Wave-1 shims. Callers that haven't been updated to loop over
+    // buckets[] go through these for legacy single-bucket semantics.
+    // Returns null / 0 when no bucket has been allocated yet.
+    QRhiTexture* primaryArray() const noexcept
+    {
+      return buckets.empty() ? nullptr : buckets[0].array;
+    }
+    int primaryLayers() const noexcept
+    {
+      return buckets.empty() ? 0 : buckets[0].layers;
+    }
+
+    // Access or lazily create bucket 0 with an owned (format, size).
+    // Kept for init-time fallback allocation only — production code
+    // goes through findOrCreateBucket() which selects the right bucket
+    // for the texture's actual (format, size).
+    Bucket& ensurePrimary(QRhiTexture::Format fmt, QSize sz)
+    {
+      if(buckets.empty())
+        buckets.emplace_back();
+      auto& b = buckets[0];
+      b.format = fmt;
+      b.pixelSize = sz;
+      return b;
+    }
+
+    // Find a bucket matching (fmt, sz); create a new one if none
+    // matches and we haven't hit kMaxBuckets. Returns `{bucket_index,
+    // pointer}`. On overflow returns `{-1, nullptr}` — caller must
+    // handle (typically emits a warning + `tex_ref_none`).
+    //
+    // Bucket identity is the exact (format, pixelSize) tuple — no
+    // rounding. Most real scenes have < 4 distinct tuples per
+    // channel; a Sponza-size asset mix sits comfortably at 2-3.
+    std::pair<int, Bucket*>
+    findOrCreateBucket(QRhiTexture::Format fmt, QSize sz)
+    {
+      const int existing = (int)buckets.size();
+      for(int i = 0; i < existing; ++i)
+        if(buckets[i].format == fmt && buckets[i].pixelSize == sz)
+          return {i, &buckets[i]};
+      // No exact (format, size) match: append unless the cap is reached.
+      if(existing >= kMaxBuckets)
+        return {-1, nullptr};
+      buckets.emplace_back();
+      Bucket& created = buckets.back();
+      created.format = fmt;
+      created.pixelSize = sz;
+      return {existing, &created};
+    }
+
+    // Sampler-config-aware variant. Bucket key = (format, pixelSize,
+    // sampler_config). Used by the glTF path so a scene with mixed
+    // wrap modes (e.g., a tiled floor with REPEAT plus a UI element
+    // with CLAMP_TO_EDGE) splits across buckets, each with its own
+    // QRhiSampler. The sampler config is always part of the match, even
+    // when it is the default one; buckets do not fragment as long as
+    // every texture uses the same sampler.
+    std::pair<int, Bucket*>
+    findOrCreateBucket(
+        QRhiTexture::Format fmt, QSize sz,
+        const ossia::texture_sampler_config& sampler_cfg)
+    {
+      const int existing = (int)buckets.size();
+      for(int i = 0; i < existing; ++i)
+        if(buckets[i].format == fmt && buckets[i].pixelSize == sz
+           && buckets[i].sampler_config == sampler_cfg)
+          return {i, &buckets[i]};
+      // No exact (format, size, sampler) match: append unless at the cap.
+      if(existing >= kMaxBuckets)
+        return {-1, nullptr};
+      buckets.emplace_back();
+      Bucket& created = buckets.back();
+      created.format = fmt;
+      created.pixelSize = sz;
+      created.sampler_config = sampler_cfg;
+      return {existing, &created};
+    }
+  };
+
+  /**
+   * @brief Shared state for one of the TextureChannel::Count_ (5) channels.
+   * Preprocessors / producers read-modify this in place; contents are
+   * view-independent (asset identity drives layer assignment) so
+   * sharing across preprocessors is correct.
+   */
+  TextureChannelState& textureChannel(TextureChannel ch) noexcept
+  {
+    return m_textureChannels[(std::size_t)ch];
+  }
+  const TextureChannelState& textureChannel(TextureChannel ch) const noexcept
+  {
+    return m_textureChannels[(std::size_t)ch];
+  }
+
+  /**
+   * @brief Shader-visible aux-texture name for a channel's static array
+   * (`baseColorArray`, `metalRoughArray`, `normalArray`, `emissiveArray`).
+   */
+  static const char* textureChannelArrayName(TextureChannel ch) noexcept;
+
+  /**
+   * @brief Shader-visible aux-texture name base for a channel's dynamic
+   * slots (`baseColorDyn`, `metalRoughDyn`, `normalDyn`, `emissiveDyn`).
+   * Full name is `<base><slot_index>`, slot_index < kMaxDynamicSlots.
+   */
+  static const char* textureChannelDynBaseName(TextureChannel ch) noexcept;
+
+  /**
+   * @brief QRhiTexture creation flags for a channel. sRGB channels
+   * (base color, emissive) get hardware sRGB→linear on sample; MR and
+   * normal stay linear.
+   */
+  static QRhiTexture::Flags textureChannelFlags(TextureChannel ch) noexcept;
+
+  /**
+   * @brief Register a runtime GPU texture handle for this channel's
+   * dynamic-slot set. Returns the slot index (0 .. kMaxDynamicSlots-1),
+   * or -1 when no slot can be assigned.
+   *
+   * A handle that is already in the channel's dynamicSlotMap keeps its
+   * slot across frames, so multiple producers AND the preprocessor can
+   * all call resolveDynamicSlot within a frame and agree on the same
+   * answer for the same handle.
+   *
+   * Capacity is kMaxDynamicSlots (currently 4) per channel, i.e.
+   * TextureChannel::Count_ × kMaxDynamicSlots = 20 slots registry-wide.
+   * When a channel is full, the least-recently-used slot is recycled
+   * for the new handle (see TextureChannelState::dynamicSlotLastUse),
+   * so a slot assignment is only stable while the handle stays in use.
+   */
+  int resolveDynamicSlot(TextureChannel channel, void* native_handle) noexcept;
+  int resolveDynamicSlot(TextureChannel channel, void* native_handle) noexcept;
+
+  // ─── Mesh arena manager (Plan 09 S4, post-fix) ───────────────────
+  //
+  // Per-mesh slab allocator over the 7 streams (see MeshStream) of the
+  // MDI concatenated geometry: positions, normals, texcoords, tangents,
+  // colors, texcoords1, indices. Each is one growth-capped QRhiBuffer.
+  //
+  // CRITICAL invariant for indirect-draw correctness: a single
+  // `baseVertex` value is applied to ALL vertex bindings by the GPU
+  // (see VkDrawIndexedIndirectCommand::vertexOffset). So per-mesh
+  // byte offsets across vertex streams MUST satisfy
+  //   pos_byte_off  = baseVertex * 16
+  //   nrm_byte_off  = baseVertex * 16
+  //   uv_byte_off   = baseVertex * 8
+  //   tan_byte_off  = baseVertex * 16
+  // Original design used 5 INDEPENDENT OffsetAllocators (one per
+  // stream). For sequential allocations from a fresh pool that holds,
+  // but as soon as alloc/free traffic fragments the streams the
+  // per-stream allocators pick free blocks of different size-bins and
+  // the offsets diverge → vertex shader reads attribute[v] from the
+  // wrong slab → garbage normals (back-face cull → mesh disappears),
+  // 1-pixel-wide texcoord smear, etc.
+  //
+  // Fixed design: TWO shared allocators —
+  //   * `m_vertexAllocator`   in VERTEX units (cap = 8M vertex slots)
+  //   * `m_indexAllocator`    in INDEX  units (cap = 8M index slots)
+  // Each slab carries one `vertex_slot` and one `index_slot`. Per-
+  // stream byte offsets are derived as `vertex_slot.offset * stride`
+  // and `index_slot.offset * 4`. Lockstep is structurally guaranteed.
+  //
+  // Cache: stable_id hit → reuse slab, skip upload. Miss → fresh
+  // allocation. Sweep frees slabs unseen for `grace` frames.
+  //
+  // Backing buffer sizes (pointer-stable across the registry's
+  // lifetime; downstream bindings resolve once):
+  //   positions / normals / tangents  128 MB  (8M verts × 16 B)
+  //   texcoords                        64 MB  (8M verts ×  8 B)
+  //   indices                          32 MB  (8M idx   ×  4 B)
+  //
+  // Indirect draw: `baseVertex = vertex_slot.offset`,
+  //                `firstIndex = index_slot.offset`.
+
+  enum class MeshStream : uint8_t
+  {
+    Positions  = 0,
+    Normals    = 1,
+    Texcoords  = 2,  // TEXCOORD_0 (primary UV).
+    Tangents   = 3,
+    Colors     = 4,  // glTF COLOR_0, vec4 (vec3 sources padded with alpha=1).
+    Texcoords1 = 5,  // glTF TEXCOORD_1 (lightmap / secondary UV).
+    Indices    = 6,
+    Count_     = 7
+  };
+
+  // Bytes per element per stream. Matches the MDI output layout
+  // the existing rasterizer presets consume:
+  //   positions/normals = vec3 padded to vec4 (std430 alignment).
+  //   tangents          = vec4.
+  //   colors            = vec4 (vec3 sources padded with alpha=1).
+  //   texcoords[_1]     = vec2.
+  //   indices           = uint32.
+  static constexpr uint32_t kMeshStride[(std::size_t)MeshStream::Count_]
+      = {16, 16, 8, 16, 16, 8, 4};
+
+  // Bytes of capacity reserved per stream at init time. These are the
+  // "kMinCap" pre-sizing budgets — generous enough to avoid realloc
+  // churn on normal scene growth. If a scene exceeds these, allocate()
+  // returns a sentinel allocation and the caller skips the mesh.
+  //
+  // 128 MB positions × 16B stride = 8M verts.
+  // 128 MB normals/tangents/colors matches.
+  // 64 MB texcoords (8B) = 8M verts.
+  // 64 MB texcoords1 matches.
+  // 32 MB indices (4B) = 8M indices.
+  static constexpr uint32_t kMeshCapBytes[(std::size_t)MeshStream::Count_]
+      = {
+          128u * 1024u * 1024u,
+          128u * 1024u * 1024u,
+           64u * 1024u * 1024u,
+          128u * 1024u * 1024u,
+          128u * 1024u * 1024u,  // colors
+           64u * 1024u * 1024u,  // texcoords1
+           32u * 1024u * 1024u,
+  };
+
+  /**
+   * @brief Slab handle returned by MeshArenaManager::acquire.
+   *
+   * One per mesh (keyed on stable_id). Holds ONE vertex-unit allocation
+   * (shared across all vertex streams, i.e. every MeshStream except
+   * Indices) and ONE index-unit allocation. Offsets are derived in
+   * meshSlabOffsetBytes() as `vertex_slot.offset * stride` /
+   * `index_slot.offset * 4`. This guarantees baseVertex consistency
+   * across all vertex bindings even after fragmentation — see the
+   * "CRITICAL invariant" block above.
+   *
+   * `last_seen_frame` is bumped each frame the owner calls
+   * markSeen(); sweep() frees slabs whose last_seen is older than
+   * `current_frame - grace`. Grace = FramesInFlight + 1 is the
+   * safe default (let in-flight draws finish).
+   */
+  struct MeshSlab
+  {
+    uint64_t stable_id{};
+    OffsetAllocator::Allocation vertex_slot{};  // offset/size in vertex units
+    OffsetAllocator::Allocation index_slot{};   // offset/size in index  units
+    uint32_t vertex_count{};
+    uint32_t index_count{};
+    uint32_t last_seen_frame{};
+    bool freshly_allocated{};  // true on the frame the slab was created
+  };
+
+  /// Acquire a slab for a mesh. Returns an existing slab on stable_id
+  /// hit (zero-cost, no upload needed); allocates fresh on miss.
+  /// Returns nullptr on allocator exhaustion.
+  ///
+  /// `freshly_allocated` on the returned slab signals "caller must
+  /// upload the mesh's bytes via uploadMeshStream(...)".
+  ///
+  /// `current_frame` is required so that the count-mismatch grace-queue
+  /// enqueue stamps a real release frame (not 0). Without it, after the
+  /// first `grace` frames of the session every count-mismatch deferred
+  /// release is freed instantly on the very next sweep, defeating the
+  /// guard against in-flight GPU draws referencing the old offset.
+  MeshSlab* acquireMeshSlab(
+      uint64_t stable_id,
+      uint32_t vertex_count,
+      uint32_t index_count,
+      uint32_t current_frame) noexcept;
+
+  /// Mark a slab as seen this frame so sweep() doesn't reclaim it.
+  void markMeshSlabSeen(uint64_t stable_id, uint32_t current_frame) noexcept;
+
+  /// Release slabs whose `last_seen_frame < current_frame - grace`.
+  /// Grace defaults to 2 (covers FramesInFlight+1 on typical backends).
+  void sweepMeshSlabs(uint32_t current_frame, uint32_t grace = 2) noexcept;
+
+  /// Free pending-release slabs whose `released_frame + grace <= current_frame`
+  /// from the OffsetAllocator. Called by `sweepMeshSlabs` (phase-2) and by
+  /// `acquireMeshSlab` *before* its fresh allocate, so a count-mismatch whose
+  /// previous slot has served its grace can recycle that capacity in the same
+  /// `update()` instead of triggering a spurious "pool exhausted" warning.
+  /// Does not touch the *SlotsUsed trackers — those are decremented eagerly at
+  /// logical-release time (enqueue) so phase-2 free is purely allocator
+  /// bookkeeping.
+  void drainExpiredPendingReleases(
+      uint32_t current_frame, uint32_t grace = 2) noexcept;
+
+  /// Explicit release of a slab by stable_id (used on scene teardown).
+  /// The release is enqueued into the pending-releases grace queue and freed
+  /// from the OffsetAllocator only after `grace` frames have elapsed, matching
+  /// the same contract as sweepMeshSlabs. Pass the current render-frame counter
+  /// so the sweep can determine when it is safe to reclaim the sub-allocation.
+  void releaseMeshSlab(uint64_t stable_id, uint32_t current_frame) noexcept;
+
+  /// Byte offset of a stream within its backing buffer. Use directly
+  /// as `uploadStaticBuffer(buf, offset, size, data)`.
+  uint32_t meshSlabOffsetBytes(
+      const MeshSlab& slab, MeshStream stream) const noexcept;
+
+  /// Backing QRhiBuffer for a stream. Stable pointer across the
+  /// registry's lifetime (pre-sized, never grown).
+  QRhiBuffer* meshStreamBuffer(MeshStream s) const noexcept;
+
+  /// Upload CPU bytes into a slab's stream. Thin wrapper around
+  /// QRhiResourceUpdateBatch::uploadStaticBuffer at the slab's
+  /// computed offset.
+  void uploadMeshStream(
+      QRhiResourceUpdateBatch& res, const MeshSlab& slab,
+      MeshStream s, const void* data, uint32_t size) noexcept;
+
+  /// Total bytes in use per stream (for S6 telemetry panel).
+  uint32_t meshStreamUsedBytes(MeshStream s) const noexcept;
+  uint32_t meshStreamFreeBytes(MeshStream s) const noexcept;
+
+private:
+  struct ArenaState
+  {
+    QRhiBuffer* buffer{};
+    uint32_t slot_stride{0};   // bytes per slot (arena layout is a packed
+                               // std430-compatible array of this stride)
+    uint32_t slot_count{0};    // total slots (capacity_bytes = stride × count)
+    QRhiBuffer::UsageFlags usage{};
+    QRhiBuffer::Type type{QRhiBuffer::Dynamic};
+
+    // LIFO stack of free slot indices. Push on free, pop on allocate.
+    // O(1) alloc / free, no fragmentation (every slot is the same size).
+    std::vector<uint32_t> free_slots;
+
+    // Per-slot generation, indexed by slot_index. Sized to slot_count
+    // at init() and bumped on every allocate()/free() to that slot.
+    // Consumers check the stamped generation in their gpu_slot_ref via
+    // isLive().
+    std::vector<uint32_t> slot_generations;
+  };
+
+  std::array<ArenaState, (std::size_t)Arena::Count_> m_arenas{};
+
+  std::array<TextureChannelState, (std::size_t)TextureChannel::Count_>
+      m_textureChannels{};
+
+  // Per-stream backing buffers (one QRhiBuffer per attribute).
+  // Allocations are NOT per-stream anymore: a single shared
+  // m_vertexAllocator hands out vertex-unit slots that every vertex
+  // stream (all MeshStream values except Indices) interprets through
+  // its own stride, and m_indexAllocator handles indices.
+  // This keeps per-stream byte offsets in lockstep — required for
+  // indirect-draw baseVertex correctness across fragmentation.
+  struct MeshStreamState
+  {
+    QRhiBuffer* buffer{};
+    uint32_t capacity_bytes{};
+    QRhiBuffer::UsageFlags usage{};
+  };
+  std::array<MeshStreamState, (std::size_t)MeshStream::Count_> m_meshStreams{};
+
+  // Shared vertex / index allocators (slot units, not bytes).
+  // capacity_slots = min(stream_capacity_bytes / stream_stride) across
+  // the four vertex streams = 8M for the default sizes; index pool
+  // capacity = 8M slots.
+  std::unique_ptr<OffsetAllocator::Allocator> m_vertexAllocator;
+  std::unique_ptr<OffsetAllocator::Allocator> m_indexAllocator;
+  uint32_t m_vertexSlotsCapacity{};
+  uint32_t m_indexSlotsCapacity{};
+  uint32_t m_vertexSlotsUsed{};
+  uint32_t m_indexSlotsUsed{};
+
+  ossia::hash_map<uint64_t, MeshSlab> m_meshSlabs;
+
+  // Slabs whose `released_frame` is set are waiting out the grace
+  // period before their OffsetAllocator allocations return to the
+  // free list. Prevents use-after-free when an in-flight draw still
+  // references the old offset.
+  struct PendingRelease
+  {
+    uint64_t stable_id{};
+    uint32_t released_frame{};
+    OffsetAllocator::Allocation vertex_slot{};
+    OffsetAllocator::Allocation index_slot{};
+  };
+  std::vector<PendingRelease> m_pendingReleases;
+
+  QRhi* m_rhi{};
+
+  // Set by seedDefaults() after writing the default-MaterialGPU bytes
+  // into Material arena slot 0. Idempotent guard so repeated calls are
+  // free.
+  bool m_defaults_seeded{false};
+};
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.cpp
new file mode 100644
index 0000000000..0a65ef5e9f
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.cpp
@@ -0,0 +1,111 @@
+#include <Gfx/Graph/GpuTiming.hpp>
+
+#include <algorithm>
+
+namespace score::gfx
+{
+
+void GpuTimings::record(std::string_view name, double ms) noexcept
+{
+  // Samples <= 0 typically mean "backend doesn't support timestamps" or
+  // "resolved value not yet available"; NaN would poison the rolling mean
+  // for a whole window. Skip both. An explicit clear happens via reset().
+  if(!(ms > 0.0))
+    return;
+
+  std::lock_guard lk{m_mutex};
+
+  auto it = std::find_if(
+      m_entries.begin(), m_entries.end(),
+      [&](const Entry& e) { return e.name == name; });
+
+  if(it == m_entries.end())
+  {
+    Entry e;
+    e.name.assign(name);
+    e.history.fill(0.0);
+    e.last_ms = ms;
+    e.mean_ms = ms;
+    e.max_ms = ms;
+    e.history[0] = ms;
+    e.history_index = 1 % kHistorySize;
+    e.sample_count = 1;
+    e.frames_since_observed = 0;
+    m_entries.push_back(std::move(e));
+    return;
+  }
+
+  // Ring-buffer update + rolling mean + max over the window.
+  it->last_ms = ms;
+  it->history[it->history_index] = ms;
+  it->history_index = (it->history_index + 1) % kHistorySize;
+  if(it->sample_count < kHistorySize)
+    ++it->sample_count;
+  it->frames_since_observed = 0;
+
+  double sum = 0.0;
+  double m = 0.0;
+  for(int i = 0; i < it->sample_count; ++i)
+  {
+    const double v = it->history[i];
+    sum += v;
+    m = std::max(m, v);
+  }
+  it->mean_ms = sum / double(it->sample_count);
+  it->max_ms = m;
+}
+
+void GpuTimings::tickFrame() noexcept
+{
+  std::lock_guard lk{m_mutex};
+
+  // Age every entry by one frame; record() resets the counter to 0.
+  for(auto& entry : m_entries)
+    entry.frames_since_observed += 1;
+
+  // Evict entries unobserved for more than kStaleThreshold frames.
+  const auto is_stale = [](const Entry& entry) {
+    return entry.frames_since_observed > kStaleThreshold;
+  };
+  m_entries.erase(
+      std::remove_if(m_entries.begin(), m_entries.end(), is_stale),
+      m_entries.end());
+}
+
+std::vector<GpuTimings::Entry> GpuTimings::snapshot() const
+{
+  std::lock_guard lk{m_mutex};
+  return m_entries;
+}
+
+void GpuTimings::reset() noexcept
+{
+  std::lock_guard lk{m_mutex};
+  m_entries.clear();
+}
+
+ScopedGpuTimer::ScopedGpuTimer(
+    QRhiCommandBuffer& cb, GpuTimings& timings, std::string_view name)
+    : m_cb{cb}
+    , m_timings{timings}
+    , m_name{name}
+{
+  // QRhi only exposes a CB-wide timestamp via lastCompletedGpuTime() —
+  // there is no per-pass sub-range API, so recording it here would put
+  // the identical full-frame number under every pass name. The frame
+  // total is recorded once per frame in RenderList::renderInternal under
+  // the "frame" bucket; this class only emits the debug marker brackets
+  // (visible in RenderDoc / Nsight).
+  //
+  // The label is deep-copied: QByteArray::fromRawData() neither owns the
+  // bytes nor guarantees a trailing '\0', and the command buffer may keep
+  // the QByteArray (and read it as a C string) after this scope ends.
+  m_cb.debugMarkBegin(QByteArray(m_name.data(), (qsizetype)m_name.size()));
+}
+
+ScopedGpuTimer::~ScopedGpuTimer()
+{
+  m_cb.debugMarkEnd();
+}
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.hpp
new file mode 100644
index 0000000000..14c736413d
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.hpp
@@ -0,0 +1,126 @@
+#pragma once
+#include <score_plugin_gfx_export.h>
+
+#include <QtGui/private/qrhi_p.h>
+
+#include <array>
+#include <cstdint>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace score::gfx
+{
+/**
+ * @brief Named GPU timing collector (Plan 09 S0 / S6).
+ *
+ * QRhi exposes only a single `QRhiCommandBuffer::lastCompletedGpuTime()`
+ * value — the elapsed GPU time of the most recently COMPLETED frame on
+ * that CB (NOTE(review): Qt documents it in seconds, `record()` takes
+ * ms — confirm callers convert). That value has no per-pass
+ * resolution, so this class measures nothing by itself: it stores
+ * whatever samples callers hand to `record()` under a name.
+ * `ScopedGpuTimer` (below) only emits debug markers and never records;
+ * per its documentation the whole-frame cost goes in the "frame" bucket.
+ *
+ * Results are always one frame late (the GPU must complete, then the
+ * CPU reads back the resolved timestamp). Callers expecting live
+ * numbers should treat the read as "previous frame's time".
+ *
+ * The collector is per-RenderList. It accumulates a rolling mean over
+ * the last `kHistorySize` samples and exposes a copy via `snapshot()`
+ * for the S6 observability panel.
+ *
+ * Thread model: state is guarded by one exclusive `std::mutex` (not a
+ * shared/reader lock), so a panel read and a per-frame update exclude
+ * each other. Lock contention is negligible (one update/frame,
+ * one read/ui-tick).
+ */
+class SCORE_PLUGIN_GFX_EXPORT GpuTimings
+{
+public:
+  static constexpr int kHistorySize = 64;
+
+  struct Entry
+  {
+    std::string name;
+    double last_ms{0.0};
+    double mean_ms{0.0};
+    double max_ms{0.0};
+    std::array<double, kHistorySize> history{};
+    int history_index{0};
+    int sample_count{0};   // capped at kHistorySize; used to avoid cold-start bias
+    int frames_since_observed{0}; // aged by tickFrame(); entry dropped above kStaleThreshold
+  };
+
+  GpuTimings() = default;
+  GpuTimings(const GpuTimings&) = delete;
+  GpuTimings& operator=(const GpuTimings&) = delete;
+
+  /**
+   * @brief Record an observation for a named pass.
+   *
+   * @p ms may be 0 when caps.timestamps is false or when the backend
+   * hasn't resolved a timestamp yet. Zero samples skip the rolling
+   * mean update.
+   */
+  void record(std::string_view name, double ms) noexcept;
+
+  /**
+   * @brief Tick once per frame. Entries not observed for more than
+   *        `kStaleThreshold` frames are dropped.
+   */
+  void tickFrame() noexcept;
+
+  /**
+   * @brief Snapshot of all entries for the observability panel.
+   *
+   * Returns a copy so the caller doesn't need to hold a lock while
+   * iterating. Cost: O(n_entries); typical n ≤ 32.
+   */
+  std::vector<Entry> snapshot() const;
+
+  /**
+   * @brief Reset all state. Called on RenderList re-init.
+   */
+  void reset() noexcept;
+
+private:
+  static constexpr int kStaleThreshold = 120;   // drop entries after 2s at 60fps
+
+  mutable std::mutex m_mutex; // mutable so the const snapshot() can lock it
+  std::vector<Entry> m_entries;
+};
+
+/**
+ * @brief RAII helper that brackets a named pass region for GPU frame-debug.
+ *
+ * Emits `debugMarkBegin` / `debugMarkEnd` around the enclosed code so
+ * RenderDoc, Nsight, and Metal Frame Debugger show pass boundaries in
+ * captures. Does NOT record timing data — `QRhiCommandBuffer::lastCompletedGpuTime()`
+ * returns a CB-wide delta with no per-pass resolution, so attributing it
+ * to individual passes would print the same full-frame cost against every
+ * named region. The `timings` argument is stored but currently unused.
+ *
+ * The whole-CB frame time is recorded once per frame in
+ * `RenderList::renderInternal` under the `"frame"` bucket. Per-pass
+ * sub-range timestamps require explicit QRhi timestamp queries, which
+ * are not yet exposed by the RHI abstraction layer.
+ */
+class SCORE_PLUGIN_GFX_EXPORT ScopedGpuTimer
+{
+public:
+  ScopedGpuTimer(
+      QRhiCommandBuffer& cb, GpuTimings& timings, std::string_view name);
+  ~ScopedGpuTimer(); // ends the marker region begun by the constructor
+
+  ScopedGpuTimer(const ScopedGpuTimer&) = delete;
+  ScopedGpuTimer& operator=(const ScopedGpuTimer&) = delete;
+
+private:
+  QRhiCommandBuffer& m_cb; // command buffer the markers are emitted on
+  GpuTimings& m_timings; // stored only; the ctor/dtor never record into it
+  std::string m_name; // owned copy of the marker label
+};
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp
index ef95a59a27..1d18057118 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp
@@ -1,5 +1,6 @@
 #include "ISFNode.hpp"
 
+#include <Gfx/Graph/CustomMesh.hpp>
 #include <Gfx/Graph/Graph.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
@@ -71,25 +72,25 @@ struct no_delay_edges
 
 static void graphwalk(
     score::gfx::Node* node, std::vector<score::gfx::Node*>& list, GraphImpl& g,
-    VertexMap& m)
+    VertexMap& m, ossia::flat_set<score::gfx::Node*>& visited)
 {
   auto sink_desc = m[node];
   for(auto inputs : node->input)
   {
     for(auto edge : inputs->edges)
     {
-      if(!edge->source->node->addedToGraph)
+      auto* src_node = edge->source->node;
+      if(visited.insert(src_node).second)
       {
-        list.push_back(edge->source->node);
+        list.push_back(src_node);
 
-        auto src_desc = boost::add_vertex(edge->source->node, g);
-        m[edge->source->node] = src_desc;
-        edge->source->node->addedToGraph = true;
+        auto src_desc = boost::add_vertex(src_node, g);
+        m[src_node] = src_desc;
         boost::add_edge(src_desc, sink_desc, edge->type, g);
       }
       else
       {
-        auto src_desc = m[edge->source->node];
+        auto src_desc = m[src_node];
         boost::add_edge(src_desc, sink_desc, edge->type, g);
       }
     }
@@ -100,14 +101,16 @@ static void graphwalk(std::vector<score::gfx::Node*>& model_nodes)
 {
   GraphImpl g;
   VertexMap m;
+  ossia::flat_set<score::gfx::Node*> visited;
+
   auto k = boost::add_vertex(model_nodes.front(), g);
   m[model_nodes.front()] = k;
-  model_nodes.front()->addedToGraph = true;
+  visited.insert(model_nodes.front());
 
   std::size_t processed = 0;
   while(processed != model_nodes.size())
   {
-    graphwalk(model_nodes[processed], model_nodes, g, m);
+    graphwalk(model_nodes[processed], model_nodes, g, m, visited);
     processed++;
   }
 
@@ -236,6 +239,62 @@ void Graph::recreateOutputRenderList(OutputNode& output)
     std::shared_ptr<RenderList>& renderer = *it;
     if(renderer.get() == output.renderer())
     {
+      // Pre-condition: recreateOutputRenderList MUST be called outside
+      // any active beginFrame/endFrame block. The Window::resize ->
+      // resizeSwapChain -> onResize -> here chain is invoked at the
+      // top of Window::render BEFORE beginFrame (Window.cpp:148-151),
+      // so this should always hold. Assert it to catch any future
+      // path that triggers the resize from inside a render frame.
+      if(auto rs = output.renderState(); rs && rs->rhi)
+        SCORE_ASSERT(!rs->rhi->isRecordingFrame());
+
+      // Drain the GPU before tearing down the old RenderList. release()
+      // walks every renderer and triggers a torrent of delete /
+      // deleteLater on QRhi objects (textures, samplers, buffers,
+      // SRBs, pipelines). On Vulkan, sibling outputs (BackgroundNode's
+      // beginOffscreenFrame, MultiWindowNode per-window CBs, the
+      // resizing window's own previous-frame CB) may still hold those
+      // resources in pending state. Without this drain, the next time
+      // ScenePreprocessor's runInitialPasses records vkCmdCopyBuffer /
+      // vkCmdPipelineBarrier into a CB the rhi believes is fresh,
+      // validation fires (-recording / -in-use), eventual device loss.
+      //
+      // FIX-A added rhi->finish() inside ScreenNode::destroyOutput and
+      // BackgroundNode::destroyOutput, but the
+      // `Window::resize → onResize → recreateOutputRenderList` path
+      // never enters those — it tears down the RenderList directly.
+      if(auto rs = output.renderState(); rs && rs->rhi)
+      {
+        auto* rhi = rs->rhi;
+        rhi->finish();
+
+        // Force a no-op offscreen frame on each frame slot so BOTH
+        // cmdPools are reset symmetrically. QRhi-Vulkan's finish()
+        // resets only `cmdPool[currentFrameSlot]`
+        // (qrhivulkan.cpp:2617-2629); the OTHER slot's pool stays
+        // untouched. If a sibling output (BackgroundNode /
+        // PreviewNode / MultiWindowNode) drives its own
+        // beginOffscreenFrame on a separate timer, its
+        // ensureCommandPoolForNewFrame on the un-reset slot finds
+        // CBs still in pending state from the pre-resize era →
+        // vkResetCommandPool VUID-00040, then vkBeginCommandBuffer
+        // on active CB, eventual device loss in vkQueueSubmit.
+        // The cascade fires ~16 frames after resize because that's
+        // when the sibling timer happens to phase-align with the
+        // un-drained slot.
+        //
+        // beginOffscreenFrame advances currentFrameSlot
+        // (qrhivulkan.cpp:3025-3031) and resets the new slot's pool;
+        // endOffscreenFrame waits on ofr.cmdFence (drains every
+        // queued CB before the fence signals). Two iterations cover
+        // QVK_FRAMES_IN_FLIGHT=2.
+        for(int i = 0; i < 2; ++i)
+        {
+          QRhiCommandBuffer* cb{};
+          if(rhi->beginOffscreenFrame(&cb) == QRhi::FrameOpSuccess)
+            rhi->endOffscreenFrame();
+        }
+      }
       auto old_renderer = renderer;
       old_renderer->release();
       old_renderer.reset();
@@ -252,7 +311,6 @@ void Graph::recreateOutputRenderList(OutputNode& output)
     }
     else
     {
-      qDebug("???");
     }
   }
 }
@@ -268,7 +326,25 @@ void Graph::initializeOutput(OutputNode* output, GraphicsApi graphicsApi)
     };
 
     auto onResize = [this, output] {
-      // FIXME optimize if size did not change?
+      // FAST-PATH: pure viewport resize. Skip the full RL rebuild
+      // (release+createRenderList) — its cost (pipeline compiles,
+      // ScenePreprocessor REBUILD, mesh slab + texture array
+      // re-upload, every preprocessor SSBO from cap=0) is wasted
+      // when only the framebuffer size changed. Instead, mark every
+      // renderer's RT specs as dirty so the existing rt_changed
+      // surgical block in renderInternal recreates only the
+      // swapchain-sized RTs + rebinds the downstream samplers.
+      // Persistent GpuResourceRegistry + persistent ScenePreprocessor
+      // caches mean none of the heavier work is needed for a pure
+      // size change.
+      //
+      // Returns false if it cannot handle the change (no renderers
+      // yet, invalid size); the fallback below covers initial setup
+      // and any future "format / sample-count change" path.
+      if(auto* rl = output->renderer())
+        if(auto rs = output->renderState(); rs)
+          if(rl->resizeSwapchainSizedTargets(rs->outputSize))
+            return;
       recreateOutputRenderList(*output);
     };
 
@@ -287,8 +363,6 @@ void Graph::relinkGraph()
   for(auto r_it = m_renderers.begin(); r_it != m_renderers.end();)
   {
     auto& r = **r_it;
-    for(auto& node : m_nodes)
-      node->addedToGraph = false;
 
     assert(!r.nodes.empty());
 
@@ -306,11 +380,21 @@ void Graph::relinkGraph()
       if(model_nodes.size() > 1)
       {
         bool invalid_renderlist = false;
+        // Acquire a resource update batch for both brand-new renderers
+        // (whose init() uploads material UBOs, creates samplers, etc.) and
+        // reused renderers that we just released (whose init() must recreate
+        // freed resources). Without reinitialising the reused path, a
+        // second execution after stop/start leaves every reused renderer
+        // in its released state forever.
+        QRhiResourceUpdateBatch* batch = r.state.rhi
+            ? r.state.rhi->nextResourceUpdateBatch()
+            : nullptr;
         for(auto node : model_nodes)
         {
           score::gfx::NodeRenderer* rn{};
           auto it = node->renderedNodes.find(&r);
-          if(it == node->renderedNodes.end())
+          const bool is_new = (it == node->renderedNodes.end());
+          if(is_new)
           {
             if((rn = node->createRenderer(r)))
             {
@@ -318,7 +402,6 @@ void Graph::relinkGraph()
               node->renderedNodes.emplace(&r, rn);
 
               node->renderedNodesChanged();
-              //rn->init(r);
             }
             else
             {
@@ -331,12 +414,31 @@ void Graph::relinkGraph()
             rn = it->second;
             SCORE_ASSERT(rn);
             rn->release(r);
-            //rn->init(r);
           }
           SCORE_ASSERT(rn);
+          if(batch)
+            rn->init(r, *batch);
           r.renderers.push_back(rn);
         }
 
+        // Fold the batch into the RenderList's initial batch so its uploads
+        // (vertex buffers, placeholder UBOs, samplers) land before the first
+        // render frame. `merge` copies entries but doesn't release `batch`
+        // back to the pool — release it explicitly, or we leak a pool slot
+        // per relinkGraph call and eventually exhaust the 64-slot pool.
+        if(batch)
+        {
+          if(r.initialBatch())
+          {
+            r.initialBatch()->merge(batch);
+            batch->release();
+          }
+          else
+          {
+            r.setInitialBatch(batch);
+          }
+        }
+
         // If a node couldn't be recreated, we skip the whole thing
         if(invalid_renderlist)
         {
@@ -344,11 +446,6 @@ void Graph::relinkGraph()
           r_it = m_renderers.erase(r_it);
           break;
         }
-
-        //         for(auto node : r.renderers)
-        //         {
-        //           node->init(r);
-        //         }
       }
       else if(model_nodes.size() == 1)
       {
@@ -406,10 +503,12 @@ std::shared_ptr<RenderList>
 Graph::createRenderList(OutputNode* output, std::shared_ptr<RenderState> state)
 {
   auto ptr = std::make_shared<RenderList>(*output, state);
+  // Forward the session-wide AssetTable (if any) so ScenePreprocessor
+  // and other renderers can hit the content-hash decode cache
+  // instead of decoding every texture per-RenderList. Plan 09 S1.
+  ptr->setAssetTable(m_assetTable);
   state->renderer = ptr;
   output->setRenderer(ptr);
-  for(auto& node : m_nodes)
-    node->addedToGraph = false;
 #if 0
   for(auto& model : m_nodes)
     qDebug() << "Model: " << typeid(*model).name();
@@ -463,22 +562,511 @@ Graph::createRenderList(OutputNode* output, std::shared_ptr<RenderState> state)
   {
     r.init();
 
-    if(model_nodes.size() > 1)
+    // Compute m_requiresDepth from the node graph BEFORE
+    // createAllInputRenderTargets — RT creation reads it. Mirrors
+    // maybeRebuild's recompute at RenderList.cpp:484-486.
     {
-      // Create all input render targets centrally before any node init().
-      // This ensures RTs are available regardless of init order
-      // (matches what maybeRebuild does).
-      r.createAllInputRenderTargets();
+      bool requiresDepth = false;
+      for(auto node : r.nodes)
+        requiresDepth |= node->requiresDepth;
+      r.markRequiresDepth(requiresDepth);
+    }
 
-      auto batch = r.initialBatch();
-      for(auto node : r.renderers)
-        node->init(r, *batch);
+    // Create all input render targets centrally before any node init().
+    // This ensures RTs are available regardless of init order
+    // (matches what maybeRebuild does).
+    r.createAllInputRenderTargets();
+
+    // Always init all renderers, even when only the output node exists.
+    // This ensures the output renderer's internal render target (e.g.
+    // ScaledRenderer::m_inputTarget) is created and available for
+    // incremental edge additions later.
+    auto batch = r.initialBatch();
+    for(auto node : r.renderers)
+    {
+      node->init(r, *batch);
+      // Sync change indices so the first render frame doesn't see
+      // a spurious rt_changed. Between init and the first render,
+      // update_inputs() can deliver render_target_spec messages that
+      // increment the node's counter. Without syncing, the renderer's
+      // stale index (-1) mismatches → rt_changed triggers → release+init.
+      //
+      // After syncing, force materialChanged and geometryChanged so the
+      // first update() uploads data and processes geometry. This matches
+      // what the old maybeRebuild() did. renderTargetSpecsChanged is reset
+      // to false (synced) so the destructive rt_changed block stays idle.
+      node->checkForChanges();
+      node->materialChanged = true;
+      node->geometryChanged = true;
+      node->renderTargetSpecsChanged = false;
     }
+
+    // Mark built. Skips the wasteful + previously-dangerous mid-frame
+    // release()+init() that maybeRebuild(false) would otherwise fire on
+    // the first render frame. Without this, every viewport resize did
+    // a full RenderList teardown TWICE in quick succession (once here,
+    // once on the next frame in maybeRebuild) -- multi-second resizes
+    // for non-trivial scenes. The mid-frame teardown was also the root
+    // of the CB-cascade chased through commits 51400fc37 / 5b2da1d48 /
+    // 7f9f1e36a. The safety net (C2 drain in maybeRebuild) stays in
+    // place for forced rebuilds and the actual size-change cycle in
+    // maybeRebuild on subsequent frames.
+    //
+    // The historical concerns the previous comment cited (null
+    // processUBO in MRT blit passes, feedback ISF persistent textures,
+    // surgical rt_changed handling) were all fixed in their respective
+    // commits. The two missing pieces vs maybeRebuild's release+init
+    // (m_requiresDepth recompute, markBuilt) are now done here.
+    r.markBuilt();
   }
 
   return ptr;
 }
 
+// Tears down every renderer of `node` and unlinks both from their RenderLists.
+void Graph::removeNodeFromRenderLists(Node* node)
+{
+  for(auto& [rl, renderer] : node->renderedNodes)
+  {
+    renderer->releaseState(*rl);
+    // Unlink before freeing so a dangling pointer value is never read.
+    ossia::remove_erase(rl->renderers, renderer);
+    ossia::remove_erase(rl->nodes, node);
+    delete renderer;
+  }
+  node->renderedNodes.clear();
+  node->renderedNodesChanged();
+}
+
+// Detaches `node`: notifies render lists of its edges, deletes them,
+// releases its renderers and re-sorts. Does NOT remove it from m_nodes —
+// the caller (GfxContext::remove_node) does that via Graph::removeNode().
+void Graph::removeNodeAndEdges(Node* node)
+{
+  // 1. For each edge involving this node, notify the render lists
+  //    so that upstream/downstream renderers clean up their passes.
+  //    Must happen BEFORE edge deletion (onEdgeRemoved reads the edge).
+  for(auto* edge : m_edges)
+  {
+    if(edge->source->node == node || edge->sink->node == node)
+    {
+      // The endpoint of this edge that is not `node`.
+      Node* other = (edge->source->node == node)
+                         ? edge->sink->node
+                         : edge->source->node;
+
+      for(auto& rl : m_renderers)
+      {
+        if(ossia::contains(rl->nodes, other)
+           || ossia::contains(rl->nodes, node))
+        {
+          rl->onEdgeRemoved(*edge);
+        }
+      }
+    }
+  }
+
+  // 2. Delete all edges involving this node from m_edges.
+  //    Edge destructor removes from source->edges and sink->edges.
+  for(auto it = m_edges.begin(); it != m_edges.end();)
+  {
+    Edge* edge = *it;
+    if(edge->source->node == node || edge->sink->node == node)
+    {
+      delete edge;
+      it = m_edges.erase(it);
+    }
+    else
+    {
+      ++it;
+    }
+  }
+
+  // 3. Release the node's own renderers from all render lists.
+  removeNodeFromRenderLists(node);
+
+  // 4. Re-sort every render list (not only affected ones), notify outputs.
+  for(auto& rl : m_renderers)
+  {
+    retopologicalSort(*rl);
+    rl->output.onRendererChange();
+  }
+}
+
+void Graph::onEdgeRemoved(
+    Edge& edge, const ossia::hash_set<const Port*>* preserveSinks)
+{
+  // Forward the removal to every render list that contains the edge's
+  // source node. This must run before the edge itself is destroyed.
+  //
+  // Deliberately NO retopological sort and NO destruction of unreachable
+  // renderers at this point: incrementalEdgeUpdate processes removals
+  // before additions, so a node that is momentarily unreachable may
+  // become reachable again once the additions are applied. Destroying
+  // its renderer would lose runtime state (mesh data, video frames,
+  // etc.) that can't be trivially recreated.
+  //
+  // reconcileAllRenderLists() runs after all adds/removes and handles
+  // the final reachability check, renderer cleanup, and retopo sort.
+
+  Node* const edge_source = edge.source->node;
+
+  for(auto& render_list : m_renderers)
+  {
+    const bool involved = ossia::contains(render_list->nodes, edge_source);
+    if(involved)
+      render_list->onEdgeRemoved(edge, preserveSinks);
+  }
+}
+
+// For every render list holding both endpoints of `edge`, makes sure the
+// source renderer has an output pass for it (and the sink an input RT).
+void Graph::createPassForEdgeIfMissing(Edge& edge)
+{
+  Node* source = edge.source->node;
+
+  for(auto& rl : m_renderers)
+  {
+    // Check if the source node has a renderer in this render list
+    auto rn_it = source->renderedNodes.find(rl.get());
+    if(rn_it == source->renderedNodes.end())
+      continue;
+
+    auto* renderer = rn_it->second;
+
+    // Check if the sink node is also in this render list
+    if(!ossia::contains(rl->nodes, edge.sink->node))
+      continue;
+
+    // Check if a pass already exists for this edge
+    if(renderer->hasOutputPassForEdge(edge))
+      continue;
+
+    // Ensure the sink port has a render target (if needed)
+    Port* sink = edge.sink;
+    if(sink->type == Types::Image
+       && (sink->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource
+       && sink->node != &rl->output)
+    {
+      if(rl->renderTargetForInputPort(*sink).renderTarget == nullptr)
+      {
+        // Index of `sink` within its node's input list.
+        int cur_port = 0;
+        for(auto* in : sink->node->input)
+        {
+          if(in == sink)
+            break;
+          cur_port++;
+        }
+        auto spec = sink->node->resolveRenderTargetSpecs(cur_port, *rl);
+        if(!sink->node->hasExplicitRenderTargetSize(cur_port))
+        {
+          ossia::small_flat_map<const Port*, RenderTargetSpecs, 16> emptySpecs;
+          QSize downstream = rl->resolveDownstreamSize(sink->node, emptySpecs);
+          if(!downstream.isEmpty())
+            spec.size = downstream;
+        }
+        bool wantsDepth = rl->requiresDepth(*sink);
+        bool wantsSamplableDepth
+            = (sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
+        auto rt = createRenderTarget(
+            rl->state, spec.format, spec.size, rl->samples(),
+            wantsDepth || wantsSamplableDepth, wantsSamplableDepth);
+        rl->m_inputRenderTargets[sink] = std::move(rt);
+      }
+    }
+
+    // Create the output pass on the source renderer, collecting its
+    // updates in a fresh batch that is then promoted to the RL's initial
+    // batch or merged into it. merge() does NOT release the source
+    // batch: without the explicit release() the 64-slot pool exhausts
+    // and nextResourceUpdateBatch() returns null (hence the guard below).
+    auto* batch = rl->state.rhi->nextResourceUpdateBatch();
+    if(!batch)
+      continue;
+    renderer->addOutputPass(*rl, edge, *batch);
+
+    if(rl->initialBatch())
+    {
+      rl->initialBatch()->merge(batch);
+      batch->release();
+    }
+    else
+    {
+      rl->setInitialBatch(batch);
+    }
+  }
+}
+
+void Graph::createAllMissingPasses()
+{
+  for(Edge* current : m_edges) // every edge, in m_edges order
+    createPassForEdgeIfMissing(*current);
+}
+
+void Graph::updateAllSinkSamplers()
+{
+  for(Edge* current : m_edges) // every edge, in m_edges order
+    updateSinkSampler(*current);
+}
+
+// Rebinds the sink renderer's input texture for `edge` in every render
+// list that has a renderer for the sink node. No-op for non-Image sinks.
+void Graph::updateSinkSampler(Edge& edge)
+{
+  Port* sink = edge.sink;
+  if(sink->type != Types::Image)
+    return;
+
+  // GrabsFromSource ports have no render target: they need the upstream's
+  // QRhiTexture directly via textureForOutput() (cubemaps, 3D textures,
+  // texture arrays such as ScenePreprocessor's base_color_array). Otherwise
+  // the sink keeps binding the 2D emptyTexture where the shader expects e.g.
+  // sampler2DArray → VUID-vkCmdDrawIndexed-viewType-07752, nothing renders.
+  if((sink->flags & Flag::GrabsFromSource) == Flag::GrabsFromSource)
+  {
+    Port* source = edge.source;
+    if(!source || !source->node)
+      return;
+    for(auto& rl : m_renderers)
+    {
+      auto sink_rn_it = sink->node->renderedNodes.find(rl.get());
+      if(sink_rn_it == sink->node->renderedNodes.end())
+        continue;
+      auto src_rn_it = source->node->renderedNodes.find(rl.get());
+      if(src_rn_it == source->node->renderedNodes.end())
+        continue;
+      if(auto* tex = src_rn_it->second->textureForOutput(*source))
+        sink_rn_it->second->updateInputTexture(*sink, tex);
+    }
+    return;
+  }
+
+  for(auto& rl : m_renderers)
+  {
+    auto sink_rn_it = sink->node->renderedNodes.find(rl.get());
+    if(sink_rn_it == sink->node->renderedNodes.end())
+      continue;
+
+    // For output nodes, the RT comes from the renderer itself
+    if(sink->node == &rl->output)
+    {
+      auto rt = sink_rn_it->second->renderTargetForInput(*sink);
+      if(rt.texture)
+        sink_rn_it->second->updateInputTexture(*sink, rt.texture, rt.depthTexture);
+    }
+    else
+    {
+      // For intermediate nodes, the RT comes from the centralized map
+      auto rt = rl->renderTargetForInputPort(*sink);
+      if(rt.texture)
+        sink_rn_it->second->updateInputTexture(*sink, rt.texture, rt.depthTexture);
+    }
+  }
+}
+
+// Re-synchronises every RenderList with the current graph topology:
+// re-walks from the output, drops renderers of unreachable nodes, and
+// creates render targets + renderers for newly reachable ones.
+void Graph::reconcileAllRenderLists()
+{
+  for(auto& rl : m_renderers)
+  {
+    // 1. Re-walk the graph from output to discover all reachable nodes.
+    auto* outputNode = rl->nodes.front();
+    rl->nodes.clear();
+    rl->nodes.push_back(outputNode);
+    graphwalk(rl->nodes);
+
+    // 2. Find nodes that are newly reachable (no renderer yet)
+    //    and nodes that are no longer reachable (have renderer but not in walk).
+    ossia::flat_set<Node*> reachable(rl->nodes.begin(), rl->nodes.end());
+    // Collect all nodes that have renderers for this RL
+    std::vector<Node*> nodesWithRenderers;
+    for(auto* node : m_nodes)
+    {
+      if(node->renderedNodes.find(rl.get()) != node->renderedNodes.end())
+        nodesWithRenderers.push_back(node);
+    }
+
+    // 3. Remove renderers for nodes no longer reachable.
+    for(auto* node : nodesWithRenderers)
+    {
+      if(!reachable.contains(node))
+      {
+        auto rn_it = node->renderedNodes.find(rl.get());
+        if(rn_it != node->renderedNodes.end())
+        {
+          auto* renderer = rn_it->second;
+          BUFTRACE() << "reconcile: releasing unreachable renderer="
+                     << (void*)renderer
+                     << " node_id=" << node->nodeId
+                     << " (any downstream node still referencing this "
+                        "renderer's buffers via process() caches will see "
+                        "stale pointers → ASan target)";
+          renderer->releaseState(*rl);
+          delete renderer;
+          node->renderedNodes.erase(rn_it);
+          node->renderedNodesChanged();
+        }
+      }
+    }
+
+    // 4. Ensure render targets exist for all input ports BEFORE creating
+    //    renderers. initState() → initInputSamplers() looks up the RT
+    //    texture — if the RT doesn't exist yet, the sampler gets emptyTexture
+    //    and the SRB will have wrong bindings.
+    for(auto* node : rl->nodes)
+    {
+      if(node == &rl->output)
+        continue;
+      int cur_port = 0;
+      for(auto* in : node->input)
+      {
+        if(in->type == Types::Image
+           && (in->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource)
+        {
+          if(rl->renderTargetForInputPort(*in).renderTarget == nullptr)
+          {
+            // Create the missing render target
+            auto spec = node->resolveRenderTargetSpecs(cur_port, *rl);
+            if(!node->hasExplicitRenderTargetSize(cur_port))
+            {
+              ossia::small_flat_map<const Port*, RenderTargetSpecs, 16> emptySpecs;
+              QSize downstream = rl->resolveDownstreamSize(node, emptySpecs);
+              if(!downstream.isEmpty())
+                spec.size = downstream;
+            }
+            bool wantsDepth = rl->requiresDepth(*in);
+            bool wantsSamplableDepth
+                = (in->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
+            auto rt = createRenderTarget(
+                rl->state, spec.format, spec.size, rl->samples(),
+                wantsDepth || wantsSamplableDepth, wantsSamplableDepth);
+            rl->m_inputRenderTargets[in] = std::move(rt);
+          }
+        }
+        cur_port++;
+      }
+    }
+
+    // 5. Create renderers for newly-reachable nodes (AFTER render targets
+    //    exist so that initState → initInputSamplers finds the correct textures).
+    //    `batch` is null when the update-batch pool is exhausted.
+    QRhiResourceUpdateBatch* batch = rl->state.rhi->nextResourceUpdateBatch();
+    bool batchUsed = false;
+
+    for(auto* node : rl->nodes)
+    {
+      if(!batch)
+        break; // cannot init new renderers now; step 6 filters those nodes out
+      if(node->renderedNodes.find(rl.get()) == node->renderedNodes.end())
+      {
+        if(auto* rn = node->createRenderer(*rl))
+        {
+          rn->nodeId = node->nodeId;
+          node->renderedNodes.emplace(rl.get(), rn);
+          node->renderedNodesChanged();
+
+          // All renderers now implement initState(). Pass creation for
+          // individual edges is handled by createPassForEdgeIfMissing
+          // after reconciliation, ensuring all renderers + RTs exist first.
+          rn->initState(*rl, *batch);
+          rn->checkForChanges();
+          rn->materialChanged = true;
+          rn->geometryChanged = true;
+          rn->renderTargetSpecsChanged = false;
+
+          // Seed downstream consumers with this newly-created renderer's
+          // outputs so live-inserted scene producers (Camera, Environment,
+          // Light) don't need a full stop/restart to take
+          // effect. Default no-op for everything else.
+          rn->seedInitialOutputs(*rl);
+
+          batchUsed = true;
+        }
+      }
+    }
+
+    // 6. Rebuild renderers vector from node order, syncing change indices
+    //    for ALL renderers (not just new ones). Otherwise renderers whose
+    //    nodes received process() messages (via update_inputs) before the
+    //    next render could hold stale indices and trigger a full release+init
+    //    in the rt_changed block, destroying feedback ISF persistent textures.
+    rl->renderers.clear();
+    // Filter nodes to only those with renderers
+    std::vector<score::gfx::Node*> validNodes;
+    validNodes.reserve(rl->nodes.size());
+    for(auto* node : rl->nodes)
+    {
+      auto rn_it = node->renderedNodes.find(rl.get());
+      if(rn_it != node->renderedNodes.end())
+      {
+        validNodes.push_back(node);
+        auto* rn = rn_it->second;
+        rl->renderers.push_back(rn);
+
+        // Sync change indices and prevent spurious rt_changed
+        rn->checkForChanges();
+        rn->renderTargetSpecsChanged = false;
+      }
+    }
+    rl->nodes = std::move(validNodes);
+
+    // 7. Submit batch and notify output. `merge()` copies entries but
+    // does NOT release the source batch, so we have to do it ourselves
+    // — otherwise the 64-slot pool leaks one slot per reconcile.
+    if(batchUsed)
+    {
+      if(rl->initialBatch())
+      {
+        rl->initialBatch()->merge(batch);
+        batch->release();
+      }
+      else
+      {
+        rl->setInitialBatch(batch);
+      }
+    }
+    else if(batch)
+    {
+      batch->release();
+    }
+
+    rl->output.onRendererChange();
+  }
+}
+
+void Graph::retopologicalSort(RenderList& rl)
+{
+  // The output node is always first in the list: keep it as the sole
+  // seed and let the graph walk rediscover everything reachable from it.
+  auto* const root = rl.nodes.front();
+  rl.nodes.clear();
+  rl.nodes.push_back(root);
+  graphwalk(rl.nodes);
+
+  // Rebuild the node and renderer lists in lock-step from the new order.
+  // A node found by the walk but lacking a renderer for this RenderList
+  // (e.g. just added to the graph and not yet processed by
+  // reconcileAllRenderLists) is left out of both lists, which prevents
+  // the render loop from asserting.
+  std::vector<score::gfx::Node*> kept;
+  kept.reserve(rl.nodes.size());
+  rl.renderers.clear();
+
+  for(auto* candidate : rl.nodes)
+  {
+    const auto found = candidate->renderedNodes.find(&rl);
+    if(found == candidate->renderedNodes.end())
+      continue;
+    kept.push_back(candidate);
+    rl.renderers.push_back(found->second);
+  }
+
+  rl.nodes = std::move(kept);
+}
+
 Graph::Graph() { }
 
 Graph::~Graph()
@@ -493,6 +1081,19 @@ Graph::~Graph()
     out->destroyOutput();
   }
 
+  // Belt-and-braces: any OutputNode registered via addNode but not yet
+  // promoted into m_outputs (e.g. preview outputs added via
+  // createSingleRenderList without a subsequent createAllRenderLists)
+  // would otherwise leak its swapchain / RPD on shutdown.
+  for(auto* n : m_nodes)
+  {
+    if(auto* out = dynamic_cast<OutputNode*>(n))
+    {
+      if(!ossia::contains(m_outputs, out))
+        out->destroyOutput();
+    }
+  }
+
   clearEdges();
 }
 
@@ -545,25 +1146,6 @@ void Graph::removeEdge(Port* source, Port* sink)
   }
 }
 
-void Graph::addAndLinkEdge(Port* source, Port* sink, Process::CableType t)
-{
-  addEdge(source, sink, t);
-
-  auto output = dynamic_cast<OutputNode*>(sink->node);
-  SCORE_ASSERT(output);
-
-  recreateOutputRenderList(*output);
-}
-
-void Graph::unlinkAndRemoveEdge(Port* source, Port* sink)
-{
-  removeEdge(source, sink);
-  auto output = dynamic_cast<OutputNode*>(sink->node);
-  SCORE_ASSERT(output);
-
-  recreateOutputRenderList(*output);
-}
-
 void Graph::destroyOutputRenderList(score::gfx::OutputNode& output)
 {
   auto it = ossia::find_if(
@@ -584,7 +1166,6 @@ void Graph::destroyOutputRenderList(score::gfx::OutputNode& output)
     }
     else
     {
-      qDebug("???");
     }
   }
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp
index 6431b0d412..20f202d0ef 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp
@@ -7,6 +7,10 @@
 #include <ossia/detail/algorithms.hpp>
 
 #include <score_plugin_gfx_export.h>
+namespace Gfx
+{
+class AssetTable;
+}
 namespace score::gfx
 {
 class OutputNode;
@@ -43,15 +47,42 @@ struct SCORE_PLUGIN_GFX_EXPORT Graph
    */
   void removeEdge(Port* source, Port* sink);
 
-  /**
-   * @brief Add an edge between two nodes and creates relevant pipelines.
-   */
-  void addAndLinkEdge(Port* source, Port* sink, Process::CableType t);
-
-  /**
-   * @brief Remove an edge between two nodes and free the pipelines
-   */
-  void unlinkAndRemoveEdge(Port* source, Port* sink);
+  /// Remove a node's renderers from all render lists.
+  void removeNodeFromRenderLists(Node* node);
+
+  /// Incrementally remove a non-output node: notify renderers of each
+  /// edge being removed, delete edges from m_edges, release the node's
+  /// renderers, retopological sort affected render lists, remove from m_nodes.
+  void removeNodeAndEdges(Node* node);
+
+  /// Called when an edge is removed from the graph.
+  ///
+  /// @param preserveSinks Optional set of sink Ports whose input render
+  ///   target should be kept alive even if this edge was their only feed.
+  ///   GfxContext::incrementalEdgeUpdate uses this to bridge the brief
+  ///   "sink has 0 edges" window that appears during a mid-batch filter
+  ///   insertion (A→B removed, A→F and F→B added in the same batch).
+  ///   Without this, B's input RT would be destroyed and immediately
+  ///   re-allocated with the same spec.
+  void
+  onEdgeRemoved(Edge& edge, const ossia::hash_set<const Port*>* preserveSinks = nullptr);
+
+  /// For an added edge, update the sink renderer's input sampler
+  /// to point to the (possibly new) render target texture.
+  void updateSinkSampler(Edge& edge);
+
+  /// Create missing passes and update samplers for ALL edges in ALL render lists.
+  void createAllMissingPasses();
+  void updateAllSinkSamplers();
+
+  /// For an added edge, create the output pass on the source renderer
+  /// if it exists but doesn't already have a pass for this edge.
+  void createPassForEdgeIfMissing(Edge& edge);
+
+  /// After all edges have been added/removed, reconcile all render lists:
+  /// retopological sort, create renderers for newly-reachable nodes,
+  /// create render targets and passes, remove unreachable nodes.
+  void reconcileAllRenderLists();
 
   /**
    * @brief Remove all edges.
@@ -93,7 +124,24 @@ struct SCORE_PLUGIN_GFX_EXPORT Graph
     return m_outputs;
   }
 
+  /**
+   * @brief Inject the session-wide AssetTable (Plan 09 S1).
+   *
+   * GfxContext owns the AssetTable and calls this once at graph
+   * construction. All RenderLists subsequently created by this
+   * Graph receive the pointer via their constructor, so the
+   * preprocessor can hit the content-hash cache when decoding
+   * texture_source / buffer_resource payloads.
+   *
+   * Null is allowed (tests, early teardown) — consumers guard.
+   */
+  void setAssetTable(Gfx::AssetTable* a) noexcept { m_assetTable = a; }
+  Gfx::AssetTable* assetTable() const noexcept { return m_assetTable; }
+
 private:
+  /// Re-run topological sort for a render list and rebuild renderer ordering.
+  void retopologicalSort(RenderList& rl);
+
   void initializeOutput(OutputNode* output, GraphicsApi graphicsApi);
   void createOutputRenderList(OutputNode& output);
   void recreateOutputRenderList(OutputNode& output);
@@ -107,5 +155,9 @@ struct SCORE_PLUGIN_GFX_EXPORT Graph
   std::vector<Edge*> m_edges;
 
   std::vector<OutputNode*> m_outputs;
+
+  // Session-wide decode cache. Non-owning; GfxContext owns the
+  // actual AssetTable. May be null in tests or during teardown.
+  Gfx::AssetTable* m_assetTable{};
 };
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp
index 6a82673425..e7de2544c1 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp
@@ -31,7 +31,24 @@ struct isf_input_port_vis
 
   void operator()(const isf::long_input& in) noexcept
   {
-    *reinterpret_cast<int*>(data) = in.def;
+    // Enum mode (VALUES/LABELS set): in.def is the *index* into VALUES, but
+    // the shader and the UI pipeline downstream consume the numeric VALUE at
+    // that index. Look it up here so the initial UBO state matches what the
+    // ComboBox emits after any user interaction. String-valued VALUES fall
+    // back to the index (GLSL can't receive strings).
+    int initial = (int)in.def;
+    if(!in.values.empty())
+    {
+      auto idx = std::min<std::size_t>(in.def, in.values.size() - 1);
+      const auto& v = in.values[idx];
+      if(auto i = ossia::get_if<int64_t>(&v))
+        initial = (int)*i;
+      else if(auto d = ossia::get_if<double>(&v))
+        initial = (int)*d;
+      else
+        initial = (int)idx;
+    }
+    *reinterpret_cast<int*>(data) = initial;
     self.input.push_back(new Port{&self, data, Types::Int, {}});
     data += 4;
     sz += 4;
@@ -105,15 +122,38 @@ struct isf_input_port_vis
 
   void operator()(const isf::image_input& in) noexcept
   {
-    auto flags = in.dimensions == 3 ? Flag::GrabsFromSource : Flag{};
+    // GrabsFromSource = "fetch the QRhiTexture* straight from the upstream
+    // renderer's textureForOutput() instead of allocating our own render
+    // target". Required for:
+    //  - 3D textures (volumes): no render-target path exists for them.
+    //  - Texture arrays: consumers (e.g. classic_pbr_textured sampling a
+    //    per-material base_color_array from ScenePreprocessor) need the
+    //    producer's actual QRhiTexture array, not an empty render-target
+    //    texture created on their side.
+    //  - "STATIC: true" image inputs (shader-author opt-in): the upstream
+    //    is a CPU producer that publishes a long-lived QRhiTexture
+    //    (precomputed LUTs, IBL bakes, asset caches). Without this opt-in
+    //    the consumer would silently allocate an unused render target and
+    //    bind that empty texture instead of the producer's real one,
+    //    making the input read all zeros.
+    auto flags = (in.dimensions == 3 || in.is_array || in.is_static)
+                     ? Flag::GrabsFromSource
+                     : Flag{};
     if(in.depth)
       flags = flags | Flag::SamplableDepth;
+    if(in.is_array)
+      flags = flags | Flag::TextureArray;
+    if(in.dimensions == 3)
+      flags = flags | Flag::ThreeDimensional;
     self.input.push_back(new Port{&self, {}, Types::Image, flags, {}});
   }
 
   void operator()(const isf::cubemap_input& in) noexcept
   {
-    self.input.push_back(new Port{&self, {}, Types::Image, Flag::GrabsFromSource, {}});
+    auto flags = Flag::GrabsFromSource | Flag::Cubemap;
+    if(in.depth)
+      flags = flags | Flag::SamplableDepth;
+    self.input.push_back(new Port{&self, {}, Types::Image, flags, {}});
   }
 
   void operator()(const isf::audio_input& audio) noexcept
@@ -121,6 +161,8 @@ struct isf_input_port_vis
     self.m_audio_textures.push_back({});
     auto& data = self.m_audio_textures.back();
     data.fixedSize = audio.max;
+    data.filter = audio.sampler.filter;
+    data.wrap = audio.sampler.wrap;
     self.input.push_back(new Port{&self, &data, Types::Audio, {}});
   }
 
@@ -130,6 +172,8 @@ struct isf_input_port_vis
     auto& data = self.m_audio_textures.back();
     data.fixedSize = audio.max;
     data.mode = data.Histogram;
+    data.filter = audio.sampler.filter;
+    data.wrap = audio.sampler.wrap;
     self.input.push_back(new Port{&self, &data, Types::Audio, {}});
   }
 
@@ -139,6 +183,8 @@ struct isf_input_port_vis
     auto& data = self.m_audio_textures.back();
     data.fixedSize = audio.max;
     data.mode = AudioTexture::Mode::FFT;
+    data.filter = audio.sampler.filter;
+    data.wrap = audio.sampler.wrap;
     self.input.push_back(new Port{&self, &data, Types::Audio, {}});
   }
 
@@ -149,16 +195,24 @@ struct isf_input_port_vis
     // - read_only: input port
     // - write_only: output port
     // - read_write: output port only, buffer is persistent
+    //
+    // BUFFER_USAGE="indirect_draw[_indexed]": port additionally carries the
+    // IndirectDraw flag so renderers can route it to the indirect-draw
+    // mechanism on MeshBuffers.
+
+    auto extra_flags = Flag{};
+    if(in.buffer_usage == "indirect_draw" || in.buffer_usage == "indirect_draw_indexed")
+      extra_flags = extra_flags | Flag::IndirectDraw;
 
     if(in.access == "read_only")
     {
       // Create input port for read-only storage buffer
-      self.input.push_back(new Port{&self, {}, Types::Buffer, {}});
+      self.input.push_back(new Port{&self, {}, Types::Buffer, extra_flags, {}});
     }
     else if(in.access.contains("write"))
     {
       // Create output port for write-only storage buffer
-      self.output.push_back(new Port{&self, {}, Types::Buffer, {}});
+      self.output.push_back(new Port{&self, {}, Types::Buffer, extra_flags, {}});
 
       // Check for flexible array member
       if(!in.layout.empty())
@@ -172,9 +226,18 @@ struct isf_input_port_vis
     }
   }
 
+  void operator()(const isf::uniform_input& in) noexcept
+  {
+    // Read-only UBO sourced from upstream Buffer port. Renderers bind it via
+    // QRhiShaderResourceBinding::uniformBuffer (not bufferLoad).
+    self.input.push_back(new Port{&self, {}, Types::Buffer, Flag::UniformBuffer, {}});
+  }
+
   void operator()(const isf::texture_input& in) noexcept
   {
-    auto flags = in.dimensions == 3 ? Flag::GrabsFromSource : Flag{};
+    auto flags = in.dimensions == 3
+                     ? (Flag::GrabsFromSource | Flag::ThreeDimensional)
+                     : Flag{};
     self.input.push_back(new Port{&self, {}, Types::Image, flags, {}});
   }
 
@@ -229,7 +292,9 @@ struct isf_input_port_vis
     if(in.access == "read_only")
     {
       // Input port for read-only image; 3D textures use GrabsFromSource
-      auto flags = in.is3D() ? Flag::GrabsFromSource : Flag{};
+      auto flags = in.is3D()
+                       ? (Flag::GrabsFromSource | Flag::ThreeDimensional)
+                       : Flag{};
       self.input.push_back(new Port{&self, {}, Types::Image, flags, {}});
     }
     else if(in.access == "write_only" || in.access == "read_write")
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp
index 805208ae0a..aa74aaa621 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp
@@ -45,5 +45,32 @@ class ISFNode : public score::gfx::ProcessNode
   std::vector<int*> m_event_ports;
 
   int m_materialSize{};
+
+  // Reset all `event` input ports to 0 so they pulse true for exactly one
+  // frame after the upstream producer writes 1. Called at the end of each
+  // frame's update() — AFTER the material UBO has been staged via
+  // updateDynamicBuffer (which captures the value at call time), so
+  // resetting the CPU memory here doesn't affect what the shader reads
+  // this frame, only what would leak into the next frame if we didn't
+  // reset.
+  //
+  // Returns true if any port was actually firing. Callers should then set
+  // their NodeRenderer::materialChanged flag so the next frame re-uploads
+  // the now-zero event value — otherwise the gate-on-materialChanged
+  // upload path would skip the re-upload and leave the stale 1 in the GPU
+  // UBO indefinitely.
+  [[nodiscard]] bool resetEventPortsAfterFrame() noexcept
+  {
+    bool fired = false;
+    for(int* port : m_event_ports)
+    {
+      if(!port || *port == 0)
+        continue;
+      // Non-zero means the event pulsed: clear it and report it.
+      *port = 0;
+      fired = true;
+    }
+    return fired;
+  }
 };
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp
index 637269a244..c8e519c70a 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp
@@ -1,8 +1,152 @@
 #pragma once
 #include <isf.hpp>
 
+#include <ossia/detail/variant.hpp>
+
 namespace score::gfx
 {
+// ---------------------------------------------------------------------------
+// Descriptor port walker (diagnostic 097, refactor R3)
+// ---------------------------------------------------------------------------
+//
+// SINGLE source of truth for "how many input ports / output ports / samplers
+// does each desc.inputs entry produce?". Every prior call site (CSF
+// port_indices, RawRaster port_idx, RawRaster bindAuxTexturesInit, ISF
+// IsfBindingsBuilder) had its own copy of this rule — and they had drifted
+// (e.g. CSF over-counted inlets for write-only storage_input without a
+// flex-array sizing field; IsfBindingsBuilder added a phantom inlet for every
+// write-only csf_image_input). Mirrors `isf_input_port_vis` in ISFNode.cpp,
+// which is the actual port-creation code.
+//
+// When a new isf::*_input variant is added, update isf_input_port_vis AND
+// the matching `operator()` here — keep them in lockstep.
+struct port_counts
+{
+  int inlets = 0;  //!< score input ports created by this desc.inputs entry
+  int outlets = 0; //!< score output ports created
+  //! Sampler slots in initInputSamplers (1 per image-like input; +1 for
+  //! image_input.depth on a non-GrabsFromSource port).
+  int samplers = 0;
+  port_counts& operator+=(const port_counts& rhs) noexcept
+  {
+    inlets += rhs.inlets;
+    outlets += rhs.outlets;
+    samplers += rhs.samplers;
+    return *this;
+  }
+};
+
+// Returns the port_counts contributed by a single input variant. Mirrors
+// isf_input_port_vis (ISFNode.cpp) one-to-one.
+struct isf_input_port_count_vis
+{
+  port_counts operator()(const isf::float_input&) const noexcept   { return {1, 0, 0}; }
+  port_counts operator()(const isf::long_input&) const noexcept    { return {1, 0, 0}; }
+  port_counts operator()(const isf::event_input&) const noexcept   { return {1, 0, 0}; }
+  port_counts operator()(const isf::bool_input&) const noexcept    { return {1, 0, 0}; }
+  port_counts operator()(const isf::point2d_input&) const noexcept { return {1, 0, 0}; }
+  port_counts operator()(const isf::point3d_input&) const noexcept { return {1, 0, 0}; }
+  port_counts operator()(const isf::color_input&) const noexcept   { return {1, 0, 0}; }
+  port_counts operator()(const isf::audio_input&) const noexcept    { return {1, 0, 0}; }
+  port_counts operator()(const isf::audioHist_input&) const noexcept{ return {1, 0, 0}; }
+  port_counts operator()(const isf::audioFFT_input&) const noexcept { return {1, 0, 0}; }
+
+  port_counts operator()(const isf::image_input& in) const noexcept
+  {
+    // GrabsFromSource means no own render target → the matching depth sampler
+    // (image_input.depth==true) is also NOT created in initInputSamplers.
+    const bool grabs = (in.dimensions == 3 || in.is_array || in.is_static);
+    const int extra_depth_sampler = (in.depth && !grabs) ? 1 : 0;
+    return {1, 0, 1 + extra_depth_sampler};
+  }
+  port_counts operator()(const isf::cubemap_input&) const noexcept { return {1, 0, 1}; }
+  port_counts operator()(const isf::texture_input&) const noexcept { return {1, 0, 1}; }
+
+  port_counts operator()(const isf::storage_input& in) const noexcept
+  {
+    // read_only: 1 input port. Access containing "write": 1 output port,
+    //   +1 input port when the layout ends in a flexible array (sizing
+    //   long_input). Any other access creates no port, like the port visitor.
+    if(in.access == "read_only")
+      return {1, 0, 0};
+    if(in.access.find("write") == std::string::npos)
+      return {};
+    const bool flex = !in.layout.empty()
+                      && in.layout.back().type.find("[]") != std::string::npos;
+    return {flex ? 1 : 0, 1, 0};
+  }
+
+  port_counts operator()(const isf::uniform_input&) const noexcept
+  {
+    return {1, 0, 0};
+  }
+
+  port_counts operator()(const isf::csf_image_input& in) const noexcept
+  {
+    // read_only: 1 input port; write/read_write: 1 output port (no input).
+    if(in.access == "read_only")
+      return {1, 0, 0};
+    return {0, 1, 0};
+  }
+
+  port_counts operator()(const isf::geometry_input& in) const noexcept
+  {
+    port_counts c{};
+    if(in.attributes.empty())
+    {
+      // Pass-through: 1 inlet + 1 outlet
+      c.inlets = 1;
+      c.outlets = 1;
+    }
+    else
+    {
+      for(const auto& attr : in.attributes)
+        if(attr.access == "read_only" || attr.access == "read_write")
+        { c.inlets = 1; break; }
+      for(const auto& attr : in.attributes)
+        if(attr.access == "write_only" || attr.access == "read_write")
+        { c.outlets = 1; break; }
+    }
+    // $USER ports → synthesized long_input each (1 inlet)
+    if(in.vertex_count.find("$USER") != std::string::npos)   c.inlets++;
+    if(in.instance_count.find("$USER") != std::string::npos) c.inlets++;
+    for(const auto& aux : in.auxiliary)
+      if(aux.size.find("$USER") != std::string::npos)
+        c.inlets++;
+    return c;
+  }
+};
+
+// Walk desc.inputs once. For each input, the visitor receives:
+//   - the isf::input entry
+//   - the cumulative port_counts BEFORE this input (so cur.inlets is the
+//     index of the first input port this entry creates, if any)
+//   - the per-input port_counts delta (how many ports this entry creates)
+// Cumulative state is then advanced before moving on.
+//
+// Callers needing a non-zero starting offset (e.g. RawRaster's port 0 is
+// the implicit Geometry input) can pass it in `start` — its inlets/outlets
+// are accumulated upfront.
+template <typename F>
+inline void walk_descriptor_inputs(
+    const isf::descriptor& desc, port_counts start, F&& fn)
+{
+  // start is a by-value copy: reuse it as the running total passed to fn.
+  for(const auto& entry : desc.inputs)
+  {
+    port_counts step = ossia::visit(isf_input_port_count_vis{}, entry.data);
+    fn(entry, start, step);
+    start += step;
+  }
+}
+
+// Convenience overload: zero starting offset.
+template <typename F>
+inline void walk_descriptor_inputs(const isf::descriptor& desc, F&& fn)
+{
+  walk_descriptor_inputs(desc, port_counts{}, std::forward<F>(fn));
+}
+
 struct isf_input_size_vis
 {
   int sz{};
@@ -55,21 +199,32 @@ struct isf_input_size_vis
   // CSF-specific input handlers
   void operator()(const isf::storage_input& in) noexcept
   {
-    if(in.access.contains("write"))
+    // Must match what isf_input_port_vis (ISFNode.cpp) actually writes into the
+    // blob — and the synthesized "size" int it creates: ONLY a writable buffer
+    // whose layout ends in a flexible-array member. Reserving for every write
+    // buffer over-allocated the UBO (harmless, but desynced from the port
+    // visitor and the generated GLSL Params/material_t block).
+    if(in.access.contains("write") && !in.layout.empty()
+       && in.layout.back().type.find("[]") != std::string::npos)
     {
       (*this)(isf::long_input{});
     }
   }
 
+  void operator()(const isf::uniform_input&) noexcept
+  {
+    // UBO inputs are bound from an upstream Buffer port; they do not
+    // contribute to the material UBO size.
+  }
+
   void operator()(const isf::texture_input in) noexcept { }
 
   void operator()(const isf::csf_image_input& in) noexcept
   {
-    if(in.access.contains("write"))
-    {
-      (*this)(isf::point2d_input{});
-      (*this)(isf::long_input{});
-    }
+    // isf_input_port_vis does NOT write anything into the material blob for
+    // write csf_image inputs (its point2d/long synthesis is commented out), so
+    // reserve nothing here — keep the size visitor and the port visitor (and
+    // hence the generated uniform block) in agreement.
   }
 
   void operator()(const isf::geometry_input& in) noexcept
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp
index d7d6608329..8906d14a3f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp
@@ -9,6 +9,7 @@
 #include <QSvgRenderer>
 #endif
 
+#include <ossia/detail/algorithms.hpp>
 #include <ossia/detail/math.hpp>
 #include <ossia/gfx/port_index.hpp>
 #include <ossia/network/value/value_conversion.hpp>
@@ -207,6 +208,11 @@ void ImagesNode::process(Message&& msg)
 
         case 5: // Images
         {
+          // getImages() acquires every image from Gfx::ImageCache (refcount
+          // bumped per image). Without a matching release on the no-change
+          // branch below, the cache refcount accumulated by one acquire per
+          // re-emit of the same control value — long sessions that re-fed
+          // the same image list every tick bled cache memory until quit.
           auto new_images = Gfx::getImages(*val, this->ctx);
           auto diff = [](const score::gfx::Image& lhs, const score::gfx::Image& rhs) {
             return lhs.path != rhs.path;
@@ -245,6 +251,14 @@ void ImagesNode::process(Message&& msg)
 
             ++this->imagesChanged;
           }
+          else
+          {
+            // Same image set as before — release the freshly-acquired
+            // copy so the cache refcount returns to baseline. Without
+            // this, every re-emit on the same control value bumped
+            // ImageCache::m_refcounts by one per image and never paired.
+            Gfx::releaseImages(new_images);
+          }
           break;
         }
 
@@ -381,13 +395,13 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
   }
 
   TextureRenderTarget renderTargetForInput(const Port& p) override { return {}; }
-  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
     auto& n = static_cast<const ImagesNode&>(this->node);
     const auto& rs = renderer.state;
-    const Mesh& mesh = renderer.defaultQuad();
+    m_mesh = &renderer.defaultQuad();
 
-    defaultMeshInit(renderer, mesh, res);
+    defaultMeshInit(renderer, *m_mesh, res);
     processUBOInit(renderer);
     m_material.init(renderer, node.input, m_samplers);
 
@@ -398,9 +412,15 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
     recreateTextures(rhi);
 
     tile = n.tileMode;
+
+    // Compile shaders for the "single" case
     std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders(
         rs, images_single_vertex_shader, images_single_fragment_shader);
 
+    // Compile shaders for the "tiled" case
+    std::tie(m_tiledVertexS, m_tiledFragmentS) = score::gfx::makeShaders(
+        rs, images_tiled_vertex_shader, images_tiled_fragment_shader);
+
     // Create the sampler in which we are going to put the texture
     {
       auto sampler = createSampler(tile, rhi);
@@ -408,34 +428,62 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
       m_samplers.push_back({sampler, tex});
     }
 
-    // Initialize the passes for the "single" case
-    defaultPassesInit(renderer, mesh);
+    m_initialized = true;
+  }
 
-    // Initialize the passes for the "tiled" case
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override
+  {
+    if(!m_mesh)
+      return;
+    if(this->node.output[0]->type != score::gfx::Types::Image)
+      return;
+
+    auto rt = renderer.renderTargetForOutput(edge);
+    if(rt.renderTarget)
     {
-      auto [v, f] = score::gfx::makeShaders(
-          rs, images_tiled_vertex_shader, images_tiled_fragment_shader);
-      for(Edge* edge : this->node.output[0]->edges)
+      // Pass for the "single" case
       {
-        auto rt = renderer.renderTargetForOutput(*edge);
-        if(rt.renderTarget)
-        {
-          m_altPasses.emplace_back(
-              edge, score::gfx::buildPipeline(
-                        renderer, mesh, v, f, rt, m_processUBO, m_material.buffer,
-                        m_samplers));
-        }
+        auto pip = score::gfx::buildPipeline(
+            renderer, *m_mesh, m_vertexS, m_fragmentS, rt, m_processUBO,
+            m_material.buffer, m_samplers);
+        if(pip.pipeline)
+          m_p.emplace_back(&edge, Pass{rt, pip, nullptr});
+      }
+
+      // Pass for the "tiled" case
+      {
+        auto pip = score::gfx::buildPipeline(
+            renderer, *m_mesh, m_tiledVertexS, m_tiledFragmentS, rt, m_processUBO,
+            m_material.buffer, m_samplers);
+        if(pip.pipeline)
+          m_altPasses.emplace_back(&edge, Pass{rt, pip, nullptr});
       }
     }
   }
 
+  void removeOutputPass(RenderList& renderer, Edge& edge) override
+  {
+    // Remove from the single passes
+    GenericNodeRenderer::removeOutputPass(renderer, edge);
+    // Remove from the tiled passes: find this edge's entry, if any.
+    const auto matches = [target = &edge](const auto& entry) {
+      return entry.first == target;
+    };
+    auto pos = ossia::find_if(m_altPasses, matches);
+    if(pos == m_altPasses.end())
+      return;
+    pos->second.release();
+    m_altPasses.erase(pos);
+  }
+
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override
   {
     auto& n = (static_cast<const ImagesNode&>(this->node));
     if(n.tileMode != tile)
     {
       tile = n.tileMode;
-      auto [s, tex] = m_samplers[0];
+      auto [s, tex, fb_] = m_samplers[0];
       m_samplers.clear();
 
       // Create a new sampler
@@ -445,7 +493,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
       // Replace it in the render passes
       auto replace_sampler = [](PassMap& passes, QRhiSampler* oldS, QRhiSampler* newS) {
         for(auto& pass : passes)
-          score::gfx::replaceSampler(*pass.second.srb, oldS, newS);
+          score::gfx::replaceSampler(*pass.second.p.srb, oldS, newS);
       };
 
       replace_sampler(m_p, s, new_sampler);
@@ -539,7 +587,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
       auto replace_texture
           = [](PassMap& passes, QRhiSampler* sampler, QRhiTexture* tex) {
         for(auto& pass : passes)
-          score::gfx::replaceTexture(*pass.second.srb, sampler, tex);
+          score::gfx::replaceTexture(*pass.second.p.srb, sampler, tex);
       };
 
       currentImageIndex = imageIndex(n.ubo.currentImageIndex, m_textures.size());
@@ -639,6 +687,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
       {
         res.updateDynamicBuffer(m_material.buffer, 0, m_material.size, &m_ubo);
       }
+      materialChanged = false;
     }
   }
 
@@ -651,7 +700,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
       defaultRenderPass(renderer, mesh, cb, edge, m_altPasses);
   }
 
-  void release(RenderList& r) override
+  void releaseState(RenderList& r) override
   {
     for(auto tex : m_textures)
     {
@@ -659,17 +708,17 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer
     }
     m_textures.clear();
 
-    defaultRelease(r);
+    for(auto& pass : m_altPasses)
+      pass.second.release();
+    m_altPasses.clear();
 
-    {
-      for(auto& pass : m_altPasses)
-        pass.second.release();
-      m_altPasses.clear();
-    }
+    GenericNodeRenderer::releaseState(r);
   }
 
   struct ImagesNode::UBO m_ubo;
-  ossia::small_vector<std::pair<Edge*, Pipeline>, 2> m_altPasses;
+  QShader m_tiledVertexS;
+  QShader m_tiledFragmentS;
+  ossia::small_vector<std::pair<Edge*, Pass>, 2> m_altPasses;
   std::vector<QRhiTexture*> m_textures;
   bool m_uploaded = false;
 };
@@ -755,9 +804,9 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer
         if(rt.renderTarget)
         {
           m_altPasses.emplace_back(
-              edge, score::gfx::buildPipeline(
+              edge, Pass{rt, score::gfx::buildPipeline(
                         renderer, mesh, v, f, rt, m_processUBO, m_material.buffer,
-                        m_samplers));
+                        m_samplers), nullptr});
         }
       }
     }
@@ -770,7 +819,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer
     if(n.tileMode != tile)
     {
       tile = n.tileMode;
-      auto [s, tex] = m_samplers[0];
+      auto [s, tex, fb_] = m_samplers[0];
 
       m_samplers.clear();
 
@@ -781,7 +830,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer
       // Replace it in the render passes
       auto replace_sampler = [](PassMap& passes, QRhiSampler* oldS, QRhiSampler* newS) {
         for(auto& pass : passes)
-          score::gfx::replaceSampler(*pass.second.srb, oldS, newS);
+          score::gfx::replaceSampler(*pass.second.p.srb, oldS, newS);
       };
 
       replace_sampler(m_p, s, new_sampler);
@@ -803,7 +852,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer
       auto replace_texture
           = [](PassMap& passes, QRhiSampler* sampler, QRhiTexture* tex) {
         for(auto& pass : passes)
-          score::gfx::replaceTexture(*pass.second.srb, sampler, tex);
+          score::gfx::replaceTexture(*pass.second.p.srb, sampler, tex);
       };
 
       auto sampler = m_samplers[0].sampler;
@@ -854,7 +903,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer
   }
 
   struct ImagesNode::UBO m_prev_ubo;
-  ossia::small_vector<std::pair<Edge*, Pipeline>, 2> m_altPasses;
+  ossia::small_vector<std::pair<Edge*, Pass>, 2> m_altPasses;
   QRhiTexture* m_texture{};
   bool m_uploaded = false;
 };
@@ -929,10 +978,10 @@ class FullScreenImageNode::Renderer : public GenericNodeRenderer
   ~Renderer() { }
 
   TextureRenderTarget renderTargetForInput(const Port& p) override { return {}; }
-  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
-    const auto& mesh = renderer.defaultTriangle();
-    defaultMeshInit(renderer, mesh, res);
+    m_mesh = &renderer.defaultTriangle();
+    defaultMeshInit(renderer, *m_mesh, res);
     processUBOInit(renderer);
     m_material.init(renderer, node.input, m_samplers);
     std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders(
@@ -962,7 +1011,7 @@ class FullScreenImageNode::Renderer : public GenericNodeRenderer
       m_samplers.push_back({sampler, m_texture});
     }
 
-    defaultPassesInit(renderer, mesh);
+    m_initialized = true;
   }
 
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, score::gfx::Edge* edge)
@@ -985,12 +1034,15 @@ class FullScreenImageNode::Renderer : public GenericNodeRenderer
     defaultRenderPass(renderer, mesh, cb, edge);
   }
 
-  void release(RenderList& r) override
+  void releaseState(RenderList& r) override
   {
-    m_texture->deleteLater();
-    m_texture = nullptr;
+    if(m_texture)
+    {
+      m_texture->deleteLater();
+      m_texture = nullptr;
+    }
 
-    defaultRelease(r);
+    GenericNodeRenderer::releaseState(r);
   }
 
   QRhiTexture* m_texture{};
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.cpp
new file mode 100644
index 0000000000..40f882e04c
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.cpp
@@ -0,0 +1,1016 @@
+#include "IsfBindingsBuilder.hpp"
+
+#include <Gfx/Graph/ISFVisitors.hpp>
+#include <Gfx/Graph/Node.hpp>
+#include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RhiClearBuffer.hpp>
+
+#include <score/tools/Debug.hpp>
+
+namespace score::gfx
+{
+
+// Byte size of a GLSL primitive type name (conventions: see the header comment).
+int64_t glslTypeSizeBytes(std::string_view type) noexcept
+{
+  const auto family = [type](std::string_view f, std::string_view i, std::string_view u) {
+    return type == f || type == i || type == u;
+  };
+  if(family("float", "int", "uint") || type == "bool")
+    return 4;
+  if(family("vec2", "ivec2", "uvec2"))
+    return 8;
+  if(family("vec3", "ivec3", "uvec3"))
+    return 12;
+  if(family("vec4", "ivec4", "uvec4") || type == "mat2")
+    return 16;
+  if(type == "mat3")
+    return 48;
+  // Any name not listed above falls back to 16 bytes.
+  return type == "mat4" ? 64 : 16;
+}
+
+int64_t std430ArrayStride(std::string_view type) noexcept
+{
+  // Three-component vectors are the one special case: as array elements
+  // they occupy a 16-byte slot (12 bytes of data + 4 bytes of padding).
+  // All other types use the size reported by glslTypeSizeBytes().
+  const bool threeComponent
+      = type == "vec3" || type == "ivec3" || type == "uvec3";
+  return threeComponent ? 16 : glslTypeSizeBytes(type);
+}
+
+}
+
+namespace score::gfx
+{
+
+int64_t std430LayoutSize(
+    const std::vector<isf::storage_input::layout_field>& layout) noexcept
+{
+  // Sums the fields of `layout`, each element rounded up to 16 bytes (the
+  // CSF renderer's convention). The result is always positive: an empty
+  // layout yields 16.
+  int64_t sz = 0;
+  for(const auto& f : layout)
+  {
+    auto type = f.type;
+    int64_t count = 1;
+    auto lbr = type.find('[');
+    if(lbr != std::string::npos)
+    {
+      // "T[N]" -> N elements. An empty "[]" (runtime-length) or an
+      // unterminated bracket counts as one element: the renderer sizes
+      // the buffer based on actual data.
+      auto rbr = type.find(']', lbr + 1);
+      if(rbr != std::string::npos && rbr > lbr + 1)
+      {
+        try { count = std::stoll(type.substr(lbr + 1, rbr - lbr - 1)); }
+        catch(...) { count = 1; }
+        // "[0]" or "[-3]" parse successfully but would shrink the total
+        // or drive it negative; treat them like an unparsable extent.
+        if(count < 1)
+          count = 1;
+      }
+      type = type.substr(0, lbr);
+    }
+    // Round the element size up to the next multiple of 16 bytes.
+    const int64_t element = (glslTypeSizeBytes(type) + 15) & ~int64_t{15};
+    sz += element * count;
+  }
+  return sz > 0 ? sz : 16;
+}
+
+// Size in bytes of `type`: a built-in GLSL primitive or a user-defined
+// struct declared in the descriptor's TYPES section; unknown names give 16.
+int64_t glslTypeSizeBytes(std::string_view type, const isf::descriptor& d) noexcept
+{
+  // Built-in primitives: same sizes as the no-descriptor overload.
+  if(type == "float" || type == "int" || type == "uint" || type == "bool")
+    return 4;
+  if(type == "vec2" || type == "ivec2" || type == "uvec2")
+    return 8;
+  if(type == "vec3" || type == "ivec3" || type == "uvec3")
+    return 12;
+  if(type == "vec4" || type == "ivec4" || type == "uvec4")
+    return 16;
+  if(type == "mat2") return 16;
+  if(type == "mat3") return 48;
+  if(type == "mat4") return 64;
+
+  // User-defined struct from the descriptor's TYPES section. We sum
+  // each field's natural size (no per-field 16-byte padding), which
+  // matches the std430 size of the emitted struct for scalar/vector-only
+  // layouts. Producers compare against this when binding a struct-typed
+  // ATTRIBUTE (the AUXILIARY path uses `std430LayoutSize`, which pads
+  // every field). For mixed-alignment layouts the producer should set
+  // `element_byte_size` explicitly; the runtime trusts it over this value.
+  for(const auto& tdef : d.types)
+  {
+    if(tdef.name != type)
+      continue;
+    int64_t sz = 0;
+    for(const auto& f : tdef.layout)
+    {
+      auto fty = f.type;
+      int64_t count = 1;
+      auto lbr = fty.find('[');
+      if(lbr != std::string::npos)
+      {
+        auto rbr = fty.find(']', lbr + 1);
+        if(rbr != std::string::npos && rbr > lbr + 1)
+        {
+          try { count = std::stoll(fty.substr(lbr + 1, rbr - lbr - 1)); }
+          catch(...) { count = 1; }
+          // A zero or negative extent would shrink the total: count as 1.
+          if(count < 1)
+            count = 1;
+        }
+        fty = fty.substr(0, lbr);
+      }
+      sz += glslTypeSizeBytes(fty) * count;
+    }
+    return sz > 0 ? sz : 16;
+  }
+
+  // Unknown — match the lenient default of the no-descriptor overload.
+  return 16;
+}
+
+int64_t std430ArrayStride(std::string_view type, const isf::descriptor& d) noexcept
+{
+  // Built-in three-component vectors are promoted to a 16-byte stride.
+  // Every other type, user-defined structs from the descriptor included,
+  // uses the size computed by glslTypeSizeBytes(type, d) unchanged.
+  const bool threeComponent
+      = type == "vec3" || type == "ivec3" || type == "uvec3";
+  return threeComponent ? 16 : glslTypeSizeBytes(type, d);
+}
+
+}
+
+namespace
+{
+// File-local shorthand used by the AUXILIARY sizing call sites in this
+// translation unit; forwards to the public score::gfx::std430LayoutSize().
+inline auto isf_ssbo_elem_size(
+    const std::vector<isf::storage_input::layout_field>& layout) noexcept -> int64_t
+{
+  return score::gfx::std430LayoutSize(layout);
+}
+}
+
+namespace score::gfx
+{
+
+QRhiShaderResourceBinding::StageFlags visibilityToStages(std::string_view v) noexcept
+{
+  using B = QRhiShaderResourceBinding;
+  if(v == "vertex")
+    return B::VertexStage;
+  if(v == "compute")
+    return B::ComputeStage;
+  if(v == "none")
+    return {};
+  // Every spelling that means "vertex and fragment".
+  for(std::string_view both : {"vertex+fragment", "both", "graphics", "all"})
+    if(v == both)
+      return B::VertexStage | B::FragmentStage;
+  // "fragment" and any other value: fragment stage (matches the default in isf.hpp).
+  return B::FragmentStage;
+}
+
+// Rebuilds `out` from the descriptor's inputs. Storage buffers, storage
+// images and uniform inputs get consecutive binding indices starting at
+// `firstBinding` (persistent entries take a second one for `prev`);
+// indirect-draw argument buffers are tracked without a binding.
+void collectGraphicsStorageResources(
+    const isf::descriptor& desc, int firstBinding, GraphicsStorageResources& out)
+{
+  out.ssbos.clear();
+  out.images.clear();
+  out.ubos.clear(); // filled below as well: must not accumulate across calls
+  out.indirectDrawBuffer = nullptr;
+  out.indirectDrawIndexed = false;
+  out.indirectDrawSsboIndex = -1;
+
+  int binding = firstBinding;
+
+  // walk_descriptor_inputs() reports, for each input, the number of inlets
+  // created so far (cur.inlets); that value is used as the input port
+  // index. A plain per-input counter would be wrong because write-only
+  // storage and image inputs do not always create an input port.
+  walk_descriptor_inputs(
+      desc, [&](const isf::input& inp, const port_counts& cur, const port_counts&) {
+        const int port_idx = cur.inlets;
+        if(auto* s = ossia::get_if<isf::storage_input>(&inp.data))
+        {
+          // Indirect-draw argument buffers don't need a shader-visible binding
+          // (the GPU reads them via cb.drawIndirect), but we still track them to
+          // refresh pointers from upstream ports.
+          if(!s->buffer_usage.empty())
+          {
+            GraphicsSSBO e;
+            e.name = inp.name;
+            e.access = s->access;
+            e.buffer_usage = s->buffer_usage;
+            e.persistent = false;
+            e.owned = false; // Pointer comes from upstream
+            e.layout = s->layout;
+            e.stages = QRhiShaderResourceBinding::StageFlags{}; // No shader binding
+            e.binding = -1;
+            // Only read-only indirect-draw buffers come from an upstream
+            // input port; write variants are produced by an output port.
+            e.input_port_index = (s->access == "read_only") ? port_idx : -1;
+            out.ssbos.push_back(std::move(e));
+            out.indirectDrawSsboIndex = (int)out.ssbos.size() - 1;
+            out.indirectDrawIndexed = (s->buffer_usage == "indirect_draw_indexed");
+            return;
+          }
+          auto stages = visibilityToStages(s->visibility);
+          if(stages == QRhiShaderResourceBinding::StageFlags{})
+            return;
+          GraphicsSSBO e;
+          e.name = inp.name;
+          e.access = s->access;
+          e.persistent = s->persistent;
+          e.owned = true;
+          e.size = isf_ssbo_elem_size(s->layout);
+          e.layout = s->layout;
+          e.stages = stages;
+          e.binding = binding++;
+          // Only read-only storage_inputs have a matching input port; write
+          // variants put the buffer on an OUTPUT port (no upstream rebind).
+          e.input_port_index = (s->access == "read_only") ? port_idx : -1;
+          if(s->persistent)
+            e.prev_binding = binding++;
+          out.ssbos.push_back(std::move(e));
+        }
+        else if(auto* img = ossia::get_if<isf::csf_image_input>(&inp.data))
+        {
+          auto stages = visibilityToStages(img->visibility);
+          if(stages == QRhiShaderResourceBinding::StageFlags{}
+             || stages == QRhiShaderResourceBinding::ComputeStage)
+            return;
+          GraphicsStorageImage e;
+          e.name = inp.name;
+          e.access = img->access;
+          e.format = img->format;
+          e.is3D = img->is3D();
+          // Cubemap / array shape flags must propagate from the parser to
+          // the runtime allocator AND to the GLSL emit; otherwise the
+          // descriptor type bound at SRB-create disagrees with the GLSL
+          // declaration. Keeping only is3D used to force a flat 2D texture
+          // and an `image2D` emit, triggering Vulkan
+          // VUID-VkGraphicsPipelineCreateInfo-layout-07990 at pipeline build.
+          e.cubemap = img->isCube();
+          e.is_array = img->is_array;
+          e.persistent = img->persistent;
+          if(e.is3D && !img->depth_expression.empty())
+          {
+            try
+            {
+              e.depth = std::stoi(img->depth_expression);
+            }
+            catch(...)
+            {
+              // Non-literal expression (e.g. "$DEPTH"): leave 0, use default at alloc time
+            }
+          }
+          if(e.is_array && !img->layers_expression.empty())
+          {
+            try
+            {
+              e.layers = std::stoi(img->layers_expression);
+            }
+            catch(...)
+            {
+              // Non-literal expression (e.g. "$LAYERS"): leave 0; allocator picks default
+            }
+          }
+          e.owned = true;
+          e.stages = stages;
+          e.binding = binding++;
+          // Only read-only csf_image_inputs have a matching input port.
+          e.input_port_index = (img->access == "read_only") ? port_idx : -1;
+          if(img->persistent)
+            e.prev_binding = binding++;
+          out.images.push_back(std::move(e));
+        }
+        else if(auto* uni = ossia::get_if<isf::uniform_input>(&inp.data))
+        {
+          auto stages = visibilityToStages(uni->visibility);
+          if(stages == QRhiShaderResourceBinding::StageFlags{}
+             || stages == QRhiShaderResourceBinding::ComputeStage)
+            return;
+          GraphicsUBO e;
+          e.name = inp.name;
+          e.owned = false; // sourced from upstream port each frame
+          e.stages = stages;
+          e.binding = binding++;
+          e.input_port_index = port_idx;
+          out.ubos.push_back(std::move(e));
+        }
+      });
+}
+
+// --- SSBO allocation ------------------------------------------------------
+
+static QRhiBuffer* allocateSsbo(
+    QRhi& rhi, const std::string& name, const std::string& buffer_usage,
+    int64_t size)
+{
+  // Every buffer is a storage buffer; indirect-draw usages additionally get
+  // the IndirectBuffer flag where the Qt version provides it.
+  QRhiBuffer::UsageFlags usage = QRhiBuffer::StorageBuffer;
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+  if(buffer_usage == "indirect_draw" || buffer_usage == "indirect_draw_indexed")
+    usage |= QRhiBuffer::IndirectBuffer;
+#else
+  (void)buffer_usage;
+#endif
+  auto* ssbo = rhi.newBuffer(QRhiBuffer::Static, usage, size);
+  ssbo->setName(QByteArray("ISF_SSBO_") + name.c_str());
+  if(ssbo->create())
+    return ssbo;
+  qWarning() << "Failed to create SSBO" << name.c_str();
+  delete ssbo;
+  return nullptr;
+}
+
+static QRhiTexture::Format parseImageFormat(const std::string& fmt)
+{
+  // Names are matched case-insensitively on a lower-cased copy.
+  std::string f = fmt;
+  for(char& ch : f)
+    ch = (char)std::tolower((unsigned char)ch);
+
+  struct Named { const char* name; QRhiTexture::Format format; };
+  // Formats available on every supported Qt version. rg16f / rg32f are not
+  // mapped: they fall through to the RGBA8 default at the end.
+  static const Named colorFormats[] = {
+      {"rgba8", QRhiTexture::RGBA8},     {"bgra8", QRhiTexture::BGRA8},
+      {"r8", QRhiTexture::R8},           {"rg8", QRhiTexture::RG8},
+      {"r16", QRhiTexture::R16},         {"rg16", QRhiTexture::RG16},
+      {"r16f", QRhiTexture::R16F},       {"r32f", QRhiTexture::R32F},
+      {"rgba16f", QRhiTexture::RGBA16F}, {"rgba32f", QRhiTexture::RGBA32F},
+  };
+  for(const Named& entry : colorFormats)
+    if(f == entry.name)
+      return entry.format;
+
+  // Integer storage image formats, needed for atomic image ops. They are
+  // only mapped on Qt 6.10+; binding an integer image onto a float/UNORM
+  // format would be a numeric-class mismatch
+  // (VUID-RuntimeSpirv-OpTypeImage-07752, -OpImageWrite-04469).
+#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0)
+  static const Named integerFormats[] = {
+      {"r8ui", QRhiTexture::R8UI},     {"r32ui", QRhiTexture::R32UI},
+      {"rg32ui", QRhiTexture::RG32UI}, {"rgba32ui", QRhiTexture::RGBA32UI},
+      {"r8si", QRhiTexture::R8SI},     {"r8i", QRhiTexture::R8SI},
+      {"r32si", QRhiTexture::R32SI},   {"r32i", QRhiTexture::R32SI},
+      {"rg32si", QRhiTexture::RG32SI}, {"rgba32si", QRhiTexture::RGBA32SI},
+  };
+  for(const Named& entry : integerFormats)
+    if(f == entry.name)
+      return entry.format;
+#endif
+  // Integer names still unmatched here are refused rather than aliased.
+  static const char* const refused[] = {
+      "r8ui",  "r8si", "r8i",    "r16ui",  "r16si",    "r16i",    "r32ui",
+      "r32si", "r32i", "rg32ui", "rg32si", "rgba32ui", "rgba32si"};
+  for(const char* name : refused)
+  {
+    if(f != name)
+      continue;
+    qWarning() << "ISF storage image format" << fmt.c_str()
+               << "requires Qt 6.10+ integer formats; refusing binding "
+                  "(VUID-RuntimeSpirv-OpTypeImage-07752).";
+    return QRhiTexture::UnknownFormat;
+  }
+  return QRhiTexture::RGBA8;
+}
+
+// Sentinel zero-buffer used when an upstream SSBO/UBO port disconnects
+// mid-session. Vulkan requires every SRB binding to point at a valid
+// resource — without a sentinel, a disconnect leaves the binding
+// pointing at a deleteLater'd QRhiBuffer (the prior upstream's, freed
+// when its owning node was destroyed), and the next setShaderResources
+// dereferences the freed pointer.
+//
+// 64 KiB is generous for any realistic UBO/SSBO layout that a graphics
+// shader actually reads from (Vulkan UBO max is at least 16 KiB; SSBOs
+// can be larger but disconnect-fallback reads return zeros regardless of
+// the buffer's actual size, only its descriptor validity matters). One
+// buffer covers both SSBO and UBO disconnects since QRhi accepts both
+// usage flags on a single buffer; the descriptor type is set on the
+// SRB binding side, not the buffer side.
+static constexpr uint32_t kSentinelBufferSize = 64u * 1024u;
+
+// Creates the zero-filled disconnect-fallback buffer on first use and
+// stores it in `store`; later calls return immediately. The zero-fill
+// upload is recorded into `res`.
+static void ensureSentinelBuffer(
+    QRhi& rhi, QRhiResourceUpdateBatch& res, GraphicsStorageResources& store)
+{
+  if(store.sentinelBuffer)
+    return;
+
+  // One buffer usable through both storage and uniform bindings.
+  const auto usage = QRhiBuffer::StorageBuffer | QRhiBuffer::UniformBuffer;
+  auto* sentinel = rhi.newBuffer(QRhiBuffer::Static, usage, kSentinelBufferSize);
+  sentinel->setName("ISF_SentinelDisconnectBuffer");
+  if(!sentinel->create())
+  {
+    qWarning() << "Failed to create sentinel disconnect buffer";
+    delete sentinel;
+    return;
+  }
+  // Upload zeros so reads through the sentinel are predictable rather
+  // than uninitialised memory.
+  static const std::vector<char> zeros(kSentinelBufferSize, 0);
+  res.uploadStaticBuffer(sentinel, 0, kSentinelBufferSize, zeros.data());
+  store.sentinelBuffer = sentinel;
+  store.sentinelSize = kSentinelBufferSize;
+}
+
+// Lazily allocates every resource the node owns in `store`: the sentinel
+// buffer, owned SSBOs, placeholder UBOs and storage images (plus `prev`
+// copies for persistent entries). Existing resources are left untouched.
+void ensureStorageResources(
+    QRhi& rhi, QRhiResourceUpdateBatch& res, const RenderList& renderer,
+    const isf::descriptor& /*desc*/, GraphicsStorageResources& store,
+    QSize renderSize)
+{
+  // Sentinel disconnect-fallback buffer: only allocate when the node has
+  // at least one upstream-bound SSBO or UBO. ensureSentinelBuffer is
+  // idempotent, so subsequent calls (per-frame ensure passes) are
+  // no-ops once the sentinel exists. Allocating here (rather than
+  // lazily inside bindUpstreamBuffers) lets us fold the zero-fill upload
+  // into the same resource-update batch as the rest of the storage
+  // initialisation, instead of needing a per-call res in the bind path.
+  bool needsSentinel = false;
+  for(const auto& s : store.ssbos)
+    if(s.input_port_index >= 0) { needsSentinel = true; break; }
+  if(!needsSentinel)
+    for(const auto& u : store.ubos)
+      if(u.input_port_index >= 0) { needsSentinel = true; break; }
+  if(needsSentinel)
+    ensureSentinelBuffer(rhi, res, store);
+  // SSBOs
+  for(auto& e : store.ssbos)
+  {
+    // owned==false: buffer comes from upstream, nothing to allocate here.
+    // size derived from layout when persistent; otherwise the user sets
+    // it externally (typically matching upstream geometry).
+    if(!e.owned)
+      continue;
+    int64_t target_size = e.size > 0 ? e.size : 16;
+    if(!e.buffer)
+    {
+      e.buffer = allocateSsbo(rhi, e.name, e.buffer_usage, target_size);
+      // Zero-fill the placeholder. Vulkan does NOT initialise VkBuffer
+      // memory; on a fresh RenderList the new placeholder lands on a
+      // device-memory page with whatever the previous owner left there.
+      // For shader inputs that have no producer in the user's graph
+      // (e.g. cluster_light_counts / cluster_light_lists when no
+      // clustered-lighting compute pass is wired) this placeholder IS
+      // the buffer the shader reads from, and without the clear the read
+      // returns device-memory garbage. Mirrors the zero-fill done in
+      // ensureSentinelBuffer().
+      if(e.buffer)
+        RhiClearBuffer::clearBuffer(
+            rhi, res, e.buffer, 0, (quint32)target_size);
+    }
+    if(e.persistent && !e.prev)
+    {
+      e.prev = allocateSsbo(rhi, e.name + "_prev", "", target_size);
+      if(e.prev)
+        RhiClearBuffer::clearBuffer(
+            rhi, res, e.prev, 0, (quint32)target_size);
+    }
+  }
+
+  // Uniform buffers (UBOs sourced from upstream Buffer ports). The upstream's
+  // real buffer is swapped in at runtime by bindUpstreamBuffers — but we need
+  // a valid placeholder allocated here so the SRB binding slot exists at
+  // pipeline-build time. Without it, Vulkan complains about an invalid
+  // descriptor for binding N when the shader reads `camera`.
+  for(auto& e : store.ubos)
+  {
+    if(e.buffer)  // already borrowed from upstream, or previously allocated
+      continue;
+    // 256 bytes covers the camera UBO (240 B) and most other small UBOs.
+    // If the upstream provides a larger buffer we'll replace this at bind time.
+    auto* buf = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 256);
+    buf->setName(QByteArray("ISF_UBO_placeholder_") + e.name.c_str());
+    if(!buf->create())
+    {
+      qWarning() << "Failed to create placeholder UBO" << e.name.c_str();
+      delete buf;
+      continue;
+    }
+    // Zero-fill the placeholder. Same Vulkan-doesn't-zero-VkBuffers
+    // rationale as the SSBO placeholder above. UBOs have a smaller
+    // attack surface (256 B) but a single garbage value here can flip
+    // a feature bit in scene_counts or fog params, producing the
+    // openpbr-only intermittent lighting glitch on resize.
+    RhiClearBuffer::clearBuffer(rhi, res, buf, 0, 256u);
+    e.buffer = buf;
+    e.owned = true;  // we own this placeholder; bindUpstreamBuffers drops ownership when it swaps.
+  }
+
+  // Storage images. Allocator must honor every shape flag the parser
+  // captured (is3D / cubemap / is_array) so the bound texture matches the
+  // GLSL declaration emitted by isf_emit_image_decl. Cube + array combos
+  // are rejected at parse time; this code therefore picks one shape via
+  // priority order: cubemap > 3D > array > 2D.
+  for(auto& e : store.images)
+  {
+    if(!e.owned)
+      continue;
+
+    QSize sz = renderSize.isValid() ? renderSize : QSize(256, 256);
+    QRhiTexture::Format fmt = parseImageFormat(e.format);
+    if(fmt == QRhiTexture::UnknownFormat)
+      continue; // parseImageFormat already warned
+    QRhiTexture::Flags flags = QRhiTexture::UsedWithLoadStore;
+    if(e.is3D)
+      flags |= QRhiTexture::ThreeDimensional;
+    if(e.cubemap)
+      flags |= QRhiTexture::CubeMap;
+    if(e.is_array)
+      flags |= QRhiTexture::TextureArray;
+
+    // Cubes use the size-only newTexture overload; QRhi infers face_count=6
+    // from the CubeMap flag. width must equal height (cube face is square)
+    // — we size both axes to the smaller of renderSize for safety.
+    if(e.cubemap)
+    {
+      const int edge = std::min(sz.width(), sz.height());
+      sz = QSize(edge, edge);
+    }
+    const int arrayLayers = e.layers > 0 ? e.layers : 4; // matches doc default
+
+    auto make_tex = [&](const char* suffix) -> QRhiTexture* {
+      QRhiTexture* t = nullptr;
+      if(e.cubemap)
+        t = rhi.newTexture(fmt, sz, 1, flags);
+      else if(e.is3D)
+        t = rhi.newTexture(
+            fmt, sz.width(), sz.height(),
+            e.depth > 0 ? e.depth : 16, 1, flags);
+      else if(e.is_array)
+        t = rhi.newTextureArray(fmt, arrayLayers, sz, 1, flags);
+      else
+        t = rhi.newTexture(fmt, sz, 1, flags);
+      t->setName(
+          QByteArray("ISF_StorageImage_") + e.name.c_str() + suffix);
+      if(!t->create())
+      {
+        qWarning() << "Failed to create storage image" << e.name.c_str() << suffix;
+        delete t;
+        return nullptr;
+      }
+      return t;
+    };
+
+    if(!e.texture)
+      e.texture = make_tex("");
+    if(e.persistent && !e.prev)
+      e.prev = make_tex("_prev");
+  }
+}
+
+QVarLengthArray<QRhiShaderResourceBinding, 8> buildExtraBindings(
+    const GraphicsStorageResources& store)
+{
+  // Produces one shader-resource binding per allocated, stage-visible
+  // entry of `store`, in the order SSBOs, storage images, UBOs. Persistent
+  // SSBOs and images additionally get a load-only binding for `prev`.
+  using Binding = QRhiShaderResourceBinding;
+  QVarLengthArray<Binding, 8> bindings;
+
+  // Entries without a binding slot or without any stage are never bound.
+  const auto hasSlot = [](const auto& entry) {
+    return entry.binding >= 0 && !(entry.stages == Binding::StageFlags{});
+  };
+
+  for(const auto& ssbo : store.ssbos)
+  {
+    if(!ssbo.buffer || !hasSlot(ssbo))
+      continue;
+
+    // The access string picks the load / store / load-store flavour.
+    if(ssbo.access == "read_only")
+      bindings.append(
+          Binding::bufferLoad(ssbo.binding, ssbo.stages, ssbo.buffer));
+    else if(ssbo.access == "write_only")
+      bindings.append(
+          Binding::bufferStore(ssbo.binding, ssbo.stages, ssbo.buffer));
+    else
+      bindings.append(
+          Binding::bufferLoadStore(ssbo.binding, ssbo.stages, ssbo.buffer));
+
+    // Persistent entry: also bind its `prev` buffer, load-only.
+    if(ssbo.persistent && ssbo.prev && ssbo.prev_binding >= 0)
+      bindings.append(
+          Binding::bufferLoad(ssbo.prev_binding, ssbo.stages, ssbo.prev));
+  }
+
+  for(const auto& image : store.images)
+  {
+    if(!image.texture || !hasSlot(image))
+      continue;
+
+    // Same access dispatch as for buffers; the trailing 0 is the mip level.
+    if(image.access == "read_only")
+      bindings.append(
+          Binding::imageLoad(image.binding, image.stages, image.texture, 0));
+    else if(image.access == "write_only")
+      bindings.append(
+          Binding::imageStore(image.binding, image.stages, image.texture, 0));
+    else
+      bindings.append(
+          Binding::imageLoadStore(image.binding, image.stages, image.texture, 0));
+
+    // Persistent entry: also bind its `prev` texture, load-only.
+    if(image.persistent && image.prev && image.prev_binding >= 0)
+      bindings.append(
+          Binding::imageLoad(image.prev_binding, image.stages, image.prev, 0));
+  }
+
+  // Uniform buffers have a single binding flavour and no `prev`.
+  for(const auto& ubo : store.ubos)
+  {
+    if(ubo.buffer && hasSlot(ubo))
+      bindings.append(
+          Binding::uniformBuffer(ubo.binding, ubo.stages, ubo.buffer));
+  }
+
+  return bindings;
+}
+
+// Re-points upstream-fed SSBO / UBO entries of `store` at the buffers
+// currently published on their input ports, patching `srb` when given.
+void bindUpstreamBuffers(
+    RenderList& renderer, const std::vector<Port*>& inputPorts,
+    GraphicsStorageResources& store,
+    QRhiShaderResourceBindings* srb)
+{
+  // Upstream renderers (halp-based nodes like ExtractBuffer2, RenderedCSFNode,
+  // ScenePreprocessorNode aux extractors, ...) publish their output buffer via
+  // the virtual NodeRenderer::bufferForOutput() — never by writing
+  // Port::value. RenderList::bufferForInput(edge) is the right lookup: it
+  // resolves the source node's renderer and calls bufferForOutput on it.
+  auto fetchUpstream = [&](Port* port) -> QRhiBuffer* {
+    for(Edge* edge : port->edges)
+    {
+      if(!edge || !edge->source)
+        continue;
+      if(edge->source->type != Types::Buffer)
+        continue;
+      if(auto view = renderer.bufferForInput(*edge); view.handle)
+        return view.handle;
+    }
+    return nullptr;
+  };
+  // For each SSBO that has an input_port_index and is either read-only or an
+  // indirect-draw buffer, try to fetch the buffer from the upstream port.
+  for(auto& e : store.ssbos)
+  {
+    if(e.input_port_index < 0)
+      continue;
+    if(e.input_port_index >= (int)inputPorts.size())
+      continue;
+
+    Port* port = inputPorts[e.input_port_index];
+    if(!port)
+      continue;
+
+    // Only ports of Type::Buffer carry SSBO pointers.
+    if(port->type != Types::Buffer)
+      continue;
+
+    if(auto* buf = fetchUpstream(port))
+    {
+      if(buf == e.buffer)
+        continue; // unchanged — nothing to do
+
+      if(!e.owned)
+      {
+        e.buffer = buf;
+        if(srb && e.binding >= 0)
+          replaceBuffer(*srb, e.binding, buf);
+      }
+      else if(e.access == "read_only")
+      {
+        if(e.owned && e.buffer)
+          e.buffer->deleteLater();
+        e.owned = false;
+        e.buffer = buf;
+        if(srb && e.binding >= 0)
+          replaceBuffer(*srb, e.binding, buf);
+      }
+    }
+    else if(!e.owned && store.sentinelBuffer && !port->edges.empty())
+    {
+      // Disconnect: we were borrowing an upstream buffer (!e.owned), the
+      // user had wired the port (port->edges non-empty), and the upstream
+      // is now gone (fetchUpstream returned nullptr). The prior upstream's
+      // QRhiBuffer was deleteLater'd when its node tore down, so the SRB
+      // binding now points at a dangling pointer. Adopt the sentinel
+      // zero-buffer so reads return zeros and the descriptor remains
+      // valid (Vulkan validation requires a live resource at every
+      // binding slot). Stays !owned — sentinel lifetime is owned by
+      // GraphicsStorageResources::release().
+      //
+      // The !port->edges.empty() guard is critical for entries that are
+      // bound from the upstream geometry's auxiliary_buffers list (the
+      // pattern ScenePreprocessor uses for scene_lights / world_transforms
+      // / per_draws / scene_materials / scene_counts / scene_light_indices
+      // / camera UBO / env UBO into flattened-scene shaders). Those have
+      // input_port_index >= 0 but no port edges; bindUpstreamBuffersFromGeometry
+      // restores the binding right after this function. Without the guard
+      // the sentinel temporarily clobbered them and confused later frames.
+      if(e.buffer != store.sentinelBuffer)
+      {
+        e.buffer = store.sentinelBuffer;
+        if(srb && e.binding >= 0)
+          replaceBuffer(*srb, e.binding, store.sentinelBuffer);
+      }
+    }
+  }
+
+  // UBOs: borrow the upstream buffer when one is published on the Buffer port.
+  // If the SRB is provided, patch its binding to point at the new buffer so
+  // the draw call binds the right descriptor. A per-frame "placeholder" UBO
+  // was allocated in ensureStorageResources so the binding slot exists even
+  // when no upstream is connected.
+  bool ubo_srb_changed = false;
+  for(auto& e : store.ubos)
+  {
+    if(e.input_port_index < 0)
+      continue;
+    if(e.input_port_index >= (int)inputPorts.size())
+      continue;
+    Port* port = inputPorts[e.input_port_index];
+    if(!port || port->type != Types::Buffer)
+      continue;
+    QRhiBuffer* found = fetchUpstream(port);
+    if(found == e.buffer)
+      continue;  // unchanged — nothing to do
+
+    if(found)
+    {
+      // An upstream is now providing a different buffer than what's currently
+      // bound. Drop any placeholder we owned and retarget the binding.
+      if(e.owned && e.buffer)
+        e.buffer->deleteLater();
+      e.owned = false;
+      e.buffer = found;
+
+      if(srb && e.binding >= 0)
+      {
+        replaceBuffer(*srb, e.binding, found);
+        ubo_srb_changed = true;
+      }
+    }
+    else if(!e.owned && store.sentinelBuffer && !port->edges.empty())
+    {
+      // Disconnect path mirroring the SSBO loop above: the upstream UBO
+      // went away (e.g. its producer node was deleted), and we were
+      // borrowing its buffer. Bind the sentinel so the SRB descriptor
+      // stays valid; reads return predictable zeros. Note that any
+      // owned placeholder allocated in ensureStorageResources is kept
+      // — we don't destroy it here, since the next reconnect will adopt
+      // the new upstream and we'd just have to re-create the
+      // placeholder. The sentinel takeover is transient.
+      //
+      // The port->edges.empty() guard mirrors the SSBO branch above:
+      // entries bound via the geometry name-match path (the camera UBO
+      // and env UBO from ScenePreprocessor) have no port edges; the
+      // sentinel must not fire for them — bindUpstreamBuffersFrom-
+      // Geometry restores them immediately after this function returns.
+      if(e.buffer != store.sentinelBuffer)
+      {
+        e.buffer = store.sentinelBuffer;
+        if(srb && e.binding >= 0)
+        {
+          replaceBuffer(*srb, e.binding, store.sentinelBuffer);
+          ubo_srb_changed = true;
+        }
+      }
+    }
+  }
+  // No trailing srb->create() — replaceBuffer() now uses the
+  // updateResources() fast path, which already rebuilds the backend
+  // descriptor set. Re-creating here would tear down the pool slot
+  // we just refreshed.
+  (void)ubo_srb_changed;
+}
+
+void bindUpstreamImagesFromGeometry(
+    GraphicsStorageResources& store, const ossia::geometry& geometry,
+    QRhiShaderResourceBindings* srb)
+{
+  // Storage-image counterpart of the read-only SSBO rebinding done by
+  // bindUpstreamBuffers. A read_only csf_image_input starts out with an
+  // auto-allocated placeholder texture; when the upstream geometry
+  // publishes a texture under the same name in its auxiliary textures
+  // (e.g. an upstream CSF or RawRaster that declared the image
+  // write_only / read_write), the entry is re-pointed at that published
+  // handle and the placeholder is freed.
+  //
+  // Skipping this step would leave every read_only csf_image_input
+  // reading its OWN zero-initialised texture rather than what the
+  // producer wrote (typically consumed with imageLoad, e.g. tile-render
+  // output read by a composite fragment shader).
+  //
+  // `srb` may be null: `store` is still updated, only the SRB patch is skipped.
+  for(auto& image : store.images)
+  {
+    // write_only / read_write entries are producers and keep their own
+    // textures; entries without a binding slot have nothing to patch.
+    const bool adoptable = image.access == "read_only" && image.binding >= 0;
+    if(!adoptable)
+      continue;
+
+    // A missing aux record or a null native handle both mean "nothing
+    // usable upstream": the entry keeps whatever it currently has.
+    const auto* published = geometry.find_auxiliary_texture(image.name);
+    QRhiTexture* incoming = nullptr;
+    if(published)
+      incoming = static_cast<QRhiTexture*>(published->native_handle);
+    if(!incoming)
+      continue;
+
+    // The handle differs on first connection and whenever the producer
+    // reallocates (resize, format change, …). Release the texture we
+    // owned, if any, and adopt the upstream one as non-owned so that a
+    // later release() does not delete a texture we do not own.
+    const bool retarget = incoming != image.texture;
+    if(retarget)
+    {
+      if(image.owned && image.texture)
+        image.texture->deleteLater();
+      image.owned = false;
+      image.texture = incoming;
+    }
+
+    // The SRB is patched even when the pointer did not change: this lets
+    // a multi-pass / multi-SRB caller run the helper once per SRB (the
+    // swap above is a no-op after the first call). Pairs with the
+    // per-pass loop in RenderedRawRasterPipelineNode::update.
+    if(srb)
+      replaceTexture(*srb, image.binding, image.texture);
+  }
+}
+
+void bindUpstreamBuffersFromGeometry(
+    QRhi& rhi, QRhiResourceUpdateBatch& res,
+    GraphicsStorageResources& store, const ossia::geometry& geometry,
+    QRhiShaderResourceBindings* srb)
+{
+  // SSBO/UBO sibling of bindUpstreamImagesFromGeometry. An INPUTS-declared
+  // storage_input / uniform_input can receive its buffer through a Buffer
+  // port edge (handled by bindUpstreamBuffers) OR by name-match against the
+  // upstream geometry's auxiliary buffers — the pattern ScenePreprocessor
+  // uses to publish scene_lights / world_transforms / per_draws /
+  // scene_materials / scene_counts / scene_light_indices / camera UBO /
+  // env UBO into a flattened scene shader (classic_pbr et al.).
+  //
+  // Without this path those bindings stay at the 16-byte placeholder from
+  // ensureStorageResources: vertices read a zero PerDraw, multiply by a
+  // zero world_transforms[0] matrix and collapse to origin → black scene.
+  // `geometry` is already a single ossia::geometry (the caller — typically
+  // RenderedRawRasterPipelineNode — unwraps geometry.meshes->meshes[0]).
+  const auto& mesh = geometry;
+
+  // Look up the GPU/CPU buffer behind a named aux on the geometry.
+  // Returns {handle, byte_size, owned?} — owned means we just allocated +
+  // uploaded a CPU buffer (caller must release the prior owned handle).
+  // NOTE: every call on a CPU-backed aux allocates + uploads a new buffer.
+  struct ResolvedBuffer
+  {
+    QRhiBuffer* handle{};
+    int64_t byte_size{0};
+    bool owned{false};
+  };
+  auto resolve_aux = [&](const std::string& name, bool is_uniform) -> ResolvedBuffer {
+    auto* geo_aux = mesh.find_auxiliary(name);
+    if(!geo_aux || geo_aux->buffer < 0
+       || geo_aux->buffer >= (int)mesh.buffers.size())
+      return {};
+    const auto& geo_buf = mesh.buffers[geo_aux->buffer];
+    if(auto* gpu = ossia::get_if<ossia::geometry::gpu_buffer>(&geo_buf.data))
+    {
+      if(!gpu->handle)
+        return {};
+      return {static_cast<QRhiBuffer*>(gpu->handle),
+              geo_aux->byte_size > 0 ? geo_aux->byte_size : gpu->byte_size,
+              false};
+    }
+    else if(auto* cpu = ossia::get_if<ossia::geometry::cpu_buffer>(&geo_buf.data))
+    {
+      if(!cpu->raw_data || cpu->byte_size <= 0)
+        return {};
+      const int64_t sz
+          = geo_aux->byte_size > 0 ? geo_aux->byte_size : cpu->byte_size;
+      // An aux record may claim more bytes than the CPU blob holds; copy
+      // only what is actually there so the upload never over-reads raw_data.
+      const int64_t upload_sz = sz < cpu->byte_size ? sz : cpu->byte_size;
+      const auto usage
+          = is_uniform ? QRhiBuffer::UniformBuffer : QRhiBuffer::StorageBuffer;
+      auto* buf = rhi.newBuffer(QRhiBuffer::Immutable, usage, sz);
+      buf->setName(QByteArray("ISF_aux_geom_") + name.c_str());
+      if(!buf->create())
+      {
+        delete buf;
+        return {};
+      }
+      res.uploadStaticBuffer(buf, 0, upload_sz, cpu->raw_data.get());
+      return {buf, sz, true};
+    }
+    return {};
+  };
+
+  for(auto& e : store.ssbos)
+  {
+    if(e.binding < 0)
+      continue;
+    // Indirect-draw SSBOs carry no shader binding; handled elsewhere.
+    if(!e.buffer_usage.empty())
+      continue;
+    auto resolved = resolve_aux(e.name, /*is_uniform=*/false);
+    if(!resolved.handle || resolved.handle == e.buffer)
+      continue;
+    // Drop the prior owned placeholder (or prior owned CPU upload) before
+    // adopting the new handle.
+    if(e.owned && e.buffer)
+      e.buffer->deleteLater();
+    e.buffer = resolved.handle;
+    e.size = resolved.byte_size;
+    e.owned = resolved.owned;
+    if(srb)
+      replaceBuffer(*srb, e.binding, e.buffer);
+  }
+
+  for(auto& e : store.ubos)
+  {
+    if(e.binding < 0)
+      continue;
+    auto resolved = resolve_aux(e.name, /*is_uniform=*/true);
+    if(!resolved.handle || resolved.handle == e.buffer)
+      continue;
+    if(e.owned && e.buffer)
+      e.buffer->deleteLater();
+    e.buffer = resolved.handle;
+    e.owned = resolved.owned;
+    if(srb)
+      replaceBuffer(*srb, e.binding, e.buffer);
+  }
+}
+
+void swapPersistentSSBOsState(GraphicsStorageResources& store)
+{
+  // Exchange current/previous for every persistent pair that has both slots.
+  const auto flip = [](bool persistent, auto*& cur, auto*& old) {
+    if(persistent && cur && old) std::swap(cur, old);
+  };
+  for(auto& ssbo : store.ssbos) flip(ssbo.persistent, ssbo.buffer, ssbo.prev);
+  for(auto& img : store.images) flip(img.persistent, img.texture, img.prev);
+}
+
+void reapplyStorageBindings(
+    const GraphicsStorageResources& store, QRhiShaderResourceBindings& srb)
+{
+  // Writes the current and previous slot of every complete persistent pair
+  // into `srb`. Deliberately no trailing srb.create(): the replace*()
+  // helpers use updateResources(), which already refreshes the backend
+  // descriptor state. A create() here would re-allocate the descriptor
+  // set pool slot and defeat the fast-path swap (qrhivulkan.cpp:8707,
+  // updateResources).
+  for(const auto& ssbo : store.ssbos)
+    if(ssbo.persistent && ssbo.buffer && ssbo.prev)
+    {
+      replaceBuffer(srb, ssbo.binding, ssbo.buffer);
+      replaceBuffer(srb, ssbo.prev_binding, ssbo.prev);
+    }
+  for(const auto& img : store.images)
+    if(img.persistent && img.texture && img.prev)
+    {
+      replaceTexture(srb, img.binding, img.texture);
+      replaceTexture(srb, img.prev_binding, img.prev);
+    }
+}
+
+void swapPersistentSSBOs(
+    GraphicsStorageResources& store, QRhiShaderResourceBindings& srb)
+{
+  swapPersistentSSBOsState(store); // exchange current/prev pointers in `store`
+  reapplyStorageBindings(store, srb); // then write the swapped pointers into `srb`
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.hpp
new file mode 100644
index 0000000000..b933d4ebc7
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.hpp
@@ -0,0 +1,423 @@
+#pragma once
+
+// Shared infrastructure for binding `storage_input` and `csf_image_input`
+// declarations into a graphics pipeline's shader resource bindings.
+//
+// Mirrors the pattern established by RenderedCSFNode (for compute) but wired
+// to Vertex|Fragment stages for ISF / Raw Raster Pipeline / Scene Pass nodes.
+
+#include <Gfx/Graph/Utils.hpp>
+#include <isf.hpp>
+
+#include <QtGui/private/qrhi_p.h>
+
+#include <score_plugin_gfx_export.h>
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace score::gfx
+{
+
+/**
+ * @brief One SSBO attached to a graphics pipeline.
+ *
+ * Covers:
+ *  - user-declared storage_input's (shader-visible read-only / read-write SSBOs)
+ *  - persistent ping-pong pairs (current + previous frame)
+ *  - indirect-draw argument buffers (BUFFER_USAGE="indirect_draw")
+ *  - auxiliary buffers that travel with the geometry (forwarded from upstream)
+ */
+struct GraphicsSSBO
+{
+  std::string name;        //!< Base GLSL identifier (e.g. "particles"); also the key for upstream aux name-matching
+  std::string access;      //!< "read_only" / "write_only" / "read_write"
+  std::string buffer_usage;//!< "", "indirect_draw", "indirect_draw_indexed"; non-empty entries are skipped by the geometry name-match path
+  bool persistent{false};  //!< Ping-pong swapped every frame
+  bool owned{true};        //!< This SSBO owns `buffer` and `prev` (releases them); cleared when a borrowed upstream buffer is adopted
+  int64_t size{0};         //!< Buffer size in bytes (0 = auto from layout); overwritten when an upstream aux buffer is adopted
+
+  // Layout fields (for size computation + validation). May be empty for auxiliaries.
+  std::vector<isf::storage_input::layout_field> layout;
+
+  // Buffer handles. `buffer` is the currently-written slot (R/W for persistent).
+  // `prev` is only set when persistent — holds the previous frame's data (R/O).
+  QRhiBuffer* buffer{};
+  QRhiBuffer* prev{};
+
+  // Resolved SRB binding slots. -1 = unassigned; the geometry name-match path skips entries whose `binding` is negative.
+  int binding{-1};       //!< Binding of `buffer`
+  int prev_binding{-1};  //!< Binding of `prev` (only set when persistent)
+
+  // Stages that see this binding (fragment / vertex / both).
+  QRhiShaderResourceBinding::StageFlags stages{};
+
+  // Optional: indices into the Node's input/output port vectors. -1 = not
+  // connected to a port (e.g. private aux buffer or persistent-only).
+  int input_port_index{-1};
+  int output_port_index{-1};
+};
+
+/**
+ * @brief One storage image attached to a graphics pipeline.
+ */
+struct GraphicsStorageImage
+{
+  std::string name;   //!< Identifier; matched by name against the upstream geometry's auxiliary textures
+  std::string access; //!< "read_only" / "write_only" / "read_write"
+  std::string format; //!< e.g. "rgba8", "r32f", "r32ui"
+  bool is3D{false};       //!< presumably selects a 3D texture (see `depth`) — confirm
+  bool cubemap{false};    //!< imageCube — 6-layer cubemap storage image
+  bool is_array{false};   //!< image2DArray — N-layer array texture
+  bool persistent{false}; //!< Ping-pong two textures swapped every frame
+  int depth{0};   //!< Explicit Z dimension for 3D textures; 0 = use default (16)
+  int layers{0}; //!< Layer count for is_array (0 = use parser-supplied default)
+
+  QRhiTexture* texture{}; //!< Current (write / read_write) slot
+  QRhiTexture* prev{};    //!< Previous frame (read-only); only set when persistent
+  bool owned{true};       //!< release() deletes `texture`/`prev` only while set; cleared when an upstream texture is adopted
+
+  int binding{-1};        //!< Binding of `texture`; negative = unassigned (such entries are skipped when rebinding)
+  int prev_binding{-1};   //!< Binding of `prev` (only set when persistent)
+  QRhiShaderResourceBinding::StageFlags stages{}; //!< presumably the stages that see this binding, as for GraphicsSSBO — confirm
+
+  int input_port_index{-1};  //!< presumably -1 = not connected to a port, as for GraphicsSSBO — confirm
+  int output_port_index{-1}; //!< same convention as input_port_index (assumed)
+};
+
+/**
+ * @brief One UBO sourced from an upstream Buffer port (uniform_input).
+ *
+ * Bound via QRhiShaderResourceBinding::uniformBuffer (std140) rather than
+ * the SSBO bufferLoad/bufferStore used for storage_input.
+ */
+struct GraphicsUBO
+{
+  std::string name;       //!< Identifier; also the key for name-matching against upstream auxiliary buffers
+  QRhiBuffer* buffer{};   //!< Currently bound buffer; borrowed unless `owned` is set
+  bool owned{false};      //!< True when `buffer` is ours to delete (e.g. a CPU-backed aux upload); release() deletes it only then.
+  int binding{-1};        //!< SRB binding slot; entries with a negative binding are skipped by the name-match path
+  QRhiShaderResourceBinding::StageFlags stages{};
+  int input_port_index{-1}; //!< Index into the node's input ports; negative or out-of-range entries are ignored by bindUpstreamBuffers
+};
+
+/**
+ * @brief Aggregate of all graphics-visible storage resources for a node.
+ */
+struct GraphicsStorageResources
+{
+  std::vector<GraphicsSSBO> ssbos;
+  std::vector<GraphicsStorageImage> images;
+  std::vector<GraphicsUBO> ubos;
+
+  // Quick aliases: first SSBO with BUFFER_USAGE="indirect_draw*". Populated
+  // by collectGraphicsStorageResources. Updated by callers when the underlying
+  // SSBO's buffer pointer changes (e.g. when an upstream CSF rebuilds it).
+  QRhiBuffer* indirectDrawBuffer{};
+  bool indirectDrawIndexed{false};
+  int indirectDrawSsboIndex{-1};
+
+  // Sentinel zero-buffer bound when an SSBO/UBO upstream port disconnects
+  // mid-session. QRhi (especially Vulkan) requires every SRB binding to
+  // point at a valid resource — without it, a disconnect would leave the
+  // binding on a dangling QRhiBuffer* (the prior upstream's buffer,
+  // deleteLater'd when the upstream node was destroyed). Lazily allocated
+  // on first disconnect, sized to the largest binding observed
+  // (kSentinelSize). One buffer serves SSBO and UBO bindings alike: QRhi
+  // accepts a buffer with both StorageBuffer and UniformBuffer usage flags.
+  QRhiBuffer* sentinelBuffer{};
+  uint32_t sentinelSize{0};
+
+  // Deletes (deleteLater) every owned buffer/texture plus the sentinel, then
+  // returns all bookkeeping, including the indirect-draw aliases, to defaults.
+  void release()
+  {
+    for(auto& s : ssbos)
+    {
+      if(s.owned)
+      {
+        if(s.buffer) s.buffer->deleteLater();
+        if(s.prev)   s.prev->deleteLater();
+      }
+      s.buffer = nullptr;
+      s.prev = nullptr;
+    }
+    ssbos.clear();
+
+    for(auto& i : images)
+    {
+      if(i.owned)
+      {
+        if(i.texture) i.texture->deleteLater();
+        if(i.prev) i.prev->deleteLater();
+      }
+      i.texture = nullptr;
+      i.prev = nullptr;
+    }
+    images.clear();
+
+    for(auto& u : ubos)
+    {
+      if(u.owned && u.buffer)
+        u.buffer->deleteLater();
+      u.buffer = nullptr;
+    }
+    ubos.clear();
+
+    if(sentinelBuffer)
+    {
+      sentinelBuffer->deleteLater();
+      sentinelBuffer = nullptr;
+    }
+    sentinelSize = 0;
+    indirectDrawBuffer = nullptr;
+    indirectDrawIndexed = false; // was left stale while the two aliases beside it were reset
+    indirectDrawSsboIndex = -1;
+  }
+};
+
+// --- API ------------------------------------------------------------------
+
+/**
+ * @brief Walk desc.inputs once and populate `out` with the storage buffers
+ *        and images declared by the shader.
+ *
+ * Bindings are assigned sequentially starting from `firstBinding`. Persistent
+ * SSBOs consume TWO consecutive bindings.
+ *
+ * No GPU resources are allocated here — call ensureStorageResources() later.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void collectGraphicsStorageResources(
+    const isf::descriptor& desc, int firstBinding, GraphicsStorageResources& out);
+
+/**
+ * @brief Create missing buffers and textures.
+ *
+ * Safe to call every frame — idempotent. Resizes buffers when they don't match
+ * the current layout. For persistent SSBOs, allocates both the current and
+ * prev buffers. For indirect-draw buffers, adds the IndirectBuffer usage flag.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void ensureStorageResources(
+    QRhi& rhi, QRhiResourceUpdateBatch& res, const RenderList& renderer,
+    const isf::descriptor& desc, GraphicsStorageResources& store,
+    QSize renderSize);
+
+/**
+ * @brief Produce the QRhiShaderResourceBinding list for the graphics pipeline.
+ *
+ * Call this from inside addOutputPass() after buildPipeline() has been set up.
+ * The result is concatenated with the standard bindings (sampler, material,
+ * processUBO, etc.) via the `additionalBindings` span in createDefaultBindings.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+QVarLengthArray<QRhiShaderResourceBinding, 8> buildExtraBindings(
+    const GraphicsStorageResources& store);
+
+/**
+ * @brief Wire read-only SSBOs to upstream geometry buffers.
+ *
+ * When a storage_input is declared as `read_only` AND the upstream node
+ * supplies a buffer on the port, the binding is rewired to the upstream's
+ * QRhiBuffer (no allocation needed); uniform_input UBOs fed by a Buffer port
+ * are retargeted the same way. Called each frame to track port changes.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void bindUpstreamBuffers(
+    RenderList& renderer, const std::vector<Port*>& inputPorts,
+    GraphicsStorageResources& store,
+    QRhiShaderResourceBindings* srb = nullptr);
+
+/**
+ * @brief Swap current/prev for all persistent SSBOs and storage images,
+ *        then update the SRB.
+ *
+ * Call at end of frame, after all passes have run. Symmetric to the existing
+ * texture ping-pong in RenderedISFNode (the `swap(passes, altPasses)` at
+ * RenderedISFNode.cpp:782).
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void swapPersistentSSBOs(
+    GraphicsStorageResources& store, QRhiShaderResourceBindings& srb);
+
+/**
+ * @brief Swap current/prev pointers in `store` without touching any SRB.
+ *
+ * Used by multi-pass / multi-SRB renderers that need to apply the same
+ * post-swap state to many descriptor sets: call this once per frame, then
+ * call reapplyStorageBindings on every affected SRB. Calling
+ * swapPersistentSSBOs per-SRB would double-swap and cancel out.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void swapPersistentSSBOsState(GraphicsStorageResources& store);
+
+/**
+ * @brief Re-apply the current persistent-storage state to a single SRB.
+ *
+ * Pairs with swapPersistentSSBOsState: after swapping `store` once, call
+ * this on every SRB that references the persistent bindings so the
+ * descriptor set matches the new pointers. Uses replaceBuffer's
+ * updateResources() fast path — no srb->create() rebuild — to avoid
+ * thrashing the SRB pool slot every frame on a static scene (the
+ * cf4b7d6f5 / diag-211 fix removed the trailing create() that earlier
+ * versions of this function called).
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void reapplyStorageBindings(
+    const GraphicsStorageResources& store, QRhiShaderResourceBindings& srb);
+
+/**
+ * @brief Wire read-only csf_image_input storage images to an upstream
+ *        geometry's published auxiliary_textures.
+ *
+ * Symmetric to `bindUpstreamBuffers` for SSBOs: when a csf_image_input is
+ * declared `read_only` AND the upstream geometry publishes a storage image
+ * with the same name (e.g. an upstream CSF wrote to it via image_input
+ * with `write_only`/`read_write`), this swaps the storage image's texture
+ * pointer to the upstream's published handle and frees the auto-allocated
+ * placeholder we created in `ensureStorageResources`.
+ *
+ * Without this, every read_only csf_image_input INPUTS in a downstream
+ * RawRaster / ISF stage reads from its OWN zero-initialised texture instead
+ * of the upstream's actual contents — silently broken.
+ *
+ * Called per-frame; idempotent. When `srb` is non-null, patches the binding
+ * in-place via `replaceTexture`. The lookup is purely by name match against
+ * `geometry.auxiliary_textures` (the same name-match pattern used by
+ * RawRaster's `m_auxTextureSamplers` rebind path).
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void bindUpstreamImagesFromGeometry(
+    GraphicsStorageResources& store, const ossia::geometry& geometry,
+    QRhiShaderResourceBindings* srb = nullptr);
+
+/**
+ * @brief Wire INPUTS storage_input / uniform_input bindings to upstream
+ *        geometry's published auxiliary_buffers list (name-match).
+ *
+ * SSBO/UBO sibling of `bindUpstreamImagesFromGeometry`. ScenePreprocessor
+ * publishes scene_lights / world_transforms / per_draws / scene_materials /
+ * scene_counts / scene_light_indices / camera UBO / env UBO as named aux
+ * buffers travelling along the geometry edge — flattened-scene shaders
+ * (classic_pbr et al.) declare matching INPUTS storage_input/uniform_input
+ * blocks and the runtime resolves them by name.
+ *
+ * Without this, INPUTS storage_input/uniform_input that go through the
+ * m_storage path stay at the 16-byte placeholder allocated by
+ * `ensureStorageResources` for owned SSBOs — vertices read a zero
+ * PerDraw, multiply by a zero world_transforms matrix, and collapse to
+ * origin. (Indirect-draw storage_inputs are skipped — they have no shader
+ * binding.)
+ *
+ * For CPU-backed aux buffers a fresh QRhiBuffer is allocated and the data
+ * uploaded immediately into `res`; the entry's `owned` flag is updated so
+ * `release()` cleans up correctly. For GPU-backed aux buffers we just
+ * adopt the upstream handle (`owned=false`).
+ *
+ * Patches the SRB in-place when provided, but only for entries whose handle
+ * changed in this call; a CPU-backed aux is re-created on every call.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void bindUpstreamBuffersFromGeometry(
+    QRhi& rhi, QRhiResourceUpdateBatch& res,
+    GraphicsStorageResources& store, const ossia::geometry& geometry,
+    QRhiShaderResourceBindings* srb = nullptr);
+
+/**
+ * @brief Decode an isf::storage_input::visibility string to Qt RHI stage flags.
+ *
+ * "fragment" → FragmentStage
+ * "vertex" → VertexStage
+ * "vertex+fragment" / "both" / "graphics" → Vertex | Fragment
+ * "compute" → ComputeStage
+ * "none" → 0
+ */
+SCORE_PLUGIN_GFX_EXPORT
+QRhiShaderResourceBinding::StageFlags visibilityToStages(std::string_view visibility) noexcept;
+
+/**
+ * @brief Byte size of a single GLSL primitive type as used for SSBO element
+ *        strides in this codebase.
+ *
+ * Coverage: scalars (`float`, `int`, `uint`, `bool`), vectors (`vec[234]`,
+ * `ivec[234]`, `uvec[234]`), and matrices (`mat2`, `mat3`, `mat4`). Sampler /
+ * image / opaque types are not covered (return the fallback). Returns 16 as a
+ * fallback for unknown / unsupported types.
+ *
+ * Conventions:
+ *  - Returns 12 for `vec3`/`ivec3`/`uvec3` (the bare component size). Consumers
+ *    that need std140 / std430 array stride must align to 16 themselves; for
+ *    that case prefer `std430ArrayStride` below, which encapsulates the rule
+ *    and keeps the two domains (bare type size vs. stride-in-SSBO) from
+ *    drifting at call sites. ISF auxiliary layouts continue to align at the
+ *    field level via `std430LayoutSize`.
+ *  - `mat2` is reported as 16 (two `vec2` columns, no per-column padding).
+ *  - `mat3` is reported as 48 (three `vec4`-padded columns); this matches both
+ *    std140 and std430 column-major layout for `mat3` in storage blocks.
+ *  - `mat4` is reported as 64.
+ *
+ * This is the single source of truth for GLSL type → element size in
+ * `score-plugin-gfx`; do not introduce private copies (see diagnostic 095).
+ *
+ * Note: For the vertex-attribute format → byte-size mapping
+ * (`ossia::geometry::attribute` enum), see the unrelated helper inside
+ * `RenderedCSFNode.cpp`; it operates on a different domain (binary attribute
+ * formats, not GLSL type strings).
+ */
+SCORE_PLUGIN_GFX_EXPORT
+int64_t glslTypeSizeBytes(std::string_view type) noexcept;
+
+/**
+ * @brief Same as glslTypeSizeBytes, but resolves user-defined types from
+ * the descriptor's TYPES section. Falls back to the built-in size table
+ * for primitives, then to descriptor.types lookup for struct names. The
+ * std430 size of a struct is the sum of its fields' sizes, each rounded
+ * up to a 16-byte boundary (matching the array-of-struct alignment rule
+ * already used by `std430LayoutSize` for AUXILIARY blocks). Returns 16
+ * (the lenient default) for unresolved names.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+int64_t glslTypeSizeBytes(std::string_view type, const isf::descriptor& d) noexcept;
+
+/**
+ * @brief Compute the std430 element size of a layout (vector of
+ * `{name,type}` field entries), each field rounded up to 16 bytes per
+ * the array-of-struct alignment rule. Used by AUXILIARY blocks and by
+ * the user-defined struct lookup in glslTypeSizeBytes.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+int64_t std430LayoutSize(
+    const std::vector<isf::storage_input::layout_field>& layout) noexcept;
+
+/**
+ * @brief std430 array stride for a GLSL primitive type when laid out as
+ * `T array[]` inside a shader storage block.
+ *
+ * Differs from `glslTypeSizeBytes` only for vec3-shaped vectors: per the
+ * std430 layout rules, an array of `vec3` (or `ivec3` / `uvec3`) keeps
+ * the element's vec4-aligned base alignment, so the per-element stride
+ * is 16 bytes — the trailing 4 bytes are padding the GPU does not write
+ * but consumer reads must skip. For scalars, vec2, vec4 and matrices,
+ * the stride equals the bare type size, so this returns
+ * `glslTypeSizeBytes(type)` unchanged.
+ *
+ * Use this — never `glslTypeSizeBytes` — when sizing a CSF SoA output
+ * SSBO buffer or setting a downstream vertex binding stride that mirrors
+ * the SSBO's std430 layout. Mixing the two is the source of the silent
+ * vec3 corruption diagnosed in the 3DGS pipeline.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+int64_t std430ArrayStride(std::string_view type) noexcept;
+
+/**
+ * @brief Same as `std430ArrayStride`, but resolves user-defined struct
+ * names against the descriptor's TYPES section. Falls back to
+ * `glslTypeSizeBytes(type, d)` for non-vec3 primitives and structs.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+int64_t std430ArrayStride(std::string_view type, const isf::descriptor& d) noexcept;
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.cpp
new file mode 100644
index 0000000000..b561aa05b1
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.cpp
@@ -0,0 +1,141 @@
+#include <Gfx/Graph/MergeGeometriesNode.hpp>
+#include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/RenderList.hpp>
+
+#include <algorithm>
+
+namespace score::gfx
+{
+
+// Renderer for MergeGeometriesNode: concatenates the geometry_specs received
+// on the input ports into m_outputSpec and forwards it to downstream sinks.
+struct RenderedMergeGeometriesNode final : NodeRenderer
+{
+  const MergeGeometriesNode& m_node;
+  ossia::geometry_spec m_outputSpec;
+  std::array<ossia::geometry_spec, MergeGeometriesNode::kMaxInputs> m_cachedInputs;
+
+  RenderedMergeGeometriesNode(const MergeGeometriesNode& n)
+      : NodeRenderer{n}
+      , m_node{n}
+  {
+  }
+
+  void init(RenderList&, QRhiResourceUpdateBatch&) override { m_initialized = true; }
+  void release(RenderList&) override
+  {
+    m_outputSpec = {};
+    m_cachedInputs.fill({});
+    m_initialized = false;
+  }
+
+  // m_portGeometries is keyed by (port, source); return the first entry for
+  // `port`. MergeGeometriesNode wires one input per port, so multi-source
+  // convergence on a single port isn't expected here.
+  const ossia::geometry_spec* findFirstByPort(int32_t port) const
+  {
+    for(const auto& [k, v] : m_portGeometries)
+      if(k.first == port)
+        return &v;
+    return nullptr;
+  }
+
+  bool anyInputChanged() const
+  {
+    const ossia::geometry_spec none{};
+    for(int i = 0; i < MergeGeometriesNode::kMaxInputs; ++i)
+    {
+      const auto* found = findFirstByPort((int32_t)i);
+      // Both operands are lvalues, so no geometry_spec copy is made here.
+      const ossia::geometry_spec& cur = found ? *found : none;
+      if(!(cur == m_cachedInputs[i]))
+        return true;
+    }
+    return false;
+  }
+
+  // Re-concatenates all connected inputs into fresh mesh / filter lists.
+  void rebuild()
+  {
+    auto list = std::make_shared<ossia::mesh_list>();
+    auto filters = std::make_shared<ossia::geometry_filter_list>();
+    int64_t maxDirty = 0;
+    int64_t maxFilterDirty = 0;
+    for(int i = 0; i < MergeGeometriesNode::kMaxInputs; ++i)
+    {
+      const auto* found = findFirstByPort((int32_t)i);
+      // Cache exactly what anyInputChanged() will compare against, even for
+      // a mesh-less spec; caching {} there would report a change (and force
+      // a rebuild) on every frame while such an input stays connected.
+      m_cachedInputs[i] = found ? *found : ossia::geometry_spec{};
+      if(!found || !found->meshes)
+        continue;
+      const auto& in = *found;
+      list->meshes.insert(
+          list->meshes.end(), in.meshes->meshes.begin(), in.meshes->meshes.end());
+      maxDirty = std::max(maxDirty, in.meshes->dirty_index);
+      if(in.filters)
+      {
+        filters->filters.insert(
+            filters->filters.end(), in.filters->filters.begin(),
+            in.filters->filters.end());
+        maxFilterDirty = std::max(maxFilterDirty, in.filters->dirty_index);
+      }
+    }
+    list->dirty_index = maxDirty + 1;
+    filters->dirty_index = maxFilterDirty + 1;
+
+    m_outputSpec.meshes = std::move(list);
+    m_outputSpec.filters = std::move(filters);
+  }
+
+  void update(RenderList&, QRhiResourceUpdateBatch&, Edge*) override
+  {
+    if(!m_outputSpec.meshes || this->geometryChanged || anyInputChanged())
+    {
+      rebuild();
+      this->geometryChanged = false;
+    }
+  }
+
+  // Hands m_outputSpec to the renderer of the node on the sink side of `edge`.
+  void runInitialPasses(
+      RenderList& renderer, QRhiCommandBuffer&, QRhiResourceUpdateBatch*&,
+      Edge& edge) override
+  {
+    if(!m_outputSpec.meshes)
+      return;
+    auto* sink = edge.sink;
+    if(!sink || !sink->node)
+      return;
+    auto rn_it = sink->node->renderedNodes.find(&renderer);
+    if(rn_it == sink->node->renderedNodes.end())
+      return;
+    auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink);
+    if(it == sink->node->input.end())
+      return;
+    int port_idx = (int)(it - sink->node->input.begin());
+    rn_it->second->process(port_idx, m_outputSpec, edge.source);
+  }
+
+  void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { }
+
+  // Data-only renderer — no per-edge GPU pass state to release.
+  void removeOutputPass(RenderList&, Edge&) override { }
+};
+
+MergeGeometriesNode::MergeGeometriesNode()
+{
+  for(int i = 0; i < kMaxInputs; ++i) // one Geometry input port per mergeable upstream
+    input.push_back(new Port{this, {}, Types::Geometry, {}});
+  output.push_back(new Port{this, {}, Types::Geometry, {}}); // single Geometry output port
+}
+
+MergeGeometriesNode::~MergeGeometriesNode() = default;
+
+NodeRenderer* MergeGeometriesNode::createRenderer(RenderList&) const noexcept
+{
+  return new RenderedMergeGeometriesNode{*this}; // raw new: the caller presumably takes ownership — confirm
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.hpp
new file mode 100644
index 0000000000..a219e8039d
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.hpp
@@ -0,0 +1,36 @@
+#pragma once
+#include <Gfx/Graph/Node.hpp>
+
+namespace score::gfx
+{
+
+/**
+ * @brief Concatenates up to N upstream geometry_specs into one.
+ *
+ * Intended use: combine independently-flattened scene partitions (static
+ * environment + animated characters + CSF-produced particles) into a
+ * single geometry_spec that a single downstream renderer can draw in one
+ * pass. All underlying GPU buffers are shared via `shared_ptr`; only the
+ * top-level mesh_list (and the merged filter list) is rebuilt.
+ *
+ * For v1, up to 8 input geometry ports are exposed. Unconnected ports
+ * contribute nothing.
+ *
+ * Inputs:
+ *   - Port 0..7: Geometry (Types::Geometry)
+ *
+ * Outputs:
+ *   - Port 0: Geometry (Types::Geometry)
+ */
+class SCORE_PLUGIN_GFX_EXPORT MergeGeometriesNode : public ProcessNode
+{
+public:
+  static constexpr int kMaxInputs = 8; //!< Number of Geometry input ports created by the constructor
+
+  MergeGeometriesNode();
+  ~MergeGeometriesNode() override;
+
+  score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override;
+};
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp
index 1cb2a3c8b3..63e4b8331f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp
@@ -40,6 +40,8 @@ void BasicMesh::preparePipeline(QRhiGraphicsPipeline& pip) const noexcept
   {
     pip.setDepthTest(true);
     pip.setDepthWrite(true);
+    // Reverse-Z project rule.
+    pip.setDepthOp(QRhiGraphicsPipeline::Greater);
   }
 
   pip.setTopology(this->topology);
@@ -61,6 +63,32 @@ void BasicMesh::draw(const MeshBuffers& bufs, QRhiCommandBuffer& cb) const noexc
   SCORE_ASSERT(buf->usage().testFlag(QRhiBuffer::VertexBuffer));
   setupBindings(bufs, cb);
 
+  if(bufs.useIndirectDraw && bufs.indirectDrawBuffer)
+  {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+    if(bufs.gpuIndirectSupported)
+    {
+      if(bufs.indirectDrawIndexed)
+        cb.drawIndexedIndirect(
+            bufs.indirectDrawBuffer, bufs.indirectDrawOffset,
+            bufs.indirectDrawCount, bufs.indirectDrawStride);
+      else
+        cb.drawIndirect(
+            bufs.indirectDrawBuffer, bufs.indirectDrawOffset,
+            bufs.indirectDrawCount, bufs.indirectDrawStride);
+      return;
+    }
+#endif
+    if(!bufs.cpuDrawCommands.empty())
+    {
+      for(const auto& cmd : bufs.cpuDrawCommands)
+        cb.draw(cmd.index_or_vertex_count, cmd.instance_count,
+                cmd.first_index_or_vertex, cmd.first_instance);
+      return;
+    }
+    return; // skip — no commands available yet
+  }
+
   cb.draw(vertexCount);
 }
 
@@ -211,4 +239,15 @@ void TexturedQuad::setupBindings(
 
   cb.setVertexInput(0, 2, bindings);
 }
+
+void drawMeshWithOptionalIndirect(
+    const Mesh& mesh, const MeshBuffers& bufs, QRhiCommandBuffer& cb) noexcept
+{
+  // Thin forwarder kept as an explicit opt-in marker for renderers that
+  // intend to support indirect multi-draw. The Mesh subclasses (BasicMesh,
+  // CustomMesh) check bufs.useIndirectDraw in draw() after binding vertex
+  // inputs and choose between cb.drawIndirect/drawIndexedIndirect, the
+  // bufs.cpuDrawCommands fallback loop, or their regular draw call.
+  mesh.draw(bufs, cb);
+}
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp
index 64f235cc36..6cadfa43fe 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp
@@ -27,16 +27,44 @@ struct BufferView
   Usage usage{Usage::Direct};
 #endif
 
+  // False for borrowed buffers — e.g., gpu_buffer handles the caller
+  // owns (scene preprocessor's MDI arena buffers, registry arena
+  // buffers). RenderList::release only `delete`s when owned=true; owners
+  // outside the RenderList's m_vertexBuffers destroy their own handles.
+  bool owned{true};
+
   inline operator bool() const noexcept { return handle; }
 };
 struct MeshBuffers
 {
   ossia::small_vector<BufferView, 2> buffers;
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+  // --- Multi-draw indirect state (tracked regardless of Qt version) ---
+  // With useIndirectDraw && indirectDrawBuffer, BasicMesh::draw picks:
+  //   gpuIndirectSupported, Qt 6.12+ → drawIndirect / drawIndexedIndirect (GPU)
+  //   else non-empty cpuDrawCommands → one CPU-issued draw call per command
+  //   else                           → draw skipped until commands are available
   QRhiBuffer* indirectDrawBuffer{};
   bool useIndirectDraw{false};
   bool indirectDrawIndexed{false};
+  bool gpuIndirectSupported{false};  // set from RenderState::caps at init
+  quint32 indirectDrawOffset{0};
+  quint32 indirectDrawCount{1};
+  quint32 indirectDrawStride{0};
+
+  // CPU-side draw commands. Populated either:
+  //   a) directly by the producer (ScenePreprocessor has CPU data), or
+  //   b) via GPU readback when the indirect buffer is GPU-generated (CSF)
+  //      and gpuIndirectSupported is false.
+  ossia::small_vector<ossia::geometry::draw_command, 0> cpuDrawCommands;
+
+  // Readback result storage for the synchronous GPU→CPU fallback in
+  // RenderedRawRasterPipelineNode::runInitialPasses.
+  // Qt < 6.6 has a separate type for buffer readbacks.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
+  QRhiReadbackResult readbackResult;
+#else
+  QRhiBufferReadbackResult readbackResult;
 #endif
 };
 /**
@@ -222,4 +250,19 @@ struct SCORE_PLUGIN_GFX_EXPORT TexturedQuad final : TexturedMesh
   setupBindings(const MeshBuffers& bufs, QRhiCommandBuffer& cb) const noexcept override;
 };
 
+/**
+ * @brief Explicit opt-in draw entry point for renderers supporting indirect multi-draw.
+ *
+ * Simply forwards to `mesh.draw(bufs, cb)`: the Mesh implementations inspect
+ * `bufs.useIndirectDraw` themselves and pick GPU indirect draws (Qt >= 6.12
+ * with `bufs.gpuIndirectSupported`), the `bufs.cpuDrawCommands` fallback
+ * loop, or their regular draw call.
+ *
+ * Intended for ISF / RawRaster / Scene renderers so that wiring an indirect
+ * buffer into MeshBuffers is all that is needed to get multi-draw.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void drawMeshWithOptionalIndirect(
+    const Mesh& mesh, const MeshBuffers& bufs, QRhiCommandBuffer& cb) noexcept;
+
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp
index ec739044ca..1c2b60eebf 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp
@@ -435,7 +435,13 @@ class MultiWindowRenderer final : public score::gfx::OutputNodeRenderer
     if(windowIndex < 0 || windowIndex >= (int)m_perWindow.size())
       return;
 
-    auto* res = renderer.state.rhi->nextResourceUpdateBatch();
+    // Don't pre-allocate a batch here: renderSubRegion has early-return
+    // paths before any consumer (beginPass), and pre-allocating leaks
+    // one pool slot per discarded window on every render. The three
+    // UBO blocks inside renderSubRegion lazily allocate via
+    // `if(!res) res = ...->nextResourceUpdateBatch()`, and beginPass
+    // accepts a null batch — so passing nullptr here is safe.
+    QRhiResourceUpdateBatch* res = nullptr;
     renderSubRegion(windowIndex, renderer, cb, res);
   }
 
@@ -503,7 +509,7 @@ class MultiWindowRenderer final : public score::gfx::OutputNodeRenderer
       res->updateDynamicBuffer(pw.warpUBO, 0, sizeof(warpData), warpData);
     }
 
-    cb.beginPass(rt, Qt::black, {1.0f, 0}, res);
+    cb.beginPass(rt, Qt::black, {0.0f, 0}, res);
     res = nullptr;
     {
       auto sz = wo.swapChain->currentPixelSize();
@@ -557,15 +563,20 @@ void MultiWindowNode::setRenderSize(QSize sz)
 
   m_renderState->renderSize = sz;
 
-  // The offscreen target must be recreated BEFORE the render-list
-  // rebuild so that the new upstream pipelines are built against the
-  // new RPD and sample from the new offscreen texture. The old
-  // pipelines briefly reference the deleted RPD, but their destruction
-  // (inside the upcoming m_onResize) doesn't dereference it.
-  recreateOffscreenTarget();
-
+  // Tear down the existing render list (and all pipelines built against
+  // the old offscreen RPD) BEFORE recreating the offscreen target, so
+  // no pipeline ever references a freed RPD pointer. m_onResize triggers
+  // recreateOutputRenderList which calls release() on every pass — the
+  // pipeline destructors enqueue their underlying GPU resources via
+  // QRhi's deferred-release queue and never dereference the RPD again.
+  // Only after the render list has released its references is it safe
+  // to swap the offscreen RT/RPD; the subsequent createOutputRenderList
+  // (kicked off by the same m_onResize callback) will then build new
+  // pipelines against the freshly recreated m_offscreenTarget.
   if(m_onResize)
     m_onResize();
+
+  recreateOffscreenTarget();
 }
 
 void MultiWindowNode::setSourceRect(int windowIndex, QRectF rect)
@@ -612,12 +623,24 @@ void MultiWindowNode::setTransform(int windowIndex, int rotation, bool mirrorX,
 
 void MultiWindowNode::setSwapchainFlag(Gfx::SwapchainFlag flag)
 {
+  if(m_swapchainFlag == flag)
+    return;
   m_swapchainFlag = flag;
+  // Live flag change requires per-window swapchain recreation. Mirrors
+  // ScreenNode::setSwapchainFlag — destroyOutput tears down all windows;
+  // the Graph reconciler rebuilds them on next cycle picking up the new
+  // flag at the swapchain create site.
+  destroyOutput();
 }
 
 void MultiWindowNode::setSwapchainFormat(Gfx::SwapchainFormat format)
 {
+  if(m_swapchainFormat == format)
+    return;
   m_swapchainFormat = format;
+  // Same rebuild rationale — without it the field updated but the live
+  // swapchains kept their prior format (HDR↔SDR toggle silently inert).
+  destroyOutput();
 }
 
 void MultiWindowNode::startRendering()
@@ -657,7 +680,7 @@ void MultiWindowNode::renderBlack()
 
     auto cb = wo.swapChain->currentFrameCommandBuffer();
     auto batch = rhi->nextResourceUpdateBatch();
-    cb->beginPass(wo.swapChain->currentFrameRenderTarget(), Qt::black, {1.0f, 0}, batch);
+    cb->beginPass(wo.swapChain->currentFrameRenderTarget(), Qt::black, {0.0f, 0}, batch);
     cb->endPass();
 
     rhi->endFrame(wo.swapChain);
@@ -868,10 +891,6 @@ void MultiWindowNode::releaseWindowSwapChain(int index)
   if(!wo.swapChain && !wo.depthStencil && !wo.renderPassDescriptor)
     return;
 
-  // Wait for any in-flight frames touching this swap chain before tearing
-  // its resources down.
-  m_renderState->rhi->finish();
-
   // Release the renderer's per-window GPU state first, so its pipeline
   // (built against wo.renderPassDescriptor) is gone before we delete the
   // RPD itself.
@@ -887,16 +906,30 @@ void MultiWindowNode::releaseWindowSwapChain(int index)
     }
   }
 
-  delete wo.swapChain;
-  wo.swapChain = nullptr;
+  // Order matters: clear hasSwapChain BEFORE releasing wo.swapChain so a
+  // queued expose / resize event landing in the middle of teardown can
+  // never observe (hasSwapChain == true && swapChain dangling). See
+  // diagnostic 047.
+  wo.hasSwapChain = false;
 
-  delete wo.depthStencil;
+  // Use deleteLater() instead of a synchronous rhi->finish() + delete.
+  // rhi->finish() issues vkQueueWaitIdle which drains ALL in-flight work on
+  // the graphics queue — stalling every other window. deleteLater() defers
+  // native-object destruction to the next endFrame() when the relevant frame
+  // slot is known safe, with no cross-window stall. See diagnostic 048.
+  auto* sc = wo.swapChain;
+  wo.swapChain = nullptr;
+  auto* ds = wo.depthStencil;
   wo.depthStencil = nullptr;
-
-  delete wo.renderPassDescriptor;
+  auto* rpd = wo.renderPassDescriptor;
   wo.renderPassDescriptor = nullptr;
 
-  wo.hasSwapChain = false;
+  if(sc)
+    sc->deleteLater();
+  if(ds)
+    ds->deleteLater();
+  if(rpd)
+    rpd->deleteLater();
 }
 
 void MultiWindowNode::createOutput(score::gfx::OutputConfiguration conf)
@@ -1034,6 +1067,11 @@ void MultiWindowNode::destroyOutput()
   // there are still frames in flight when resources are destroyed.
   m_renderState->rhi->finish();
 
+  // Persist-across-rebuild contract: registry survives RL teardown,
+  // so its QRhi resources have to be torn down here (BEFORE
+  // RenderState::destroy below) while the device is still alive.
+  releaseRegistry();
+
   // Detach Window callbacks so a close that races with destruction can't
   // reach back into us while we're tearing things down.
   for(auto& wo : m_windowOutputs)
@@ -1051,6 +1089,11 @@ void MultiWindowNode::destroyOutput()
   //    outlive the rhi's teardown of per-window state.
   for(auto& wo : m_windowOutputs)
   {
+    // Order matters: clear hasSwapChain BEFORE deleting wo.swapChain so a
+    // queued event cannot observe (hasSwapChain == true && swapChain
+    // dangling). See diagnostic 047.
+    wo.hasSwapChain = false;
+
     delete wo.swapChain;
     wo.swapChain = nullptr;
 
@@ -1059,8 +1102,6 @@ void MultiWindowNode::destroyOutput()
 
     delete wo.renderPassDescriptor;
     wo.renderPassDescriptor = nullptr;
-
-    wo.hasSwapChain = false;
   }
 
   // 2. Release the offscreen target (texture + depth + RT + RPD). This
@@ -1090,6 +1131,43 @@ void MultiWindowNode::updateGraphicsAPI(GraphicsApi api)
     return;
 
   if(m_renderState->api != api)
+  {
+    destroyOutput();
+    return;
+  }
+
+  // Same API, but the requested sample count may have changed via the
+  // settings panel. Mirror ScreenNode's clamp-and-compare path: rebuild
+  // if the resolved sample count no longer matches what the rhi was
+  // created with.
+  auto* rhi = m_renderState->rhi;
+  if(!rhi)
+    return;
+
+  int samples_request
+      = score::AppContext().settings<Gfx::Settings::Model>().resolveSamples(api);
+  const auto supported = rhi->supportedSampleCounts();
+  if(supported.isEmpty())
+  {
+    samples_request = 1;
+  }
+  else
+  {
+    int chosen = supported.first();
+    for(int v : supported)
+    {
+      if(v == samples_request)
+      {
+        chosen = v;
+        break;
+      }
+      if(v < samples_request)
+        chosen = v;
+    }
+    samples_request = chosen;
+  }
+
+  if(m_renderState->samples != samples_request)
     destroyOutput();
 }
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp
index fa847b03e3..35f26d9018 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp
@@ -160,7 +160,19 @@ class SCORE_PLUGIN_GFX_EXPORT Node : public QObject
 
   int32_t nodeId = score::gfx::invalid_node_index;
   bool requiresDepth{};
-  bool addedToGraph{};
+
+  /**
+   * @brief Tells whether the user explicitly sized the render target of @p port.
+   *
+   * True only when render_target_spec carries a size for that port; backward
+   * size propagation uses this to decide whether to inherit the downstream size.
+   */
+  bool hasExplicitRenderTargetSize(int32_t port) const noexcept
+  {
+    if(const auto spec = renderTargetSpecs.find(port); spec != renderTargetSpecs.end())
+      return spec->second.size.has_value();
+    return false;
+  }
 
   QSize resolveRenderTargetSize(int32_t port, RenderList& renderer) const noexcept;
   RenderTargetSpecs
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp
index 3ebeadc500..272853cf23 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp
@@ -4,6 +4,10 @@
 
 #include <score/tools/Debug.hpp>
 
+#include <QDebug>
+#include <QMatrix4x4>
+#include <QQuaternion>
+
 #include <ossia/detail/algorithms.hpp>
 
 namespace score::gfx
@@ -14,6 +18,70 @@ TextureRenderTarget NodeRenderer::renderTargetForInput(const Port& p)
   return {};
 }
 
+void NodeRenderer::initState(RenderList&, QRhiResourceUpdateBatch&) { }
+
+void NodeRenderer::releaseState(RenderList&) { }
+
+void NodeRenderer::addOutputPass(RenderList&, Edge&, QRhiResourceUpdateBatch&) { }
+
+void NodeRenderer::updateInputSamplerFilter(
+    const Port& input, const RenderTargetSpecs& spec)
+{
+  // Default: no-op. Renderers that cache samplers should override.
+}
+
+void NodeRenderer::addInputEdge(RenderList&, Edge&, QRhiResourceUpdateBatch&) { }
+
+// When an upstream edge is removed (e.g. the user inserts a Transform3D in
+// the middle of an existing glTF → ScenePreprocessor wire), drop the cached
+// per-(port, source) entries this edge was populating, then re-merge right
+// away. Without the erase, the last scene/geometry pushed by the
+// now-disconnected producer lingers in m_portScenes / m_portGeometries and
+// keeps being merged in; without the immediate re-merge, `scene` stays stale
+// whenever no remaining producer calls process() again.
+void NodeRenderer::removeInputEdge(RenderList&, Edge& edge)
+{
+  if(!edge.sink || !edge.sink->node)
+    return;
+
+  // Figure out which input port of the sink this edge was landing on.
+  const auto& inputs = edge.sink->node->input;
+  int32_t port = -1;
+  for(std::size_t i = 0; i < inputs.size(); ++i)
+  {
+    if(inputs[i] == edge.sink)
+    {
+      port = (int32_t)i;
+      break;
+    }
+  }
+  if(port < 0)
+    return;
+
+  const PortSourceKey key{port, static_cast<const void*>(edge.source)};
+  m_portGeometries.erase(key);
+  m_portScenes.erase(key);
+  m_wrapCache.erase(key);
+
+  // Also drop the legacy nullptr-keyed slot in case this edge was the sole
+  // contributor via the 2-arg process() path.
+  const PortSourceKey legacyKey{port, nullptr};
+  m_portGeometries.erase(legacyKey);
+  m_portScenes.erase(legacyKey);
+  m_wrapCache.erase(legacyKey);
+
+  // Invalidate the memoized merge and recompute it now, so that `scene`
+  // stops exposing the removed producer even if nothing is pushed again.
+  m_mergeCacheInputs.clear();
+  m_mergeCacheOutput = {};
+  rebuildMergedScene();
+  sceneChanged = true;
+}
+
+bool NodeRenderer::hasOutputPassForEdge(Edge& edge) const { return false; }
+
+void NodeRenderer::seedInitialOutputs(RenderList&) { }
+
 void defaultPassesInit(
     PassMap& passes, const std::vector<Edge*>& edges, RenderList& renderer,
     const Mesh& mesh, const QShader& v, const QShader& f, QRhiBuffer* processUBO,
@@ -29,7 +97,7 @@ void defaultPassesInit(
       auto pip = score::gfx::buildPipeline(
           renderer, mesh, v, f, rt, processUBO, matUBO, samplers, additionalBindings);
       if(pip.pipeline)
-        passes.emplace_back(edge, pip);
+        passes.emplace_back(edge, Pass{rt, pip, nullptr});
     }
   }
 }
@@ -43,8 +111,8 @@ void defaultRenderPass(
   if(it != passes.end())
   {
     const auto sz = renderer.renderSize(&edge);
-    cb.setGraphicsPipeline(it->second.pipeline);
-    cb.setShaderResources(it->second.srb);
+    cb.setGraphicsPipeline(it->second.p.pipeline);
+    cb.setShaderResources(it->second.p.srb);
     cb.setViewport(QRhiViewport(0, 0, sz.width(), sz.height()));
 
     mesh.draw(bufs, cb);
@@ -61,11 +129,12 @@ void quadRenderPass(
 {
   auto it
       = ossia::find_if(passes, [ptr = &edge](const auto& p) { return p.first == ptr; });
-  SCORE_ASSERT(it != passes.end());
+  if(it == passes.end())
+    return;
   {
     const auto sz = renderer.renderSize(&edge);
-    cb.setGraphicsPipeline(it->second.pipeline);
-    cb.setShaderResources(it->second.srb);
+    cb.setGraphicsPipeline(it->second.p.pipeline);
+    cb.setShaderResources(it->second.p.srb);
     cb.setViewport(QRhiViewport(0, 0, sz.width(), sz.height()));
 
     const auto& mesh = renderer.defaultQuad();
@@ -115,6 +184,14 @@ void GenericNodeRenderer::defaultPassesInit(
 }
 
 void GenericNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
+
+  for(Edge* edge : this->node.output[0]->edges)
+    addOutputPass(renderer, *edge, res);
+}
+
+void GenericNodeRenderer::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   m_mesh = &renderer.defaultTriangle();
   auto& mesh = *m_mesh;
@@ -122,8 +199,174 @@ void GenericNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& re
   processUBOInit(renderer);
 
   m_material.init(renderer, node.input, m_samplers);
+  // Upload initial material data
+  if(m_material.buffer && m_material.size > 0)
+  {
+    auto& n = static_cast<const score::gfx::NodeModel&>(this->node);
+    if(n.m_materialData)
+      res.updateDynamicBuffer(m_material.buffer, 0, m_material.size, n.m_materialData.get());
+  }
+
+  m_initialized = true;
+}
+
+// Creates the Pass used to render this node's image output along `edge`:
+// a dedicated SRB plus a pipeline that is shared, through m_pipelineCache,
+// between all edges whose render target uses the same render pass
+// descriptor. `res` is not used here.
+void GenericNodeRenderer::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  // Nothing to do before initState() has picked a mesh, or for non-image
+  // outputs.
+  if(!m_mesh || this->node.output[0]->type != score::gfx::Types::Image)
+    return;
+
+  // The edge must resolve to a usable render target.
+  auto rt = renderer.renderTargetForOutput(edge);
+  if(!rt.renderTarget)
+    return;
+
+  // One SRB per edge. Every SRB of this renderer has the same layout (same
+  // node, same sampler count, same UBOs), hence they are all
+  // layout-compatible with a pipeline built against any one of them.
+  auto* bindings = score::gfx::createDefaultBindings(
+      renderer, rt, m_processUBO, m_material.buffer, m_samplers);
+  if(!bindings)
+    return;
+
+  // Look for a pipeline already built against this exact
+  // QRhiRenderPassDescriptor pointer: same pointer ⇒ same owning RT ⇒ the
+  // pipeline compatibility rules of Vulkan, D3D12 and Metal all hold.
+  // Render targets whose rp-desc is merely isCompatible but lives at a
+  // *different* address are deliberately not matched: that would require
+  // tracking which pipelines still have a live rp-desc, and the common
+  // sharing case (two edges to the same sink port) is already covered by
+  // the pointer comparison.
+  const auto cached = ossia::find_if(m_pipelineCache, [&](const auto& entry) {
+    return entry.first == rt.renderPass && entry.second;
+  });
+  QRhiGraphicsPipeline* pipeline
+      = (cached != m_pipelineCache.end()) ? cached->second : nullptr;
+
+  if(!pipeline)
+  {
+    // Cache miss: build a pipeline and hand its ownership to the cache.
+    auto built = score::gfx::buildPipeline(
+        renderer, *m_mesh, m_vertexS, m_fragmentS, rt, bindings);
+    pipeline = built.pipeline;
+    if(!pipeline)
+    {
+      bindings->deleteLater();
+      return;
+    }
+    m_pipelineCache.emplace_back(rt.renderPass, pipeline);
+  }
+
+  // The Pass only borrows the pipeline (the cache owns it); removeOutputPass
+  // and releaseState null out p.pipeline before Pipeline::release() so that
+  // releasing the Pass destroys nothing but the SRB.
+  m_p.emplace_back(&edge, Pass{rt, Pipeline{pipeline, bindings}, nullptr});
+}
+
+// Drops the Pass created for `edge`, and evicts its pipeline from the cache
+// when it was cache-owned and no other Pass uses it anymore.
+void GenericNodeRenderer::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  const auto passIt = ossia::find_if(
+      m_p, [target = &edge](const auto& entry) { return entry.first == target; });
+  if(passIt == m_p.end())
+    return;
+
+  QRhiGraphicsPipeline* const pipeline = passIt->second.p.pipeline;
+
+  // Ownership rule: a pipeline belongs to the cache iff some
+  // m_pipelineCache entry points to it. addOutputPass hands out such shared,
+  // cache-owned pipelines; Passes created through defaultPassesInit
+  // (ImageNode and the like, which pre-date the cache) own theirs.
+  const auto cacheEntry = ossia::find_if(
+      m_pipelineCache, [&](const auto& entry) { return entry.second == pipeline; });
+  const bool cacheOwned = (cacheEntry != m_pipelineCache.end());
+
+  // For a cache-owned pipeline, detach it first so that Pipeline::release()
+  // does not deleteLater() it; the per-edge SRB is dropped either way.
+  if(cacheOwned)
+    passIt->second.p.pipeline = nullptr;
+  passIt->second.release();
+  m_p.erase(passIt);
+
+  if(!cacheOwned || !pipeline)
+    return;
+
+  // Evict the cache entry once no remaining Pass references the pipeline;
+  // otherwise long-lived renderers would accumulate one entry per
+  // historical rp-desc pointer until releaseState.
+  const bool stillUsed = ossia::any_of(
+      m_p, [&](const auto& entry) { return entry.second.p.pipeline == pipeline; });
+  if(stillUsed)
+    return;
+
+  pipeline->deleteLater();
+  m_pipelineCache.erase(cacheEntry);
+}
+
+bool GenericNodeRenderer::hasOutputPassForEdge(Edge& edge) const
+{
+  // A pass exists for this edge iff m_p holds an entry keyed on its address.
+  return ossia::any_of(m_p, [target = &edge](const auto& p) { return p.first == target; });
+}
+
+void GenericNodeRenderer::releaseState(RenderList& renderer)
+{
+  // Nothing was set up (or everything was already released).
+  if(!m_initialized)
+    return;
+
+  // A pipeline is cache-owned iff m_pipelineCache references it.
+  const auto ownedByCache = [this](const QRhiGraphicsPipeline* candidate) {
+    return ossia::any_of(
+        m_pipelineCache, [&](const auto& entry) { return entry.second == candidate; });
+  };
+
+  // Release the remaining passes. Cache-owned pipelines must NOT be
+  // deleteLater'd through Pipeline::release(), so they are detached from
+  // their Pass first and the Pass then only drops its SRB. Passes whose
+  // pipeline is not in the cache (produced by defaultPassesInit — see
+  // ImageNode::PreloadedRenderer) keep the original owning release
+  // semantics.
+  for(auto& [passEdge, pass] : m_p)
+  {
+    if(pass.p.pipeline && ownedByCache(pass.p.pipeline))
+      pass.p.pipeline = nullptr;
+    pass.release();
+  }
+  m_p.clear();
+
+  // The cache is the owner of what is left: destroy those pipelines now.
+  for(const auto& entry : m_pipelineCache)
+  {
+    if(entry.second)
+      entry.second->deleteLater();
+  }
+  m_pipelineCache.clear();
+
+  // Samplers are deleted here; the textures they reference are deleted
+  // elsewhere.
+  for(auto& sampler : m_samplers)
+    delete sampler.sampler;
+  m_samplers.clear();
+
+  // Uniform buffers created for this renderer.
+  delete m_processUBO;
+  m_processUBO = nullptr;
+
+  delete m_material.buffer;
+  m_material.buffer = nullptr;
+
+  // FIXME Check that they get released?
+  // We should have a refcount for this
+  m_meshbufs = {};
+
+  m_initialized = false;
+}
 
 void GenericNodeRenderer::defaultUBOUpdate(
@@ -139,6 +382,7 @@ void GenericNodeRenderer::defaultUBOUpdate(
       char* data = n.m_materialData.get();
       res.updateDynamicBuffer(m_material.buffer, 0, m_material.size, data);
     }
+    materialChanged = false;
   }
 }
 
@@ -176,10 +420,32 @@ void GenericNodeRenderer::update(
 
 void GenericNodeRenderer::defaultRelease(RenderList&)
 {
+  // Mirror the ownership handling in releaseState — cache-owned pipelines
+  // are destroyed by the cache, not by Pipeline::release().
+  for(auto& pass : m_p)
+  {
+    auto* pipeline = pass.second.p.pipeline;
+    if(pipeline)
+    {
+      const bool cacheOwned = ossia::any_of(
+          m_pipelineCache, [&](const auto& e) { return e.second == pipeline; });
+      if(cacheOwned)
+        pass.second.p.pipeline = nullptr;
+    }
+    pass.second.release();
+  }
+  m_p.clear();
+
+  for(auto& [desc, pipeline] : m_pipelineCache)
+  {
+    if(pipeline)
+      pipeline->deleteLater();
+  }
+  m_pipelineCache.clear();
+
   for(auto sampler : m_samplers)
   {
     delete sampler.sampler;
-    // texture isdeleted elsewxheree
   }
   m_samplers.clear();
 
@@ -189,13 +455,9 @@ void GenericNodeRenderer::defaultRelease(RenderList&)
   delete m_material.buffer;
   m_material.buffer = nullptr;
 
-  for(auto& pass : m_p)
-    pass.second.release();
-  m_p.clear();
-
-  // FIXME Check that they get released?
-  // We should have a refcount for this
   m_meshbufs = {};
+
+  m_initialized = false;
 }
 
 void NodeRenderer::runInitialPasses(
@@ -206,10 +468,74 @@ void NodeRenderer::runInitialPasses(
 void NodeRenderer::runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) {
 }
 
+// Recomputes `this->scene` as the merge of all m_portScenes entries. The
+// result is memoized on the (scene_state pointer, version) pairs of the
+// inputs: while those are unchanged, the previously merged scene_spec (and
+// its scene_state shared_ptr) is handed out again verbatim, which lets
+// downstream consumers such as ScenePreprocessorNode keep their caches hot
+// instead of re-decoding textures and re-uploading buffers every frame.
+void NodeRenderer::rebuildMergedScene()
+{
+  ossia::small_vector<MergeCacheKey, 4> signature;
+  ossia::small_vector<const ossia::scene_spec*, 4> inputs;
+  for(const auto& [slot, spec] : m_portScenes)
+  {
+    // Only a null state disqualifies an input. Scenes with empty roots are
+    // kept on purpose: env-only producers (EnvironmentLoader,
+    // CubemapLoader, …) have no roots but still contribute environment
+    // fields, and filtering them out would make their skybox / ambient /
+    // fog updates invisible. The downstream merge copes with empty roots.
+    if(!spec.state)
+      continue;
+    signature.push_back({spec.state.get(), spec.state->version});
+    inputs.push_back(&spec);
+  }
+
+  // Cache hit: same inputs as last time and a merged result is available.
+  if(signature == m_mergeCacheInputs && m_mergeCacheOutput.state)
+  {
+    this->scene = m_mergeCacheOutput;
+    return;
+  }
+  m_mergeCacheInputs.assign(signature.begin(), signature.end());
+
+  if(inputs.empty())
+  {
+    // No producer left: publish an empty scene.
+    this->scene = {};
+  }
+  else if(inputs.size() == 1)
+  {
+    // Single producer: pass its scene through untouched.
+    this->scene = *inputs.front();
+  }
+  else
+  {
+    // merge_scenes takes a span of scene_spec values, so copy the inputs.
+    ossia::small_vector<ossia::scene_spec, 4> copies;
+    copies.reserve(inputs.size());
+    for(const auto* spec : inputs)
+      copies.push_back(*spec);
+    this->scene = ossia::merge_scenes(
+        std::span<const ossia::scene_spec>{copies.data(), copies.size()});
+  }
+
+  m_mergeCacheOutput = this->scene;
+}
+
 void NodeRenderer::process(int32_t port, const ossia::geometry_spec& v)
 {
-  // Store per-port for multi-geometry-port nodes (CSF)
-  m_portGeometries[port] = v;
+  process(port, v, nullptr);
+}
+
+void NodeRenderer::process(
+    int32_t port, const ossia::geometry_spec& v, const void* source_key)
+{
+  const PortSourceKey key{port, source_key};
+
+  // Store per-(port,source) for multi-geometry-port nodes (CSF) and for
+  // multi-producer accumulation on the same port.
+  m_portGeometries[key] = v;
 
   // Backward compat: keep the single geometry field updated
   // (used by GenericNodeRenderer, RenderedRawRasterPipelineNode, etc.)
@@ -218,28 +544,146 @@ void NodeRenderer::process(int32_t port, const ossia::geometry_spec& v)
     this->geometry = v;
     geometryChanged = true;
   }
-  else
+  else if(this->geometry.meshes)
   {
-    if(this->geometry.meshes)
+    for(auto& mesh : this->geometry.meshes->meshes)
     {
-      for(auto& mesh : this->geometry.meshes->meshes)
+      for(auto& buf : mesh.buffers)
       {
-        for(auto& buf : mesh.buffers)
+        if(buf.dirty)
         {
-          if(buf.dirty)
-          {
-            geometryChanged = true;
-            break;
-          }
-        }
-        if(geometryChanged)
+          geometryChanged = true;
           break;
+        }
       }
+      if(geometryChanged)
+        break;
+    }
+  }
+
+  // Auto-wrap into scene for scene-aware renderers. The wrap is cached
+  // per (port,source) keyed on the geometry_spec identity: if the same
+  // spec is re-pushed (common case — glTF / FBX loaders re-publish every
+  // frame even when nothing changed) the wrapper's scene_state shared_ptr
+  // stays stable across frames, which is what the merge memoization
+  // relies on.
+  auto& cache_entry = m_wrapCache[key];
+  if(cache_entry.first != v || !cache_entry.second.state)
+  {
+    cache_entry.first = v;
+    cache_entry.second = ossia::wrap_geometry_as_scene(v);
+  }
+  m_portScenes[key] = cache_entry.second;
+  sceneChanged = true;
+  rebuildMergedScene();
+}
+
+void NodeRenderer::process(int32_t port, const ossia::scene_spec& v)
+{
+  process(port, v, nullptr);
+}
+
+void NodeRenderer::process(
+    int32_t port, const ossia::scene_spec& v, const void* source_key)
+{
+  // Record the scene under its (port, producer) slot and refresh the merge.
+  const PortSourceKey slot{port, source_key};
+  m_portScenes[slot] = v;
+  sceneChanged = true;
+  rebuildMergedScene();
+
+  // Backward compatibility: renderers that only understand geometry_spec
+  // get the first geometry found in the scene, if there is one.
+  auto firstGeometry = ossia::extract_first_geometry(v);
+  if(!firstGeometry)
+    return;
+  m_portGeometries[slot] = firstGeometry;
+  if(this->geometry != firstGeometry)
+  {
+    this->geometry = firstGeometry;
+    geometryChanged = true;
+  }
+}
 
-void NodeRenderer::process(int32_t port, const ossia::transform3d& v) { }
+void NodeRenderer::process(int32_t port, const ossia::transform3d& v)
+{
+  // Apply the matrix transform to the last root node in the scene.
+  // Geometry is always pushed before transform for the same edge.
+  // We wrap the last root's children under a scene_transform payload.
+  if(!this->scene.state || this->scene.state->empty())
+    return;
+
+  // Convert matrix-based transform3d to TRS scene_transform.
+  // v.matrix is column-major (from QMatrix4x4::data()), while the QMatrix4x4
+  // float-array constructors read row-major data: transpose to undo that.
+  const QMatrix4x4 mat = QMatrix4x4(v.matrix, 4, 4).transposed();
+  QVector3D translation = mat.column(3).toVector3D();
+
+  // Extract rotation (assumes no shear)
+  QVector3D col0 = mat.column(0).toVector3D();
+  QVector3D col1 = mat.column(1).toVector3D();
+  QVector3D col2 = mat.column(2).toVector3D();
+  QVector3D scale(col0.length(), col1.length(), col2.length());
+
+  if(scale.x() > 0.f) col0 /= scale.x();
+  if(scale.y() > 0.f) col1 /= scale.y();
+  if(scale.z() > 0.f) col2 /= scale.z();
+  float rot3x3[9] = {
+      col0.x(), col1.x(), col2.x(),
+      col0.y(), col1.y(), col2.y(),
+      col0.z(), col1.z(), col2.z()};
+  QQuaternion quat = QQuaternion::fromRotationMatrix(QMatrix3x3(rot3x3));
+
+  ossia::scene_transform xform;
+  xform.translation[0] = translation.x();
+  xform.translation[1] = translation.y();
+  xform.translation[2] = translation.z();
+  xform.rotation[0] = quat.x();
+  xform.rotation[1] = quat.y();
+  xform.rotation[2] = quat.z();
+  xform.rotation[3] = quat.scalar();
+  xform.scale[0] = scale.x();
+  xform.scale[1] = scale.y();
+  xform.scale[2] = scale.z();
+
+  // Rebuild: wrap the last root under a new parent with [transform, old_root]
+  auto new_roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  for(auto& root : *this->scene.state->roots)
+    new_roots->push_back(root);
+
+  if(!new_roots->empty())
+  {
+    auto& last_root = new_roots->back();
+    if(last_root)
+    {
+      auto new_children = std::make_shared<std::vector<ossia::scene_payload>>();
+      new_children->push_back(xform);
+      // Carry over original children
+      if(last_root->has_children())
+        for(auto& child : *last_root->children)
+          new_children->push_back(child);
+
+      auto new_node = std::make_shared<ossia::scene_node>();
+      new_node->id = last_root->id;
+      new_node->children = std::move(new_children);
+      new_roots->back() = std::move(new_node);
+    }
+  }
+
+  auto new_state = std::make_shared<ossia::scene_state>();
+  new_state->roots = std::move(new_roots);
+  if(this->scene.state->materials)
+    new_state->materials = this->scene.state->materials;
+  if(this->scene.state->animations)
+    new_state->animations = this->scene.state->animations;
+
+  this->scene.state = std::move(new_state);
+  // transform3d mutates the merged scene in place; republish it on the
+  // (port, nullptr) slot since there's no single upstream producer identity
+  // for the transformed result.
+  m_portScenes[PortSourceKey{port, nullptr}] = this->scene;
+  sceneChanged = true;
+}
 
 void GenericNodeRenderer::defaultRenderPass(
     RenderList& renderer, const Mesh& mesh, QRhiCommandBuffer& cb, Edge& edge)
@@ -261,7 +705,7 @@ void GenericNodeRenderer::runRenderPass(
   defaultRenderPass(renderer, mesh, cb, edge);
 }
 
-void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex)
+void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
   int sampler_idx = 0;
   for(auto* p : node.input)
@@ -269,7 +713,12 @@ void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex
     if(p == &input)
       break;
     if(p->type == Types::Image)
+    {
       sampler_idx++;
+      // Skip the depth sampler that follows ports with SamplableDepth
+      if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+        sampler_idx++;
+    }
   }
 
   if(sampler_idx < (int)m_samplers.size())
@@ -279,15 +728,30 @@ void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex
     {
       sampl.texture = tex;
       for(auto& [e, pass] : m_p)
-        if(pass.srb)
-          score::gfx::replaceTexture(*pass.srb, sampl.sampler, tex);
+        if(pass.p.srb)
+          score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex);
+    }
+
+    // Update the depth sampler if the port has SamplableDepth
+    if(depthTex
+       && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth
+       && sampler_idx + 1 < (int)m_samplers.size())
+    {
+      auto& depthSampl = m_samplers[sampler_idx + 1];
+      if(depthSampl.texture != depthTex)
+      {
+        depthSampl.texture = depthTex;
+        for(auto& [e, pass] : m_p)
+          if(pass.p.srb)
+            score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex);
+      }
     }
   }
 }
 
 void GenericNodeRenderer::release(RenderList& r)
 {
-  defaultRelease(r);
+  releaseState(r);
 }
 
 score::gfx::NodeRenderer::~NodeRenderer() { }
@@ -307,7 +771,7 @@ QRhiTexture* NodeRenderer::textureForOutput(const Port& output)
   return nullptr;
 }
 
-void NodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex)
+void NodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
 }
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp
index bec85ba180..71e56dfe38 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp
@@ -26,9 +26,16 @@ class SCORE_PLUGIN_GFX_EXPORT NodeRenderer
   //! downstream-provided render target.
   virtual QRhiTexture* textureForOutput(const Port& output);
 
-  //! Updates the sampler texture for a GrabsFromSource input port.
-  //! Called from the render loop when the upstream texture may have changed.
-  virtual void updateInputTexture(const Port& input, QRhiTexture* tex);
+  //! Updates the sampler texture for an input port.
+  //! Called when the upstream texture may have changed (edge add, RT recreation).
+  //! If the port has SamplableDepth and depthTex is non-null, the depth
+  //! sampler (immediately after the color sampler) is also updated.
+  virtual void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr);
+
+  //! Updates the sampler filter/address settings for an input port.
+  //! Called when the render target spec changes (e.g. linear → nearest).
+  virtual void updateInputSamplerFilter(
+      const Port& input, const RenderTargetSpecs& spec);
 
   //! Called when all the inbound nodes to a texture input have finished rendering.
   //! Mainly useful to slip in a readback.
@@ -47,17 +54,126 @@ class SCORE_PLUGIN_GFX_EXPORT NodeRenderer
 
   virtual void release(RenderList&) = 0;
 
+  /**
+   * @name Incremental lifecycle API
+   *
+   * These methods enable dynamic graph editing by splitting the init/release
+   * lifecycle into edge-independent state and per-edge passes.
+   *
+   * Renderers that override these are incrementally updateable: adding or
+   * removing an output edge only creates/destroys one pass, without touching
+   * the rest of the renderer's GPU resources.
+   *
+   * Defaults are no-ops for backward compatibility; removeOutputPass() is pure virtual.
+   * @{
+   */
+
+  /// Initialize edge-independent state: material UBO, samplers, mesh, shaders.
+  /// Called once when the renderer enters a RenderList.
+  virtual void initState(RenderList& renderer, QRhiResourceUpdateBatch& res);
+
+  /// Release edge-independent state.
+  /// Called once when the renderer leaves a RenderList.
+  virtual void releaseState(RenderList& renderer);
+
+  /// Create a pass for a new output edge (pipeline, SRB, processUBO).
+  virtual void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res);
+
+  /// Remove the pass for a removed output edge.
+  /// Pure-virtual: every concrete renderer must explicitly handle edge
+  /// removal. Sinks (OutputNodeRenderer) and data-only renderers that
+  /// store no per-edge GPU state can override with an empty body.
+  virtual void removeOutputPass(RenderList& renderer, Edge& edge) = 0;
+
+  /// Notify the renderer that a new input edge was connected.
+  /// Typically updates sampler textures or geometry bindings.
+  virtual void
+  addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res);
+
+  /// Notify the renderer that an input edge was disconnected.
+  virtual void removeInputEdge(RenderList& renderer, Edge& edge);
+
+  /// Check if this renderer already has an output pass for the given edge.
+  virtual bool hasOutputPassForEdge(Edge& edge) const;
+
+  /// Seed downstream consumers once at init-time with this renderer's
+  /// current outputs. Default no-op. Halp scene/geometry producers (Camera,
+  /// EnvironmentLoader, Light, …) override this to run their
+  /// operator()() once during reconciliation and immediately push the
+  /// result into each downstream sink's per-port scene cache — without
+  /// this, a live-inserted producer's output wouldn't reach the sink's
+  /// `m_portScenes` until the next render frame's upstream scan fires the
+  /// producer's runInitialPasses, which can arrive too late relative to
+  /// the sink's own frame-start cache snapshot and produce the
+  /// "Camera inserted live has no effect until stop/restart" symptom.
+  virtual void seedInitialOutputs(RenderList& renderer);
+
+  /** @} */
+
   void checkForChanges()
   {
-    materialChanged = node.hasMaterialChanged(materialChangedIndex);
-    renderTargetSpecsChanged
-        = node.hasRenderTargetChanged(renderTargetSpecsChangedIndex);
+    // Use |= to preserve flags set externally (e.g. by reconciliation
+    // or maybeRebuild). The flag is cleared by the renderer's update()
+    // after processing, preventing infinite re-uploads.
+    materialChanged |= node.hasMaterialChanged(materialChangedIndex);
+    renderTargetSpecsChanged |= node.hasRenderTargetChanged(renderTargetSpecsChangedIndex);
+  }
+
+  /// Clears renderTargetSpecsChanged and polls node.hasRenderTargetChanged()
+  /// with renderTargetSpecsChangedIndex, discarding the answer (presumably this
+  /// resyncs the index - confirm in Node). materialChanged is left untouched.
+  void syncRenderTargetIndex()
+  {
+    renderTargetSpecsChanged = false;
+    (void)node.hasRenderTargetChanged(renderTargetSpecsChangedIndex);
+  }
 
-  // FIXME this will change when we have a proper scene node
   void process(int32_t port, const ossia::geometry_spec& v);
+  void process(int32_t port, const ossia::scene_spec& v);
   virtual void process(int32_t port, const ossia::transform3d& v);
 
+  /// Source-aware overloads. `source_key` is an opaque identity of the
+  /// upstream output port that produced this data (typically `edge.source`).
+  /// Multiple producers converging on the same sink port each get their own
+  /// storage slot, so their scenes accumulate additively instead of
+  /// overwriting each other. Callers that don't care pass nullptr — all such
+  /// callers then share a single per-port slot (legacy behavior).
+  void process(int32_t port, const ossia::geometry_spec& v, const void* source_key);
+  void process(int32_t port, const ossia::scene_spec& v, const void* source_key);
+
+  /// Returns the first entry of m_portGeometries whose key names `port`,
+  /// regardless of which source produced it, or nullptr when the port has
+  /// none. The pointer refers to the map's own storage.
+  const ossia::geometry_spec* findGeometryByPort(int32_t port) const
+  {
+    for(const auto& [key, geom] : m_portGeometries)
+      if(key.first == port)
+        return &geom;
+    return nullptr;
+  }
+
+  /// Calls `fn(scene)` for every entry of m_portScenes that is keyed on `port`
+  /// and whose `state` is non-null, whatever its source. Entries for other
+  /// ports, and entries without a state, are skipped.
+  template <typename F>
+  void forEachSceneOnPort(int32_t port, F&& fn) const
+  {
+    for(const auto& [key, sceneSpec] : m_portScenes)
+    {
+      if(key.first != port || !sceneSpec.state)
+        continue;
+      fn(sceneSpec);
+    }
+  }
+
+private:
+  /// Recompute `this->scene` from the current per-port inputs, reusing the
+  /// memoized merge when the set of input scene_state pointers is unchanged.
+  void rebuildMergedScene();
+
+public:
+
   const Node& node;
 
   /**
@@ -72,21 +188,100 @@ class SCORE_PLUGIN_GFX_EXPORT NodeRenderer
    */
   ossia::geometry_spec geometry;
 
-  /// Per-port geometry storage for nodes with multiple geometry inputs.
-  /// Key is the input port index.
-  ossia::small_flat_map<int32_t, ossia::geometry_spec, 4> m_portGeometries;
+  /// Per-(port, source) geometry storage. Multi-keyed so multiple upstream
+  /// producers converging on the same sink port each get their own slot
+  /// (additive merge rather than overwrite). The source_key is the upstream
+  /// output Port pointer (opaque void*); nullptr is a valid single-slot key
+  /// for legacy callers.
+  using PortSourceKey = std::pair<int32_t, const void*>;
+  ossia::small_flat_map<PortSourceKey, ossia::geometry_spec, 4> m_portGeometries;
+
+  /**
+   * @brief The scene to use (when receiving scene_spec data).
+   *
+   * When a geometry_spec is received, it is auto-wrapped into a scene_spec
+   * so that downstream scene-aware renderers can always work with scenes.
+   * Backward-compat renderers continue reading the `geometry` field.
+   */
+  ossia::scene_spec scene;
+
+  /// Per-(port, source) scene storage. See m_portGeometries comment.
+  ossia::small_flat_map<PortSourceKey, ossia::scene_spec, 4> m_portScenes;
+
+  /// Merge cache: the set of (scene_state pointer, version) pairs we
+  /// last merged, and the resulting merged scene_spec. Keyed on BOTH
+  /// pointer and version because halp-style producers (Camera,
+  /// Environment, Light, …) keep a stable `m_state`
+  /// shared_ptr and mutate its contents in place — keying on pointer
+  /// alone would return a stale cached merge even after a slider moved.
+  /// The version monotonically bumps on each producer update, so
+  /// (ptr, version) changes whenever content changes.
+  using MergeCacheKey = std::pair<const ossia::scene_state*, int64_t>;
+  ossia::small_vector<MergeCacheKey, 4> m_mergeCacheInputs;
+  ossia::scene_spec m_mergeCacheOutput;
+
+  /// Cache the wrap_geometry_as_scene result per geometry_spec so a
+  /// geometry source re-pushing the same geometry_spec every frame
+  /// produces a stable wrapped-scene shared_ptr (otherwise every frame
+  /// produces a new wrapper → merge cache miss → full re-upload).
+  ossia::small_flat_map<
+      PortSourceKey, std::pair<ossia::geometry_spec, ossia::scene_spec>, 4>
+      m_wrapCache;
 
   int32_t nodeId{-1};
   bool materialChanged{false};
   bool geometryChanged{false};
+  bool sceneChanged{false};
   bool renderTargetSpecsChanged{false};
 
+  /// Guard for idempotent release — prevents double-release of GPU resources.
+  /// Set to true at end of init(), cleared at start of release().
+  bool m_initialized{false};
+
 private:
   int64_t materialChangedIndex{-1};
   int64_t renderTargetSpecsChangedIndex{-1};
 };
 
-using PassMap = ossia::small_vector<std::pair<Edge*, Pipeline>, 2>;
+struct Pass
+{
+  // Both constructors are declared on purpose: the user-provided one makes
+  // Pass a non-aggregate, so the many call sites writing
+  // `Pass{rt, pipeline, ubo}` call the three-argument constructor and
+  // -Wmissing-field-initializers has nothing to say about the omitted
+  // fallback_bindings. That member always starts out empty; call sites
+  // that need fallback bindings assign them after construction.
+  Pass() = default;
+  Pass(TextureRenderTarget rt, Pipeline pi, QRhiBuffer* ubo)
+      : renderTarget{std::move(rt)}
+      , p{pi}
+      , processUBO{ubo}
+  {
+  }
+
+  TextureRenderTarget renderTarget;
+  Pipeline p;
+  QRhiBuffer* processUBO{};
+  // Bindings for "REQUIRED: false" VERTEX_INPUTS that found no matching
+  // upstream attribute when this pass's pipeline was built; empty for
+  // strict-matched shaders (the common case). The draw path binds each
+  // slot's buffer at its `binding_index` before drawing. Non-owning: the
+  // buffers themselves belong to VertexFallbackPool.
+  FallbackBindingPlan fallback_bindings;
+
+  void release()
+  {
+    // renderTarget is deliberately left alone: it is owned by RenderList.
+    p.release();
+    QRhiBuffer* const ubo = processUBO;
+    processUBO = nullptr;
+    if(ubo)
+      ubo->deleteLater();
+    fallback_bindings.clear();
+  }
+};
+
+using PassMap = ossia::small_vector<std::pair<Edge*, Pass>, 2>;
 SCORE_PLUGIN_GFX_EXPORT
 void defaultPassesInit(
     PassMap& passes, const std::vector<Edge*>& edges, RenderList& renderer,
@@ -128,6 +323,26 @@ class SCORE_PLUGIN_GFX_EXPORT GenericNodeRenderer : public score::gfx::NodeRende
   // Pipeline
   PassMap m_p;
 
+  // Per-renderer pipeline cache, keyed by QRhiRenderPassDescriptor pointer.
+  // Edges targeting the same QRhiRenderTarget (and therefore the same
+  // rp-desc pointer) share one QRhiGraphicsPipeline — the pipeline object
+  // is bound to an rp-desc layout, not to the RT object itself, and QRhi
+  // guarantees the same pipeline can be used with any RT whose rp-desc
+  // isCompatible with the pipeline's. Looking up by pointer (rather than
+  // by serialized format) is the conservative choice: a pointer match
+  // means "same rp-desc, same owning RT alive" and cannot collide with a
+  // stale entry because a freshly allocated rp-desc always sits at a
+  // different address than one that was just destroyed via deleteLater.
+  //
+  // Ownership: Pass::p.pipeline is NON-OWNING — the actual QRhiGraphicsPipeline
+  // lives in this cache. Pass::p.srb is still per-edge and owned by the Pass.
+  // GenericNodeRenderer::removeOutputPass and releaseState take care of
+  // nulling Pass::p.pipeline before calling Pipeline::release() so it
+  // does not try to deleteLater() a pointer we still own here.
+  ossia::small_vector<
+      std::pair<QRhiRenderPassDescriptor*, QRhiGraphicsPipeline*>, 2>
+      m_pipelineCache;
+
   MeshBuffers m_meshbufs;
 
   QRhiBuffer* m_processUBO{};
@@ -147,6 +362,13 @@ class SCORE_PLUGIN_GFX_EXPORT GenericNodeRenderer : public score::gfx::NodeRende
 
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+
   void defaultUBOUpdate(RenderList& renderer, QRhiResourceUpdateBatch& res);
   void defaultMeshUpdate(RenderList& renderer, QRhiResourceUpdateBatch& res);
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
@@ -163,7 +385,7 @@ class SCORE_PLUGIN_GFX_EXPORT GenericNodeRenderer : public score::gfx::NodeRende
 
   void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override;
 
-  void updateInputTexture(const Port& input, QRhiTexture* tex) override;
+  void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override;
 };
 
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp
index 7275300449..bfddcb57c8 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp
@@ -1,3 +1,4 @@
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
 namespace score::gfx
 {
@@ -15,4 +16,31 @@ void OutputNodeRenderer::finishFrame(
 {
 }
 
+GpuResourceRegistry& OutputNode::acquireRegistry()
+{
+  // Lazy, one-shot allocation: the registry is created on the first call and
+  // the same instance is handed back on every later call, until
+  // releaseRegistry() resets the pointer. Keeping it on the OutputNode rather
+  // than on a RenderList is what lets its contents outlive RenderList
+  // rebuilds (e.g. the resize path).
+  if(m_registry == nullptr)
+    m_registry = std::make_unique<GpuResourceRegistry>();
+  return *m_registry;
+}
+
+void OutputNode::releaseRegistry()
+{
+  // Nothing was ever acquired (or it was already released): idempotent no-op.
+  if(!m_registry)
+    return;
+
+  // Must run from destroyOutput() while the QRhi is still alive:
+  // destroyOwned() deletes the QRhiBuffer / QRhiTexture / QRhiSampler
+  // wrappers directly instead of going through a deleteLater path, and
+  // their destructors need a live QRhi.
+  m_registry->destroyOwned();
+  // Drop the registry so the next acquireRegistry() allocates a fresh one.
+  m_registry.reset();
+}
+
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp
index 5618ae07d7..0059662cc1 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp
@@ -5,8 +5,12 @@
 #include <Gfx/Graph/Uniforms.hpp>
 
 #include <score_plugin_gfx_export.h>
+
+#include <memory>
+
 namespace score::gfx
 {
+class GpuResourceRegistry;
 struct OutputConfiguration
 {
   GraphicsApi graphicsApi{};
@@ -21,6 +25,12 @@ class SCORE_PLUGIN_GFX_EXPORT OutputNodeRenderer : public score::gfx::NodeRender
   virtual ~OutputNodeRenderer();
   virtual void
   finishFrame(RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res);
+
+  // Sinks have no output edges, so there is nothing to release per-edge.
+  // Concrete sinks may still override (e.g. to drop per-input bookkeeping
+  // routed through addOutputPass), but the default is a true no-op rather
+  // than the dangerous silent base-class no-op.
+  void removeOutputPass(RenderList&, Edge&) override { }
 };
 
 class Window;
@@ -69,7 +79,55 @@ class SCORE_PLUGIN_GFX_EXPORT OutputNode : public score::gfx::Node
 
   virtual Configuration configuration() const noexcept = 0;
 
+  /**
+   * @brief Persistent GPU resource registry for this output.
+   *
+   * Persist-across-rebuild contract: this used to live on the
+   * RenderList (created in RenderList::init, destroyed in
+   * RenderList::release), so every viewport-resize-driven RL rebuild
+   * threw away ~100 MiB of texture-array data, the mesh slabs, and
+   * the producer arena slot indices — all of which describe scene
+   * content, not framebuffer state. Hoisting ownership to the
+   * OutputNode lets these survive across `Graph::recreateOutputRenderList`.
+   *
+   * Lifetime: lazy-allocated on first acquireRegistry() call (typically
+   * from RenderList::init), tied to the OutputNode's QRhi. Concrete
+   * outputs MUST call releaseRegistry() inside their destroyOutput()
+   * BEFORE tearing down the QRhi (via RenderState::destroy or
+   * setSwapchainFormat-style replacement) — otherwise the registry's
+   * QRhi resources would be freed against a destroyed device.
+   *
+   * Returns a non-null reference. Always allocates if the slot is empty.
+   */
+  GpuResourceRegistry& acquireRegistry();
+
+  /**
+   * @brief Non-owning accessor. Returns null if no registry has been
+   * acquired yet (e.g. queried before the first RenderList::init).
+   */
+  GpuResourceRegistry* registry() const noexcept { return m_registry.get(); }
+
+  /**
+   * @brief Tear down the registry's QRhi resources directly. Idempotent.
+   *
+   * MUST be called by concrete subclasses' destroyOutput() before they
+   * tear down the QRhi. Calls GpuResourceRegistry::destroyOwned() which
+   * `delete`s the buffer / texture / sampler wrappers (the QRhi is
+   * still alive at that point — the caller's responsibility), then
+   * resets the unique_ptr so a subsequent acquireRegistry() rebuilds
+   * fresh against the new QRhi.
+   *
+   * Safe to call when no registry exists (no-op).
+   */
+  void releaseRegistry();
+
 protected:
   explicit OutputNode();
+
+  // Persistent across RenderList rebuilds. See acquireRegistry() docs.
+  // unique_ptr is opaque-typed in this header (forward-declared above);
+  // its destructor needs the full type, hence the out-of-line ~OutputNode
+  // implementation in OutputNode.cpp.
+  std::unique_ptr<GpuResourceRegistry> m_registry;
 };
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.cpp
new file mode 100644
index 0000000000..ac58cefc93
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.cpp
@@ -0,0 +1,360 @@
+#include "PipelineStateHelpers.hpp"
+
+#include <algorithm>
+#include <cctype>
+
+namespace
+{
+// Case-insensitive equality that ignores '_', '-' and ' ' in BOTH strings, so
+// "less_or_equal" equals "lessOrEqual" and a literal like "triangle_strip"
+// still matches. Aliases such as "lequal" are handled by the callers.
+static bool ieq(std::string_view a, const char* b)
+{
+  const auto sep = [](char c) { return c == '_' || c == '-' || c == ' '; };
+  const auto low = [](char c) { return std::tolower((unsigned char)c); };
+  for(const char ca : a)
+  {
+    if(sep(ca))
+      continue;
+    while(sep(*b)) ++b; // separators in the literal are ignored too
+    if(*b == '\0' || low(ca) != low(*b))
+      return false; // literal exhausted early, or a real mismatch
+    ++b;
+  }
+  while(sep(*b)) ++b;
+  return *b == '\0'; // the literal must be fully consumed as well
+}
+}
+
+namespace score::gfx
+{
+
+QRhiGraphicsPipeline::CompareOp toCompareOp(std::string_view s) noexcept
+{
+  // Unknown spellings fall through to Less. Short aliases cover all six ops.
+  using P = QRhiGraphicsPipeline;
+  if(ieq(s, "never"))  return P::Never;
+  if(ieq(s, "less") || ieq(s, "l") || ieq(s, "lt")) return P::Less;
+  if(ieq(s, "equal") || ieq(s, "eq")) return P::Equal;
+  if(ieq(s, "lessorequal") || ieq(s, "lessequal") || ieq(s, "lequal") || ieq(s, "le"))
+    return P::LessOrEqual;
+  if(ieq(s, "greater") || ieq(s, "g") || ieq(s, "gt")) return P::Greater;
+  if(ieq(s, "notequal") || ieq(s, "neq") || ieq(s, "ne")) return P::NotEqual;
+  if(ieq(s, "greaterorequal") || ieq(s, "greaterequal") || ieq(s, "gequal") || ieq(s, "ge"))
+    return P::GreaterOrEqual;
+  if(ieq(s, "always")) return P::Always;
+  return P::Less;
+}
+
+QRhiGraphicsPipeline::CullMode toCullMode(std::string_view s) noexcept
+{
+  // "none" and every unrecognized spelling both mean: no culling.
+  if(ieq(s, "front"))
+    return QRhiGraphicsPipeline::Front;
+  return ieq(s, "back") ? QRhiGraphicsPipeline::Back : QRhiGraphicsPipeline::None;
+}
+
+QRhiGraphicsPipeline::FrontFace toFrontFace(std::string_view s) noexcept
+{
+  // Counter-clockwise is both an accepted value and the fallback for
+  // unknown spellings, so only the clockwise names need an explicit test.
+  const bool clockwise = ieq(s, "cw") || ieq(s, "clockwise");
+  return clockwise ? QRhiGraphicsPipeline::CW
+                   : QRhiGraphicsPipeline::CCW;
+}
+
+QRhiGraphicsPipeline::PolygonMode toPolygonMode(std::string_view s) noexcept
+{
+  // Fill doubles as the fallback, so only the wireframe names are tested.
+  const bool wire = ieq(s, "line") || ieq(s, "wireframe");
+  return wire ? QRhiGraphicsPipeline::Line : QRhiGraphicsPipeline::Fill;
+}
+
+QRhiGraphicsPipeline::Topology toTopology(std::string_view s) noexcept
+{
+  // Literals carry no '_': ieq() already drops separators from `s`, so
+  // "triangle_strip", "TriangleStrip" and "triangle strip" all match.
+  using P = QRhiGraphicsPipeline;
+  if(ieq(s, "triangles") || ieq(s, "trianglelist")) return P::Triangles;
+  if(ieq(s, "trianglestrip")) return P::TriangleStrip;
+  if(ieq(s, "trianglefan"))   return P::TriangleFan;
+  if(ieq(s, "lines") || ieq(s, "linelist")) return P::Lines;
+  if(ieq(s, "linestrip")) return P::LineStrip;
+  return ieq(s, "points") ? P::Points : P::Triangles;
+}
+
+QRhiGraphicsPipeline::BlendFactor toBlendFactor(std::string_view s) noexcept
+{
+  // "1-x" aliases are spelled "1x": ieq() drops '-' from `s` before comparing.
+  using B = QRhiGraphicsPipeline;
+  if(ieq(s, "zero"))                  return B::Zero;
+  if(ieq(s, "one"))                   return B::One;
+  if(ieq(s, "srccolor"))              return B::SrcColor;
+  if(ieq(s, "oneminussrccolor") || ieq(s, "1srccolor")) return B::OneMinusSrcColor;
+  if(ieq(s, "dstcolor"))              return B::DstColor;
+  if(ieq(s, "oneminusdstcolor") || ieq(s, "1dstcolor")) return B::OneMinusDstColor;
+  if(ieq(s, "srcalpha"))              return B::SrcAlpha;
+  if(ieq(s, "oneminussrcalpha") || ieq(s, "1srcalpha")) return B::OneMinusSrcAlpha;
+  if(ieq(s, "dstalpha"))              return B::DstAlpha;
+  if(ieq(s, "oneminusdstalpha") || ieq(s, "1dstalpha")) return B::OneMinusDstAlpha;
+  if(ieq(s, "constantcolor"))         return B::ConstantColor;
+  if(ieq(s, "oneminusconstantcolor") || ieq(s, "1constantcolor")) return B::OneMinusConstantColor;
+  if(ieq(s, "constantalpha"))         return B::ConstantAlpha;
+  if(ieq(s, "oneminusconstantalpha") || ieq(s, "1constantalpha")) return B::OneMinusConstantAlpha;
+  if(ieq(s, "srcalphasaturate"))      return B::SrcAlphaSaturate;
+  if(ieq(s, "src1color"))             return B::Src1Color;
+  if(ieq(s, "oneminussrc1color"))     return B::OneMinusSrc1Color;
+  if(ieq(s, "src1alpha"))             return B::Src1Alpha;
+  return ieq(s, "oneminussrc1alpha") ? B::OneMinusSrc1Alpha : B::One;
+}
+
+QRhiGraphicsPipeline::BlendOp toBlendOp(std::string_view s) noexcept
+{
+  // "add" needs no test of its own: it is also the fallback value.
+  using Pip = QRhiGraphicsPipeline;
+  if(ieq(s, "sub") || ieq(s, "subtract")) return Pip::Subtract;
+  if(ieq(s, "revsub") || ieq(s, "reversesubtract")) return Pip::ReverseSubtract;
+  if(ieq(s, "min")) return Pip::Min;
+  if(ieq(s, "max")) return Pip::Max;
+  return Pip::Add;
+}
+
+QRhiGraphicsPipeline::StencilOp toStencilOp(std::string_view s) noexcept
+{
+  // "keep" is not tested explicitly: it is what every unknown name maps to.
+  using Pip = QRhiGraphicsPipeline;
+  if(ieq(s, "zero"))    return Pip::StencilZero;
+  if(ieq(s, "replace")) return Pip::Replace;
+  if(ieq(s, "invert"))  return Pip::Invert;
+  if(ieq(s, "increment") || ieq(s, "incclamp") || ieq(s, "incrementandclamp"))
+    return Pip::IncrementAndClamp;
+  if(ieq(s, "decrement") || ieq(s, "decclamp") || ieq(s, "decrementandclamp"))
+    return Pip::DecrementAndClamp;
+  if(ieq(s, "incwrap") || ieq(s, "incrementandwrap"))
+    return Pip::IncrementAndWrap;
+  if(ieq(s, "decwrap") || ieq(s, "decrementandwrap"))
+    return Pip::DecrementAndWrap;
+  return Pip::Keep;
+}
+
+QRhiGraphicsPipeline::ColorMask toColorMask(std::string_view s) noexcept
+{
+  // Each of r/g/b/a (any case) switches one channel on; every other
+  // character is ignored. A string that enables no channel at all is
+  // treated as "write all four channels".
+  using Pip = QRhiGraphicsPipeline;
+  Pip::ColorMask mask = Pip::ColorMask(0);
+  for(const char ch : s)
+  {
+    const int lower = std::tolower((unsigned char)ch);
+    if(lower == 'r') mask |= Pip::R;
+    else if(lower == 'g') mask |= Pip::G;
+    else if(lower == 'b') mask |= Pip::B;
+    else if(lower == 'a') mask |= Pip::A;
+  }
+  if(mask == Pip::ColorMask(0))
+    mask = Pip::R | Pip::G | Pip::B | Pip::A;
+  return mask;
+}
+
+QRhiGraphicsPipeline::TargetBlend toTargetBlend(const isf::blend_attachment& b) noexcept
+{
+  QRhiGraphicsPipeline::TargetBlend target; // ISF strings -> QRhi enums, per field
+  target.enable = b.enable;
+  target.colorWrite = toColorMask(b.color_write);
+  target.opColor  = toBlendOp(b.op_color); // color equation
+  target.srcColor = toBlendFactor(b.src_color);
+  target.dstColor = toBlendFactor(b.dst_color);
+  target.opAlpha  = toBlendOp(b.op_alpha); // alpha equation
+  target.srcAlpha = toBlendFactor(b.src_alpha);
+  target.dstAlpha = toBlendFactor(b.dst_alpha);
+  return target;
+}
+
+QRhiGraphicsPipeline::StencilOpState toStencilOpState(const isf::stencil_op_state& s) noexcept
+{
+  QRhiGraphicsPipeline::StencilOpState ops; // ISF names -> QRhi stencil enums
+  ops.compareOp   = toCompareOp(s.compare_op);
+  ops.passOp      = toStencilOp(s.pass_op);
+  ops.failOp      = toStencilOp(s.fail_op);
+  ops.depthFailOp = toStencilOp(s.depth_fail_op);
+  return ops;
+}
+
+// --- pipeline_state manipulation ------------------------------------------
+// mergeState: fields set in `over` win. NOTE(review): shading_rate is not merged - confirm.
+isf::pipeline_state mergeState(isf::pipeline_state base, const isf::pipeline_state& over)
+{
+  if(over.depth_test.has_value())             base.depth_test = over.depth_test;
+  if(over.depth_write.has_value())            base.depth_write = over.depth_write;
+  if(over.depth_compare.has_value())          base.depth_compare = over.depth_compare;
+  if(over.depth_bias.has_value())             base.depth_bias = over.depth_bias;
+  if(over.slope_scaled_depth_bias.has_value())base.slope_scaled_depth_bias = over.slope_scaled_depth_bias;
+  if(over.cull_mode.has_value())              base.cull_mode = over.cull_mode;
+  if(over.front_face.has_value())             base.front_face = over.front_face;
+  if(over.polygon_mode.has_value())           base.polygon_mode = over.polygon_mode;
+  if(over.line_width.has_value())             base.line_width = over.line_width;
+  if(over.vertex_count.has_value())           base.vertex_count = over.vertex_count;
+  if(over.instance_count.has_value())         base.instance_count = over.instance_count;
+  if(over.topology.has_value())               base.topology = over.topology;
+  if(over.blend_all.has_value())              base.blend_all = over.blend_all;
+  if(!over.blend_per_attachment.empty())      base.blend_per_attachment = over.blend_per_attachment;
+  if(over.stencil_test.has_value())           base.stencil_test = over.stencil_test;
+  if(over.stencil_read_mask.has_value())      base.stencil_read_mask = over.stencil_read_mask;
+  if(over.stencil_write_mask.has_value())     base.stencil_write_mask = over.stencil_write_mask;
+  if(over.stencil_front.has_value())          base.stencil_front = over.stencil_front;
+  if(over.stencil_back.has_value())           base.stencil_back = over.stencil_back;
+  return base;
+}
+
+bool stateAffectsPipeline(const isf::pipeline_state& s) noexcept
+{
+  return s.depth_test.has_value()
+      || s.depth_write.has_value()
+      || s.depth_compare.has_value()
+      || s.depth_bias.has_value()
+      || s.slope_scaled_depth_bias.has_value()
+      || s.cull_mode.has_value()
+      || s.front_face.has_value()
+      || s.polygon_mode.has_value()
+      || s.line_width.has_value()
+      || s.blend_all.has_value()
+      || !s.blend_per_attachment.empty()
+      || s.stencil_test.has_value()
+      || s.stencil_read_mask.has_value()
+      || s.stencil_write_mask.has_value()
+      || s.stencil_front.has_value()
+      || s.stencil_back.has_value()
+      || s.topology.has_value()
+#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
+      // shading_rate toggles the QRhiGraphicsPipeline::UsesShadingRate opt-in
+      // flag (set in Utils.cpp buildPipelineWithState), so it does affect the
+      // pipeline even though the per-draw rate itself is recorded on the
+      // command buffer at draw time.
+      || s.shading_rate.has_value()
+#endif
+      ;
+  // The result is true as soon as any one of the fields tested above is set.
+  // vertex_count / instance_count are deliberately not tested: they change
+  // draw arguments, not pipeline state.
+}
+
+void applyPipelineState(
+    QRhiGraphicsPipeline& pip,
+    const isf::pipeline_state& state,
+    int colorAttachmentCount,
+    bool depthAttachmentAvailable,
+    bool wantsDepthByDefault) noexcept
+{
+  // ── Depth ──────────────────────────────────────────────────────────
+  // Depth test / write are only overridden when explicitly set, OR when they
+  // must be forced off (no depth attachment, or !wantsDepthByDefault); else
+  // whatever the caller / mesh.preparePipeline configured is preserved.
+  if(state.depth_test.has_value())
+  {
+    pip.setDepthTest(depthAttachmentAvailable && *state.depth_test);
+  }
+  else if(!depthAttachmentAvailable || !wantsDepthByDefault)
+  {
+    pip.setDepthTest(false);
+  }
+
+  if(state.depth_write.has_value())
+  {
+    pip.setDepthWrite(depthAttachmentAvailable && *state.depth_write);
+  }
+  else if(!depthAttachmentAvailable || !wantsDepthByDefault)
+  {
+    pip.setDepthWrite(false);
+  }
+
+  // Reverse-Z project rule: when the shader didn't pick a compare op
+  // explicitly, always write Greater (near → 1.0, far → 0.0 in the float
+  // depth buffer), even if depth ended up disabled above. QRhi's built-in
+  // default is Less, which rejects every fragment under reverse-Z conventions.
+  if(state.depth_compare.has_value())
+    pip.setDepthOp(toCompareOp(*state.depth_compare));
+  else
+    pip.setDepthOp(QRhiGraphicsPipeline::Greater);
+  if(state.depth_bias.has_value())
+    pip.setDepthBias((int)*state.depth_bias); // NOTE(review): narrowed to int — confirm intended
+  if(state.slope_scaled_depth_bias.has_value())
+    pip.setSlopeScaledDepthBias(*state.slope_scaled_depth_bias);
+
+  // ── Cull / front-face / polygon mode ────────────────────────────────
+  // Only override when explicitly set; else preserve the caller's setup.
+  if(state.cull_mode.has_value())
+    pip.setCullMode(toCullMode(*state.cull_mode));
+
+  if(state.front_face.has_value())
+    pip.setFrontFace(toFrontFace(*state.front_face));
+
+  if(state.polygon_mode.has_value())
+    pip.setPolygonMode(toPolygonMode(*state.polygon_mode));
+
+  if(state.line_width.has_value())
+    pip.setLineWidth(*state.line_width);
+
+  // Topology override (paired with vertex_count for procedural draws):
+  // lets a shader that uses VERTEX_COUNT emit points / lines / strips
+  // without depending on the incoming geometry's topology.
+  if(state.topology.has_value())
+    pip.setTopology(toTopology(*state.topology));
+
+  // ── Blending ────────────────────────────────────────────────────────
+  // Only override target blends when the shader explicitly declares blend
+  // state. Otherwise the caller's seeded blend (e.g. legacy premul-alpha)
+  // is preserved bit-exact.
+  const int nAttachments = std::max(1, colorAttachmentCount); // always emit at least one target blend
+  if(!state.blend_per_attachment.empty())
+  {
+    QVarLengthArray<QRhiGraphicsPipeline::TargetBlend, 4> blends;
+    blends.reserve(nAttachments);
+    for(int i = 0; i < nAttachments; ++i)
+    {
+      std::size_t idx = std::min<std::size_t>(i, state.blend_per_attachment.size() - 1);
+      blends.push_back(toTargetBlend(state.blend_per_attachment[idx]));
+    }
+    pip.setTargetBlends(blends.begin(), blends.end());
+  }
+  else if(state.blend_all.has_value())
+  {
+    QVarLengthArray<QRhiGraphicsPipeline::TargetBlend, 4> blends;
+    blends.reserve(nAttachments);
+    auto t = toTargetBlend(*state.blend_all);
+    for(int i = 0; i < nAttachments; ++i)
+      blends.push_back(t);
+    pip.setTargetBlends(blends.begin(), blends.end());
+  }
+
+  // ── Stencil ─────────────────────────────────────────────────────────
+  // Toggle is gated on `stencil_test` only; sub-fields apply
+  // independently so a shader can override e.g. front op without
+  // re-stating `stencil_test`.
+  if(state.stencil_test.has_value())
+    pip.setStencilTest(*state.stencil_test);
+  if(state.stencil_front.has_value())
+    pip.setStencilFront(toStencilOpState(*state.stencil_front));
+  if(state.stencil_back.has_value())
+    pip.setStencilBack(toStencilOpState(*state.stencil_back));
+  if(state.stencil_read_mask.has_value())
+    pip.setStencilReadMask(*state.stencil_read_mask);
+  if(state.stencil_write_mask.has_value())
+    pip.setStencilWriteMask(*state.stencil_write_mask);
+
+  // ── Variable-rate shading (per-draw rate) ───────────────────────────
+  // NOTE: there is NO QRhiGraphicsPipeline::setShadingRate() and no
+  // QRhiGraphicsPipeline::ShadingRate enum in ANY Qt version (the previous
+  // code here did not compile on the >=6.12 builds it claimed to target).
+  // The pipeline only carries the opt-in flag
+  // QRhiGraphicsPipeline::UsesShadingRate, which Utils.cpp's
+  // buildPipelineWithState() already sets when caps.variableRateShading is
+  // true. The actual per-draw coarse-pixel rate is the command-buffer state
+  // QRhiCommandBuffer::setShadingRate(QSize), which must be recorded between
+  // setGraphicsPipeline() and draw() at the draw site (CustomMesh::draw /
+  // Mesh::draw). applyPipelineState() has no command buffer in scope, so it
+  // intentionally does nothing with state.shading_rate here. The requested
+  // {w,h} maps directly to the coarse-pixel QSize (clamped to {1,2,4}).
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.hpp
new file mode 100644
index 0000000000..5984d32ca1
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.hpp
@@ -0,0 +1,85 @@
+#pragma once
+#include <isf.hpp>
+
+#include <QtGui/private/qrhi_p.h>
+
+#include <score_plugin_gfx_export.h>
+
+#include <string_view>
+
+namespace score::gfx
+{
+
+// --- String → Qt RHI enum mappers ----------------------------------------
+//
+// All mappers are case-insensitive and accept common synonyms
+// (e.g. "lequal" / "less_equal" both map to CompareOp::LessOrEqual).
+// Unknown strings fall back to a sensible default (documented per function).
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::CompareOp toCompareOp(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::CullMode toCullMode(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::FrontFace toFrontFace(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::PolygonMode toPolygonMode(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::BlendFactor toBlendFactor(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::BlendOp toBlendOp(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::StencilOp toStencilOp(std::string_view s) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::ColorMask toColorMask(std::string_view s) noexcept;
+
+// --- Conversion helpers ---------------------------------------------------
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::TargetBlend toTargetBlend(const isf::blend_attachment& b) noexcept;
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiGraphicsPipeline::StencilOpState toStencilOpState(const isf::stencil_op_state& s) noexcept;
+
+// --- pipeline_state manipulation ------------------------------------------
+
+// Merge two pipeline_states: every field that is set in `over` wins, otherwise
+// `base`'s field is kept. Used to combine the descriptor's global state with a
+// per-pass override_state.
+SCORE_PLUGIN_GFX_EXPORT
+isf::pipeline_state mergeState(isf::pipeline_state base, const isf::pipeline_state& over);
+
+// Returns true if any pipeline-affecting field is set (vertex_count / instance_count are ignored).
+SCORE_PLUGIN_GFX_EXPORT
+bool stateAffectsPipeline(const isf::pipeline_state&) noexcept;
+
+// Apply the state to a graphics pipeline.
+// - `colorAttachmentCount`: used to size per-attachment blend vectors.
+// - `depthAttachmentAvailable`: true when the target RT has a depth attachment;
+//   depth-test/write are forced off otherwise.
+// - `wantsDepthByDefault`: legacy fallback. When false, depth test and depth
+//   write are each force-disabled unless `state` sets that field explicitly
+//   (equivalent to today's `!renderer.anyNodeRequiresDepth()` path).
+//
+// Cull, front-face, polygon mode, line width, topology, blend and stencil are
+// only overridden when explicitly set in `state`; otherwise whatever the caller
+// (or `mesh.preparePipeline()`) configured is preserved, so the caller must seed
+// sensible defaults (e.g. premul-alpha blend) before invoking this. Exception:
+// the depth compare op is always written, defaulting to Greater (reverse-Z)
+// when `state.depth_compare` is unset.
+SCORE_PLUGIN_GFX_EXPORT
+void applyPipelineState(
+    QRhiGraphicsPipeline& pip,
+    const isf::pipeline_state& state,
+    int colorAttachmentCount,
+    bool depthAttachmentAvailable,
+    bool wantsDepthByDefault) noexcept;
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp
index 80a89926b2..9151b8e6e5 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp
@@ -36,9 +36,15 @@ std::shared_ptr<RenderState> importRenderState(QSize sz, QRhi* rhi)
   }
   state.version = Gfx::Settings::shaderVersionForAPI(state.api);
   state.rhi = rhi;
-  state.samples = 1; // FIXME
+  // The host widget owns this rhi, so we can't follow the global samples
+  // setting here — but we should at least query what the rhi actually
+  // supports rather than assuming 1. Final RT sample count is set by the
+  // host via setSampleCount on its own swap chain.
+  state.samples = rhi->supportedSampleCounts().value(0, 1);
   state.renderSize = sz;
   state.outputSize = sz;
+
+  state.caps.populate(*rhi);
   return st;
 }
 
@@ -106,7 +112,24 @@ void PreviewNode::createOutput(score::gfx::OutputConfiguration conf)
   conf.onReady();
 }
 
-void PreviewNode::destroyOutput() { }
+void PreviewNode::destroyOutput()
+{
+  // Persist-across-rebuild contract: the registry survives RenderList
+  // teardown, so its QRhi resources must be released here (BEFORE we drop
+  // our RenderState reference) while the host-owned QRhi is still alive.
+  // The host (Qt widget) is responsible for outliving us, but we tear
+  // down our own resources first to keep the contract symmetric with
+  // ScreenNode / BackgroundNode / MultiWindowNode.
+  releaseRegistry();
+
+  // Host owns the underlying QRhi and the m_renderTarget / m_texture aliases
+  // — we don't free those. m_renderState (a shared_ptr<RenderState>) is the
+  // only piece PreviewNode actually owns; reset it so a createOutput →
+  // destroyOutput → createOutput cycle drops the prior state instead of
+  // relying on make_shared assignment to release the previous holder. Matches
+  // the unified sink contract every other OutputNode subclass observes.
+  m_renderState.reset();
+}
 
 std::shared_ptr<score::gfx::RenderState> PreviewNode::renderState() const
 {
@@ -233,7 +256,7 @@ class PreviewRendererInvertY final : public score::gfx::OutputNodeRenderer
       score::gfx::RenderList& renderer, QRhiCommandBuffer& cb,
       QRhiResourceUpdateBatch*& res) override
   {
-    cb.beginPass(m_renderTarget.renderTarget, Qt::black, {1.0f, 0}, res);
+    cb.beginPass(m_renderTarget.renderTarget, Qt::black, {0.0f, 0}, res);
     res = nullptr;
     {
       const auto sz = renderer.state.renderSize;
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp
index 768b003273..f689aa964d 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp
@@ -1,13 +1,20 @@
 
 #include <Gfx/Graph/CustomMesh.hpp>
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
 #include <Gfx/Graph/Mesh.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/VertexFallbackPool.hpp>
 #include <Gfx/Settings/Model.hpp>
 
 #include <score/tools/Debug.hpp>
 
+#include <QVarLengthArray>
+
+#include <array>
+#include <chrono>
+
 //#define RENDERDOC_PROFILING 0
 #if defined(RENDERDOC_PROFILING)
 #include "renderdoc_app.h"
@@ -59,6 +66,20 @@ RenderList::RenderList(OutputNode& output, const std::shared_ptr<RenderState>& s
 
 RenderList::~RenderList()
 {
+  // Defensive: run release() here too. The normal path is Graph::~Graph
+  // calling release() on every RL before the destructor fires, but a
+  // late onResize during app shutdown can spawn a brand-new RL (via
+  // Graph::recreateOutputRenderList) after the ~Graph loop has already
+  // moved past the release step. That new RL reaches ~RenderList
+  // without anyone having freed its QRhi resources — by the time the
+  // shared_ptr drops, the output node's destroyOutput() is next in
+  // line, calling RenderState::destroy() → vkDestroyDevice on a device
+  // that still owns the new RL's empty textures, InvertYRenderer's
+  // render target, etc. (observed as VUID-vkDestroyDevice-device-05137
+  // leaks of a handful of VkImages + views + one render pass +
+  // framebuffer). release() is idempotent, so calling it again when
+  // the Graph already did is a no-op.
+  release();
   for(auto node : this->nodes)
   {
     node->renderedNodes.erase(this);
@@ -84,18 +105,140 @@ void RenderList::init()
   m_outputUBO
       = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(OutputUBO));
   m_outputUBO->setName("RenderList::m_outputUBO");
-  m_outputUBO->create();
-
+  SCORE_ASSERT(m_outputUBO->create());
+
+  // Typed placeholders so that a shader declaring sampler3D / samplerCube /
+  // sampler2DArray / sampler2D can be bound to a view of the matching type
+  // before any upstream edge has delivered a real texture. Without these,
+  // Vulkan's VUID-vkCmdDraw-viewType-07752 fires ("VkImageViewType is
+  // VK_IMAGE_VIEW_TYPE_2D but OpTypeImage has Dim=3D") every frame until
+  // an upstream texture arrives — and forever if no edge ever connects.
+  //
+  // create() must succeed here: a null handle reaches vkUpdateDescriptorSets
+  // as VK_NULL_HANDLE and the NVIDIA driver segfaults while dereferencing
+  // it in a later vkCmdPipelineBarrier. Assert the typed fallbacks exist.
   m_emptyTexture
       = rhi.newTexture(QRhiTexture::RGBA8, QSize{1, 1}, 1, QRhiTexture::Flag{});
   m_emptyTexture->setName("RenderList::m_emptyTexture");
-  m_emptyTexture->create();
-
-  m_lastSize = state.renderSize;
-
+  SCORE_ASSERT(m_emptyTexture->create());
+
+  m_emptyTexture3D = rhi.newTexture(
+      QRhiTexture::RGBA8, 1, 1, 1, 1,
+      QRhiTexture::ThreeDimensional);
+  m_emptyTexture3D->setName("RenderList::m_emptyTexture3D");
+  SCORE_ASSERT(m_emptyTexture3D->create());
+
+  m_emptyTextureCube = rhi.newTexture(
+      QRhiTexture::RGBA8, QSize{1, 1}, 1, QRhiTexture::CubeMap);
+  m_emptyTextureCube->setName("RenderList::m_emptyTextureCube");
+  SCORE_ASSERT(m_emptyTextureCube->create());
+
+  // Must use newTextureArray — the 6-arg newTexture() overload is for 3D
+  // textures (depth > 1 is a volume slice count, not an array layer count),
+  // and QRhi rejects any texture with both ThreeDimensional and TextureArray
+  // flags. Passing TextureArray to the 3D overload happened to be tolerated
+  // by earlier Qt builds on some backends but hits an assertion under the
+  // current validation path.
+  m_emptyTextureArray = rhi.newTextureArray(
+      QRhiTexture::RGBA8, /*arraySize*/ 1, QSize(1, 1));
+  m_emptyTextureArray->setName("RenderList::m_emptyTextureArray");
+  SCORE_ASSERT(m_emptyTextureArray->create());
+
+  // Allocate the initial resource-update batch NOW (before the registry
+  // init below would otherwise allocate it) so we can queue zero-fills
+  // for the empty texture placeholders into the same batch. Vulkan does
+  // NOT zero-initialise new VkImage memory — without these uploads the
+  // placeholders carry device-memory garbage on every fresh RL.
+  //
+  // Why this matters: classic_pbr_openpbr samples cubemaps
+  // (irradiance_map, prefiltered_map, skybox) and a 2D LUT (brdf_lut).
+  // When NO upstream producer is wired for those inputs the consumer
+  // falls back to m_emptyTextureCube / m_emptyTexture. Sampling those
+  // returns the uninit page contents -> the BSDF math reads garbage
+  // -> wildly different IBL contribution per resize ("drift" symptom).
+  // classic_pbr_full doesn't sample any cubemap input, so it never
+  // hits the empty-cubemap fallback and is immune to this bug.
+  //
+  // 1x1 RGBA8 = 4 bytes per face. Cubemap = 6 faces. Total upload per
+  // RL init: 36 bytes (2D + 3D + array + 6 cube faces). Trivial.
   SCORE_ASSERT(!m_initialBatch);
   m_initialBatch = state.rhi->nextResourceUpdateBatch();
   SCORE_ASSERT(m_initialBatch);
+  {
+    static const std::array<char, 4> blackPixel{0, 0, 0, 0};
+    QRhiTextureSubresourceUploadDescription src(blackPixel.data(), 4);
+    src.setSourceSize(QSize{1, 1});
+    // 2D
+    {
+      QRhiTextureUploadEntry e(0, 0, src);
+      m_initialBatch->uploadTexture(m_emptyTexture, {e});
+    }
+    // 3D — one slice
+    {
+      QRhiTextureUploadEntry e(0, 0, src);
+      m_initialBatch->uploadTexture(m_emptyTexture3D, {e});
+    }
+    // 2D Array — one layer
+    {
+      QRhiTextureUploadEntry e(0, 0, src);
+      m_initialBatch->uploadTexture(m_emptyTextureArray, {e});
+    }
+    // Cube — six faces
+    {
+      QRhiTextureUploadDescription cubeDesc;
+      QVarLengthArray<QRhiTextureUploadEntry, 6> entries;
+      for(int face = 0; face < 6; ++face)
+        entries.append(QRhiTextureUploadEntry(face, 0, src));
+      cubeDesc.setEntries(entries.cbegin(), entries.cend());
+      m_initialBatch->uploadTexture(m_emptyTextureCube, cubeDesc);
+    }
+  }
+
+  // Scene-graph arena store (camera / light / material / per_draw
+  // buffers). Source nodes grab slots from it at construction and
+  // write their own packed bytes at their own update(), so
+  // ScenePreprocessor never CPU-touches this data in the render path.
+  //
+  // Persist-across-rebuild contract: the registry is OWNED by the
+  // OutputNode (OutputNode::m_registry). On the first RL for this
+  // output it is freshly allocated + init()'d; on every subsequent
+  // RL rebuild (viewport resize / fallback rebuild path) we adopt
+  // the populated state as-is. Skipping the re-init() preserves
+  // ~100 MiB of texture-array layers, ~70 K-vertex mesh slabs, every
+  // arena buffer (no zero-fill), and all producer slot indices —
+  // none of that scene-content data depends on framebuffer size.
+  // See REPORT/OPT-resize-perf.md §3 #2 for the full cost analysis.
+  m_registry = &output.acquireRegistry();
+  if(!m_registry->isInitialized())
+  {
+    m_registry->init(rhi, *m_initialBatch);
+    // Seed reserved arena slots (e.g. Material slot 0 = default white
+    // dielectric). Runs after registry init so the seed lands AFTER the
+    // arena zero-fill (uploadStaticBuffer ordering is preserved within
+    // the same batch). Idempotent on repeat calls but we gate it here
+    // anyway so the explicit upload only happens when the arena was
+    // actually re-initialised this RL cycle.
+    m_registry->seedDefaults(*m_initialBatch);
+  }
+  else
+  {
+    // Reuse path. Arena buffers, texture arrays, mesh slabs and slot
+    // generations all carry over from the previous RL on this output.
+    // Producers' raw_*_slot members survive (the renderers themselves
+    // are recreated on RL rebuild — they re-allocate fresh slots — but
+    // the slot-stride / generation-table / free-list state is intact).
+    // ScenePreprocessor::init() compares against this same pointer to
+    // decide whether to wipe its m_loaderMaterialSlots / m_envSlot
+    // bookkeeping; matching pointer → no wipe → no re-allocation churn.
+    SCORE_ASSERT(m_registry->boundRhi() == &rhi);
+  }
+
+  // Fallback vertex-buffer pool for "REQUIRED: false" VERTEX_INPUTS.
+  // Lazy-allocates on first use (remapPipelineVertexInputs side), so
+  // zero cost when no shader opts in.
+  m_vertexFallbackPool = std::make_unique<VertexFallbackPool>();
+
+  m_lastSize = state.renderSize;
 }
 
 QRhiResourceUpdateBatch* RenderList::initialBatch() const noexcept
@@ -103,31 +246,151 @@ QRhiResourceUpdateBatch* RenderList::initialBatch() const noexcept
   return m_initialBatch;
 }
 
+QSize RenderList::resolveDownstreamSize(
+    const Node* node,
+    const ossia::small_flat_map<const Port*, RenderTargetSpecs, 16>& resolvedSpecs)
+    const noexcept
+{
+  QSize best{0, 0}; // component-wise max over every sink examined below
+
+  for(const auto* out_port : node->output)
+  {
+    for(const auto* edge : out_port->edges)
+    {
+      const Port* sink = edge->sink;
+
+      // Case 1: sink is the output node — use its render size.
+      if(sink->node == &output)
+      {
+        best = QSize(
+            std::max(best.width(), state.renderSize.width()),
+            std::max(best.height(), state.renderSize.height()));
+        continue;
+      }
+
+      // Case 2: sink port is already present in resolvedSpecs (the caller
+      // fills it while walking the node list in reverse order).
+      if(auto it = resolvedSpecs.find(sink); it != resolvedSpecs.end())
+      {
+        best = QSize(
+            std::max(best.width(), it->second.size.width()),
+            std::max(best.height(), it->second.size.height()));
+        continue;
+      }
+
+      // Case 3: sink has a renderer that provides its own RT
+      // (e.g. Crousti nodes overriding renderTargetForInput).
+      if(auto rn_it = sink->node->renderedNodes.find(this);
+         rn_it != sink->node->renderedNodes.end())
+      {
+        auto tex = rn_it->second->renderTargetForInput(*sink);
+        if(tex.texture)
+        {
+          auto sz = tex.texture->pixelSize();
+          best = QSize(
+              std::max(best.width(), sz.width()),
+              std::max(best.height(), sz.height()));
+          continue;
+        }
+      }
+    }
+  }
+
+  return best; // {0,0} if no downstream found — caller keeps its already-resolved size
+}
+
 void RenderList::createAllInputRenderTargets()
 {
-  int cur_port = 0;
-  for(auto* node : nodes)
+  // Phase 1: resolve specs in reverse topological order (sinks first).
+  // This ensures downstream RTs are resolved before upstream ones,
+  // so that nodes without explicit sizes inherit the downstream size
+  // instead of defaulting to the global output resolution.
+  ossia::small_flat_map<const Port*, RenderTargetSpecs, 16> resolvedSpecs;
+
+  for(auto it = nodes.rbegin(); it != nodes.rend(); ++it)
   {
-    // Output node manages its own RT via its renderer (e.g. ScaledRenderer::m_inputTarget)
+    auto* node = *it;
+    // Output node manages its own RT via its renderer
     if(node == &output)
       continue;
-    cur_port = 0;
+
+    int cur_port = 0;
     for(auto* in : node->input)
     {
       if(in->type == Types::Image
          && (in->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource)
       {
         auto spec = node->resolveRenderTargetSpecs(cur_port, *this);
-        bool wantsDepth = requiresDepth(*in);
-        bool wantsSamplableDepth = (in->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
-        auto rt = score::gfx::createRenderTarget(
-            state, spec.format, spec.size, samples(),
-            wantsDepth || wantsSamplableDepth, wantsSamplableDepth);
-        m_inputRenderTargets[in] = std::move(rt);
+
+        // If no explicit size, inherit from downstream.
+        if(!node->hasExplicitRenderTargetSize(cur_port))
+        {
+          QSize downstream = resolveDownstreamSize(node, resolvedSpecs);
+          if(!downstream.isEmpty())
+            spec.size = downstream;
+          // else: keep the size resolveRenderTargetSpecs() returned (ultimate fallback)
+        }
+
+        resolvedSpecs[in] = spec;
       }
       cur_port++;
     }
   }
+
+  // Phase 2: create one render target per entry of resolvedSpecs.
+  for(auto& [port, spec] : resolvedSpecs)
+  {
+    bool wantsDepth = requiresDepth(*port);
+    bool wantsSamplableDepth
+        = (port->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
+    auto rt = score::gfx::createRenderTarget(
+        state, spec.format, spec.size, samples(),
+        wantsDepth || wantsSamplableDepth, wantsSamplableDepth);
+    m_inputRenderTargets[port] = std::move(rt);
+  }
+}
+
+void RenderList::onEdgeRemoved(
+    Edge& edge, const ossia::hash_set<const Port*>* preserveSinks)
+{
+  // Notify source renderer
+  if(auto src_it = edge.source->node->renderedNodes.find(this);
+     src_it != edge.source->node->renderedNodes.end())
+  {
+    src_it->second->removeOutputPass(*this, edge);
+  }
+
+  // Notify sink renderer (no resource-update batch is passed to removeInputEdge here)
+  if(auto sink_it = edge.sink->node->renderedNodes.find(this);
+     sink_it != edge.sink->node->renderedNodes.end())
+  {
+    sink_it->second->removeInputEdge(*this, edge);
+  }
+
+  // If the sink port has no more edges after this one is removed
+  // (called before actual edge destruction, so the edge is still in the list),
+  // release the render target — unless the caller has told us a new feed
+  // is coming in the same batch. Inserting a filter between A and B would
+  // otherwise destroy B's input RT here, only for reconcile to immediately
+  // re-allocate an RT with the same spec at the same slot. The caller is
+  // responsible for only marking sinks whose RT specs will remain valid;
+  // a mismatch is picked up later by the rt_changed surgical path in
+  // render().
+  if(edge.sink->edges.size() <= 1)
+  {
+    if(!preserveSinks || !preserveSinks->contains(edge.sink))
+      removeInputRenderTarget(edge.sink);
+  }
+}
+
+void RenderList::removeInputRenderTarget(const Port* port)
+{
+  auto it = m_inputRenderTargets.find(port);
+  if(it != m_inputRenderTargets.end()) // no-op when the port has no entry in the map
+  {
+    it->second.release(); // release the render target before erasing its map entry
+    m_inputRenderTargets.erase(it);
+  }
+}
 
 TextureRenderTarget RenderList::renderTargetForInputPort(const Port& p) const noexcept
@@ -155,7 +418,15 @@ void RenderList::release()
   {
     for(auto& b : bufs.second.buffers)
     {
-      delete b.handle;
+      // Only delete buffers this RenderList owns. Borrowed gpu_buffer
+      // handles (e.g., the scene preprocessor's MDI arena buffers, the
+      // GpuResourceRegistry's arena buffers wrapped as gpu_buffer in the
+      // emitted geometry) are destroyed by their original producer and
+      // must NOT be raw-deleted here — otherwise the later
+      // registry->destroy() hits a freed pointer in
+      // QRhiResource::deleteLater.
+      if(b.owned && b.handle)
+        delete b.handle;
     }
   }
 
@@ -172,6 +443,36 @@ void RenderList::release()
   delete m_emptyTexture;
   m_emptyTexture = nullptr;
 
+  // The 3 typed empty-texture placeholders are also allocated in init()
+  // but were originally missing from the release path — they leaked on
+  // every maybeRebuild cycle (ASan flagged both createRenderList's and
+  // maybeRebuild's init() call sites).
+  delete m_emptyTexture3D;
+  m_emptyTexture3D = nullptr;
+
+  delete m_emptyTextureCube;
+  m_emptyTextureCube = nullptr;
+
+  delete m_emptyTextureArray;
+  m_emptyTextureArray = nullptr;
+
+  // Persist-across-rebuild contract: do NOT destroy the registry here.
+  // It is owned by the OutputNode and survives RL rebuild — the next
+  // createRenderList for this output will re-adopt the same instance
+  // and skip the (expensive) init() path. The actual QRhi-resource
+  // teardown lives in OutputNode::releaseRegistry() which the concrete
+  // sink (ScreenNode / BackgroundNode / MultiWindowNode / ...) calls
+  // from destroyOutput() before the QRhi itself is freed. Just clear
+  // our non-owning pointer so a stale dereference after release() is
+  // a clean nullptr crash, not a use-after-free.
+  m_registry = nullptr;
+
+  if(m_vertexFallbackPool)
+  {
+    m_vertexFallbackPool->release();
+    m_vertexFallbackPool.reset();
+  }
+
   // If nothing happened
   if(m_initialBatch)
   {
@@ -210,6 +511,36 @@ bool RenderList::maybeRebuild(bool force)
   const QSize outputSize = state.renderSize;
   if(outputSize != m_lastSize || !m_built || force)
   {
+    // Drain the in-flight CB before the mid-frame release()+init().
+    //
+    // maybeRebuild is called from renderInternal (line ~845), which runs
+    // INSIDE Window::render's beginFrame/endFrame brackets. release()
+    // raw-deletes / deleteLater()s SRBs, samplers, UBOs, etc. that may
+    // be referenced by the resource-update batch already queued into
+    // cbD->commands earlier in renderInternal (commands.resourceUpdate
+    // around line 1036), or by ScenePreprocessor's runInitialPasses
+    // beginExternal/copyBuffer/endExternal block (which synchronously
+    // flushes cbD->commands into the VkCommandBuffer at
+    // qrhivulkan.cpp:6640-6643).
+    //
+    // Without this drain, recordPrimaryCommandBuffer at endFrame
+    // dereferences the released VkBuffer/VkSampler handles -> validation
+    // cascade (vkResetCommandPool with pending CBs, vkBeginCommandBuffer
+    // on active CB, eventual device loss in vkQueueSubmit /
+    // vkWaitForFences) -> CRASH in nvoglv64.dll (NVIDIA's unified Vulkan
+    // driver) at vkCmdBeginRenderPass.
+    //
+    // finish() mid-frame is a documented and supported QRhi operation
+    // (qrhivulkan.cpp:3121-3164): it submits the partial CB,
+    // vkQueueWaitIdle, then restarts a fresh CB on the same slot. After
+    // finish(), the CB queue is empty and we can safely tear down +
+    // re-init RenderList resources.
+    //
+    // Triggers only on first frame after a resize / m_built==false /
+    // forced rebuild. Steady-state cost: zero.
+    if(state.rhi && state.rhi->isRecordingFrame())
+      state.rhi->finish();
+
     m_built = false;
     release();
 
@@ -304,20 +635,40 @@ RenderList::Buffers RenderList::acquireMesh(
   auto& rhi = *state.rhi;
   // 1. Try to find mesh from the exact same geometry
   const auto& [p, f] = spec;
+
+  auto dump_bufs = [](const char* tag, CustomMesh* m, const MeshBuffers& mb) {
+    if(!::score::gfx::buftrace_enabled())
+      return;
+    QDebug d = qDebug().nospace();
+    d << "[BUFTRACE] " << tag << " mesh=" << (void*)m
+      << " bufs.size=" << (qsizetype)mb.buffers.size() << " [";
+    for(std::size_t i = 0; i < mb.buffers.size(); ++i)
+    {
+      if(i)
+        d << ",";
+      d << (void*)mb.buffers[i].handle;
+    }
+    d << "] indirect=" << (void*)mb.indirectDrawBuffer;
+  };
+
   if(auto it = m_customMeshCache.find(spec); it != m_customMeshCache.end())
   {
     if(auto m = const_cast<CustomMesh*>(safe_cast<const CustomMesh*>(it->second)))
     {
       auto meshbufs_it = this->m_vertexBuffers.find(m);
       SCORE_ASSERT(meshbufs_it != this->m_vertexBuffers.end());
-      auto mb = meshbufs_it->second;
+      auto& mb = meshbufs_it->second;
 
-      // FIX the thraed-unsafety: basically, we need to
-      // have some level of double- or triple-buffering
       if(auto cur_idx = p->dirty_index; m->dirtyGeometryIndex != cur_idx)
       {
+        BUFTRACE() << "acquireMesh PATH 1a: dirty_index "
+                   << m->dirtyGeometryIndex << "->" << cur_idx
+                   << " mesh=" << (void*)m
+                   << " spec=" << (void*)p.get();
+        dump_bufs("  before reload", m, mb);
         m->reload(*p, f);
         m->update(rhi, mb, res);
+        dump_bufs("  after reload", m, mb);
         for(auto& mesh: p->meshes) {
           for(auto& buf : mesh.buffers) {
             buf.dirty = false;
@@ -338,8 +689,11 @@ RenderList::Buffers RenderList::acquireMesh(
 
         if(dirty)
         {
+          BUFTRACE() << "acquireMesh PATH 1b: buf.dirty mesh=" << (void*)m;
+          dump_bufs("  before reload", m, mb);
           m->reload(*p, f);
           m->update(rhi, mb, res);
+          dump_bufs("  after reload", m, mb);
           for(auto& mesh: p->meshes) {
             for(auto& buf : mesh.buffers) {
               buf.dirty = false;
@@ -364,8 +718,13 @@ RenderList::Buffers RenderList::acquireMesh(
         auto& mb = currentbufs;
         auto cur_idx = p->dirty_index;
 
+        BUFTRACE() << "acquireMesh PATH 2 (reuse): mesh=" << (void*)m
+                   << " old_spec=" << (void*)it->first.meshes.get()
+                   << " new_spec=" << (void*)p.get();
+        dump_bufs("  before reload", m, mb);
         m->reload(*p, f);
         m->update(rhi, mb, res);
+        dump_bufs("  after reload", m, mb);
 
         for(auto& mesh: p->meshes) {
           for(auto& buf : mesh.buffers) {
@@ -375,6 +734,11 @@ RenderList::Buffers RenderList::acquireMesh(
 
         m->dirtyGeometryIndex = cur_idx;
 
+        // Sync the vertex buffer cache so that path 1 on subsequent frames
+        // picks up the updated handles (especially gpu_buffer pointers that
+        // were replaced rather than resized in-place).
+        meshbufs_it->second = mb;
+
         // Re-key: erase stale entry and insert under the new geometry_spec
         // to prevent cache growth from feedback loops creating new shared_ptrs each frame.
         m_customMeshCache.erase(it);
@@ -386,31 +750,32 @@ RenderList::Buffers RenderList::acquireMesh(
   }
 
   // 3. Really not found, we allocate a new mesh for good
+  BUFTRACE() << "acquireMesh PATH 3 (fresh): spec=" << (void*)p.get();
   auto m = new CustomMesh{*p, f};
   auto meshbufs = initMeshBuffer(*m, res);
 
 #if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-  // Check for well-known _indirect_draw auxiliary buffer convention
+  // Check for well-known _indirect_draw auxiliary buffer convention.
+  //
+  // The engine emits a uniform 5-word indirect command (stride 20):
+  //   { index_or_vertex_count, instance_count, first_index_or_vertex,
+  //     base_vertex, first_instance }  -- see ossia::geometry::draw_command /
+  // ScenePreprocessorNode's IndirectCmd. This matches QRhiDrawIndexedIndirect-
+  // Command (5 u32) exactly, so the INDEXED path is GPU-safe at stride 20.
+  //
+  // The NON-indexed QRhiDrawIndirectCommand is only 4 u32 (vertexCount,
+  // instanceCount, firstVertex, firstInstance). Pointing drawIndirect() at a
+  // 5-word/stride-20 buffer makes the GPU read firstInstance from word 3
+  // (our base_vertex dummy) instead of word 4 — diverging from the CPU
+  // fallback, which reads word 4. There is no way to reshape the producer's
+  // buffer here, so we deliberately DO NOT enable the GPU indirect path for
+  // the non-indexed case (force indexed-only MDI): the mesh falls back to its
+  // normal draw, avoiding wrong/garbage firstInstance. Indexed MDI below gets
+  // the full stride/count treatment.
   if(!meshbufs.useIndirectDraw && !p->meshes.empty())
   {
     const auto& mesh = p->meshes[0];
-    if(auto* aux = mesh.find_auxiliary("_indirect_draw"))
-    {
-      if(aux->buffer >= 0 && aux->buffer < (int)mesh.buffers.size())
-      {
-        const auto& buf_data = mesh.buffers[aux->buffer].data;
-        if(auto* gpu = ossia::get_if<ossia::geometry::gpu_buffer>(&buf_data))
-        {
-          if(gpu->handle)
-          {
-            meshbufs.indirectDrawBuffer = static_cast<QRhiBuffer*>(gpu->handle);
-            meshbufs.useIndirectDraw = true;
-            meshbufs.indirectDrawIndexed = false;
-          }
-        }
-      }
-    }
-    else if(auto* aux_idx = mesh.find_auxiliary("_indirect_draw_indexed"))
+    if(auto* aux_idx = mesh.find_auxiliary("_indirect_draw_indexed"))
     {
       if(aux_idx->buffer >= 0 && aux_idx->buffer < (int)mesh.buffers.size())
       {
@@ -419,13 +784,31 @@ RenderList::Buffers RenderList::acquireMesh(
         {
           if(gpu->handle)
           {
+            constexpr quint32 stride = 5 * sizeof(uint32_t); // 20, matches CustomMesh
             meshbufs.indirectDrawBuffer = static_cast<QRhiBuffer*>(gpu->handle);
             meshbufs.useIndirectDraw = true;
             meshbufs.indirectDrawIndexed = true;
+            meshbufs.indirectDrawOffset = (quint32)std::max<int64_t>(0, aux_idx->byte_offset);
+            meshbufs.indirectDrawStride = stride;
+            // drawIndirect requires stride >= 16 and count >= 1; derive the
+            // command count from the aux region size (was never set before →
+            // count defaulted to 1, drawing only the first command).
+            const int64_t avail = (aux_idx->byte_size > 0)
+                ? aux_idx->byte_size
+                : (int64_t)gpu->byte_size - aux_idx->byte_offset;
+            meshbufs.indirectDrawCount
+                = (avail > 0) ? (quint32)(avail / stride) : 1u;
+            if(meshbufs.indirectDrawCount == 0)
+              meshbufs.indirectDrawCount = 1;
           }
         }
       }
     }
+    else if(mesh.find_auxiliary("_indirect_draw"))
+    {
+      // Non-indexed GPU MDI intentionally unsupported (see comment above).
+      // Leave useIndirectDraw=false so the mesh draws via its normal path.
+    }
   }
 #endif
 
@@ -441,7 +824,51 @@ void RenderList::clearRenderers()
   m_built = false;
 }
 
-bool RenderList::requiresDepth(Port& p) const noexcept
+bool RenderList::resizeSwapchainSizedTargets(QSize newSize)
+{
+  // Bail to fallback if there's nothing to resize. The fallback
+  // (recreateOutputRenderList) handles initial output setup.
+  if(newSize.width() <= 0 || newSize.height() <= 0)
+    return false;
+  if(renderers.empty())
+    return false;
+
+  // No-op success only if the shared state ALSO already holds newSize.
+  // A pending A->B resize leaves m_lastSize at A until maybeRebuild runs;
+  // a resize back to A must then still restore state.* below, not bail.
+  if(newSize == m_lastSize && newSize == state.renderSize)
+    return true;
+
+  // Update the shared RenderState's size. m_lastSize stays at the
+  // OLD value here — we WANT maybeRebuild's `outputSize != m_lastSize`
+  // check to fire on the next render frame so it triggers a full
+  // release+init cycle. With the persistent GpuResourceRegistry
+  // (commit 703c2937f) and the rt_changed downstream-size
+  // propagation (createAllInputRenderTargets), maybeRebuild is now
+  // cheap enough to be the correct way to handle resize.
+  //
+  // Why we don't try to update RTs here directly: the rt_changed
+  // surgical block called resolveRenderTargetSpecs PER-PORT without
+  // the downstream-propagation that createAllInputRenderTargets
+  // applies. Nodes with explicit per-port sizes cached from earlier
+  // graph setup keep their explicit size on resize, while
+  // createAllInputRenderTargets uses resolveDownstreamSize to
+  // properly propagate the new output size upstream. The user's
+  // openpbr scene has nodes with cached explicit sizes that wouldn't
+  // update via the surgical path → low-resolution rendering on resize.
+  //
+  // maybeRebuild() routes through release()+init()+createAllInputRenderTargets()
+  // which IS the correct propagation; with registry persistence the
+  // cost is bounded (no arena destroy/create, no texture re-upload,
+  // pipeline cache stays warm).
+  state.renderSize = newSize;
+  state.outputSize = newSize;
+  m_built = false;  // forces maybeRebuild's release+init on next frame
+
+  return true;
+}
+
+bool RenderList::requiresDepth(const Port& p) const noexcept
 {
   for(auto& edge : p.edges)
     if(edge->source->node->requiresDepth)
@@ -503,6 +930,82 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
   if(renderers.size() <= 1 && !force)
     return;
 
+  // Frame counter + wall-clock timer for diagnostics. Emits the frame
+  // header with the time since the previous render() entry so the pasted
+  // log shows per-frame cost. Values include CPU record + any synchronous
+  // GPU waits inside setShaderResources / beginPass etc., i.e. roughly
+  // the wall-time equivalent of "how fast is this pipeline".
+  // Plan 09 S6: per-frame GPU-time + PSO-stall observability. Read the
+  // CB-wide GPU time for the most recently COMPLETED frame and attribute
+  // it to the "frame" label; the per-pass breakdown is a QRhi follow-up
+  // (current API only exposes CB-scoped timings).
+  //
+  // One-frame staleness is a QRhi contract: `lastCompletedGpuTime()`
+  // returns the PREVIOUS frame's elapsed GPU time, not the in-progress
+  // one. The panel reports it as such.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
+  // Use the per-instance `frame` member (incremented at the end of render())
+  // as the diagnostic frame number rather than a process-/thread-global
+  // counter, so the number is attributed to THIS RenderList.
+  const int64_t frameNumber = this->frame;
+  if(state.caps.timestamps)
+  {
+    const double last_ms = commands.lastCompletedGpuTime();
+    if(last_ms > 0.0)
+      m_gpuTimings.record("frame", last_ms);
+  }
+  // PSO stall telemetry: sample totalPipelineCreationTime, compute the
+  // delta since last frame. A spike > 10 ms means a new PSO compiled
+  // on the hot path — usually a cold cache or new preset variant.
+  if(state.rhi)
+  {
+    // NOTE: totalPipelineCreationTime is rhi-wide and these two throttle
+    // counters SHOULD be per-RenderList members so that multiple RenderLists
+    // sharing a render thread don't (a) consume each other's PSO-time delta
+    // or (b) race a shared thread_local cooldown. Converting them to members
+    // requires adding fields to RenderList.hpp, which is outside this change's
+    // editable scope — see report. The two genuine bugs that ARE fixable here
+    // (the frame-number misattribution and the cooldown decrement being gated
+    // on the stall branch) are fixed: frameNumber comes from this->frame, and
+    // the decrement now ticks every frame below.
+    static thread_local qint64 s_lastPsoCreationNs = 0;
+    static thread_local int s_flushCoolDown = 0;
+    const auto stats = state.rhi->statistics();
+    const qint64 delta_ns = stats.totalPipelineCreationTime - s_lastPsoCreationNs;
+    s_lastPsoCreationNs = stats.totalPipelineCreationTime;
+    const double delta_ms = double(delta_ns) / 1'000'000.0;
+
+    // Tick the cooldown EVERY frame (was previously decremented only inside
+    // the stall branch, so it counted stalls rather than frames and the
+    // ~5s throttle never actually elapsed in wall time).
+    if(s_flushCoolDown > 0)
+      --s_flushCoolDown;
+
+    if(delta_ms > 10.0)
+    {
+      qWarning().noquote().nospace()
+          << "[GPU] PSO compile stall on frame " << frameNumber
+          << ": " << delta_ms << " ms — consider prewarming preset pipelines.";
+
+      // Plan 09 S6: mid-session pipeline-cache flush. When a stall
+      // hits we've just compiled one or more fresh PSOs — good time
+      // to persist the cache so the same compilation doesn't have to
+      // happen again on next launch, even if score crashes. Throttled
+      // to at most once per ~5s (300 frames at 60 Hz) to avoid
+      // churning the cache file on prolonged compile-heavy scenes.
+      if(s_flushCoolDown <= 0 && state.savePipelineCache)
+      {
+        state.savePipelineCache();
+        s_flushCoolDown = 300;
+      }
+    }
+    // Also record into the timings panel so it shows up next to frame
+    // time. Zero deltas are filtered out by GpuTimings::record.
+    m_gpuTimings.record("pso_compile", delta_ms);
+  }
+#endif
+  m_gpuTimings.tickFrame();
+
   bool rt_changed = false;
   for(auto* renderer : renderers)
   {
@@ -531,23 +1034,137 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
 
   if(rt_changed && !rebuilt)
   {
-    for(auto node : renderers)
+    // Surgical render target update: only recreate the specific RTs and
+    // passes that actually changed, rather than destroying everything.
+    //
+    // Process output node first (its RT size/format determines upstream defaults),
+    // then intermediate nodes.
+
+    // Pass 1: output node
+    if(auto out_it = output.renderedNodes.find(this);
+       out_it != output.renderedNodes.end())
     {
-      node->release(*this);
+      auto* outRenderer = out_it->second;
+      if(outRenderer->renderTargetSpecsChanged)
+      {
+        // Output renderer owns its RT — re-init it.
+        outRenderer->releaseState(*this);
+        outRenderer->initState(*this, *updateBatch);
+        outRenderer->checkForChanges();
+        outRenderer->materialChanged = true;
+        outRenderer->geometryChanged = true;
+        outRenderer->renderTargetSpecsChanged = false;
+
+        // Recreate upstream passes that target the output's input ports.
+        for(auto* in : output.input)
+        {
+          for(auto* edge : in->edges)
+          {
+            auto src_it = edge->source->node->renderedNodes.find(this);
+            if(src_it != edge->source->node->renderedNodes.end())
+            {
+              src_it->second->removeOutputPass(*this, *edge);
+              src_it->second->addOutputPass(*this, *edge, *updateBatch);
+            }
+          }
+        }
+      }
     }
 
-    // Recreate centralized input render targets
-    for(auto& [port, rt] : m_inputRenderTargets)
-      rt.release();
-    m_inputRenderTargets.clear();
-    createAllInputRenderTargets();
-
-    for(auto node : renderers)
+    // Pass 2: intermediate nodes with changed RT specs
+    for(auto* renderer : renderers)
     {
-      node->init(*this, *updateBatch);
-      node->materialChanged = true;
-      node->geometryChanged = true;
-      node->renderTargetSpecsChanged = true;
+      if(!renderer->renderTargetSpecsChanged)
+        continue;
+      // Skip output node (handled above)
+      if(&renderer->node == &output)
+        continue;
+
+      // Phase A: scan ports, recreate input RTs whose specs changed,
+      // and collect the changed-port set so phase C only re-adds
+      // upstream passes for those.
+      QVarLengthArray<Port*, 4> changedPorts;
+      int cur_port = 0;
+      for(auto* in : renderer->node.input)
+      {
+        if(in->type == Types::Image
+           && (in->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource)
+        {
+          auto newSpec = renderer->node.resolveRenderTargetSpecs(cur_port, *this);
+          auto oldIt = m_inputRenderTargets.find(in);
+
+          bool specChanged = false;
+          if(oldIt != m_inputRenderTargets.end())
+          {
+            auto* oldTex = oldIt->second.texture;
+            if(oldTex)
+              specChanged = (oldTex->format() != newSpec.format)
+                         || (oldTex->pixelSize() != newSpec.size);
+          }
+
+          // Always update sampler filter settings when specs changed
+          // (filter/address changes don't require RT recreation)
+          renderer->updateInputSamplerFilter(*in, newSpec);
+
+          if(specChanged)
+          {
+            changedPorts.append(in);
+
+            // Remove upstream passes that target this port
+            for(auto* edge : in->edges)
+            {
+              auto src_it = edge->source->node->renderedNodes.find(this);
+              if(src_it != edge->source->node->renderedNodes.end())
+                src_it->second->removeOutputPass(*this, *edge);
+            }
+
+            // Recreate the render target
+            oldIt->second.release();
+            bool wantsDepth = requiresDepth(*in);
+            bool wantsSamplableDepth
+                = (in->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
+            oldIt->second = score::gfx::createRenderTarget(
+                state, newSpec.format, newSpec.size, samples(),
+                wantsDepth || wantsSamplableDepth, wantsSamplableDepth);
+          }
+        }
+        cur_port++;
+      }
+
+      // Phase B: if ANY input RT actually changed shape, the renderer's
+      // INTERNAL size-dependent state (intermediate RTs, MRT,
+      // persistent AUX, depth/MSAA attachments sized to output, etc.)
+      // is stale and needs re-init. Without this, the resize-only
+      // fast path produced "internal render resolution not updated" --
+      // input RT was recreated correctly but the renderer's own
+      // internal RTs stayed at the old size. initState wires up
+      // samplers against the current m_inputRenderTargets so we
+      // don't need a separate updateInputTexture pass.
+      //
+      // Phase C: re-add upstream passes ONLY for the ports whose RT
+      // was recreated (others kept their existing passes intact in
+      // phase A). Done after Phase B so the upstream's addOutputPass
+      // sees this renderer's freshly-built per-pass state.
+      if(!changedPorts.empty())
+      {
+        renderer->releaseState(*this);
+        renderer->initState(*this, *updateBatch);
+        renderer->checkForChanges();
+        renderer->materialChanged = true;
+        renderer->geometryChanged = true;
+
+        for(auto* in : changedPorts)
+        {
+          for(auto* edge : in->edges)
+          {
+            auto src_it = edge->source->node->renderedNodes.find(this);
+            if(src_it != edge->source->node->renderedNodes.end())
+              src_it->second->addOutputPass(*this, *edge, *updateBatch);
+          }
+        }
+      }
+
+      renderer->renderTargetSpecsChanged = false;
     }
   }
   // Check if the viewport has changed
@@ -586,11 +1203,14 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
     for(auto edge : input->edges)
     {
       auto src = edge->source;
-      SCORE_ASSERT(src);
+      if(!src)
+        continue;
+
+      auto rn_it = src->node->renderedNodes.find(this);
+      if(rn_it == src->node->renderedNodes.end())
+        continue; // Source node has no renderer in this RL (transient during incremental update)
 
-      SCORE_ASSERT(
-          src->node->renderedNodes.find(this) != src->node->renderedNodes.end());
-      NodeRenderer* prev_renderer = src->node->renderedNodes.find(this)->second;
+      NodeRenderer* prev_renderer = rn_it->second;
 
       prevRenderers.push_back({edge, prev_renderer});
 
@@ -650,14 +1270,16 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
           // Update the downstream node's sampler to point to the
           // upstream's current texture (it may have changed since init).
           auto rendered = node->renderedNodes.find(this);
-          SCORE_ASSERT(rendered != node->renderedNodes.end());
+          if(rendered == node->renderedNodes.end())
+            continue;
           NodeRenderer* sink_renderer = rendered->second;
 
           for(auto [edge, prev_renderer] : prevRenderers)
           {
             if(auto* srcTex = prev_renderer->textureForOutput(*edge->source))
             {
-              sink_renderer->updateInputTexture(*input, srcTex);
+              auto rt = renderTargetForInputPort(*input);
+              sink_renderer->updateInputTexture(*input, srcTex, rt.depthTexture);
             }
           }
 
@@ -674,7 +1296,16 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
           // issues a clearBuffers command.
           {
             auto rendered = node->renderedNodes.find(this);
-            SCORE_ASSERT(rendered != node->renderedNodes.end());
+            if(rendered == node->renderedNodes.end())
+            {
+              if(updateBatch)
+              {
+                commands.resourceUpdate(updateBatch);
+                updateBatch = nullptr;
+              }
+              updateBatch = state.rhi->nextResourceUpdateBatch();
+              continue;
+            }
             NodeRenderer* renderer = rendered->second;
 
             auto rt = renderer->renderTargetForInput(*input);
@@ -683,8 +1314,7 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
             if(rt)
             {
               QColor bg = (it + 1 == this->nodes.rend() ? Qt::black : Qt::transparent);
-              // Normal drawing node
-              commands.beginPass(rt.renderTarget, bg, {1.0f, 0}, updateBatch);
+              commands.beginPass(rt.renderTarget, bg, {0.0f, 0}, updateBatch);
               updateBatch = nullptr;
 
               // FIXME z-sort
@@ -716,13 +1346,14 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
           SCORE_ASSERT(updateBatch);
         }
       }
-      else if(input->type == Types::Buffer || input->type == Types::Geometry)
+      else if(input->type == Types::Buffer || input->type == Types::Geometry || input->type == Types::Scene)
       {
         prepare_render(input);
 
         {
           auto rendered = node->renderedNodes.find(this);
-          SCORE_ASSERT(rendered != node->renderedNodes.end());
+          if(rendered == node->renderedNodes.end())
+            continue;
           NodeRenderer* renderer = rendered->second;
 
           if(updateBatch)
@@ -752,11 +1383,23 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force)
 
   // Finally the output node may have some rendering to do too
   {
-    SCORE_ASSERT(!this->output.renderedNodes.empty());
-    SCORE_ASSERT(
-        dynamic_cast<OutputNodeRenderer*>(this->output.renderedNodes.begin()->second));
+    if(this->output.renderedNodes.empty())
+    {
+      // Pool-leak fix: updateBatch was allocated earlier in this
+      // function (or via the per-edge prepare_render path) and
+      // must be returned before bailing out — otherwise the pool slot
+      // stays pinned until the QRhi is destroyed, and during rapid
+      // resize this condition can fire many times in succession.
+      if(updateBatch) { updateBatch->release(); updateBatch = nullptr; }
+      return;
+    }
     auto output_renderer
-        = static_cast<OutputNodeRenderer*>(this->output.renderedNodes.begin()->second);
+        = dynamic_cast<OutputNodeRenderer*>(this->output.renderedNodes.begin()->second);
+    if(!output_renderer)
+    {
+      if(updateBatch) { updateBatch->release(); updateBatch = nullptr; }
+      return;
+    }
 
     if(this->output.configuration().outputNeedsRenderPass)
     {
@@ -800,9 +1443,40 @@ void RenderList::update(QRhiResourceUpdateBatch& res)
 
     m_outputUBOData.renderSize[0] = this->m_lastSize.width();
     m_outputUBOData.renderSize[1] = this->m_lastSize.height();
+    m_outputUBOData.sampleCount = m_samples;
 
     res.updateDynamicBuffer(m_outputUBO, 0, sizeof(OutputUBO), &m_outputUBOData);
   }
 }
 
+void RenderState::Caps::populate(QRhi& rhi)
+{
+  // Probe each optional QRhi feature once and store the answer. Features
+  // newer than the Qt we build against are skipped; none is gated on 6.10.
+  const auto has = [&rhi](QRhi::Feature f) { return rhi.isFeatureSupported(f); };
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+  drawIndirect = has(QRhi::DrawIndirect);
+  drawIndirectMulti = has(QRhi::DrawIndirectMulti);
+#endif
+#if QT_VERSION >= QT_VERSION_CHECK(6, 11, 0)
+  instanceIndexIncludesBaseInstance = has(QRhi::InstanceIndexIncludesBaseInstance);
+  depthClamp = has(QRhi::DepthClamp);
+#endif
+#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
+  variableRateShading = has(QRhi::VariableRateShading);
+#endif
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+  textureViewFormat = has(QRhi::TextureViewFormat);
+  resolveDepthStencil = has(QRhi::ResolveDepthStencil);
+#endif
+#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+  multiview = has(QRhi::MultiView);
+#endif
+
+  timestamps = has(QRhi::Timestamps);
+  tessellation = has(QRhi::Tessellation);
+  geometryShader = has(QRhi::GeometryShader);
+  baseInstance = has(QRhi::BaseInstance);
+  pipelineCacheDataLoadSave = has(QRhi::PipelineCacheDataLoadSave);
+}
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp
index 94a0aa6ae3..5e2b407a12 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp
@@ -1,11 +1,22 @@
 #pragma once
 #include <Gfx/Graph/CommonUBOs.hpp>
+#include <Gfx/Graph/GpuTiming.hpp>
 #include <Gfx/Graph/Node.hpp>
 
+#include <ossia/detail/hash_map.hpp>
+
+#include <memory>
+
+namespace Gfx
+{
+class AssetTable;
+}
 namespace score::gfx
 {
 
+class GpuResourceRegistry;
 class OutputNode;
+class VertexFallbackPool;
 /**
  * @brief List of nodes to be rendered to an output.
  *
@@ -17,6 +28,7 @@ class OutputNode;
  */
 class SCORE_PLUGIN_GFX_EXPORT RenderList
 {
+  friend struct Graph;
 private:
   std::shared_ptr<RenderState> m_state;
 
@@ -36,6 +48,14 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList
    */
   [[nodiscard]] QRhiResourceUpdateBatch* initialBatch() const noexcept;
 
+  /**
+   * @brief Store a resource update batch to be submitted on the next render frame.
+   *
+   * Used by incremental edge additions that happen after the first render frame
+   * (when the original m_initialBatch has already been consumed).
+   */
+  void setInitialBatch(QRhiResourceUpdateBatch* batch) noexcept { m_initialBatch = batch; }
+
   /**
    * @brief Create buffers for a mesh and mark them for upload.
    *
@@ -66,6 +86,32 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList
    */
   bool maybeRebuild(bool force = false);
 
+  /**
+   * @brief Fast-path for pure viewport resize.
+   *
+   * Sets state.renderSize / state.outputSize to @p newSize and clears
+   * m_built. m_lastSize is deliberately left at the old value so that
+   * maybeRebuild() detects the size change on the next render frame
+   * and runs its release+init cycle at the new size.
+   *
+   * Skips the full `recreateOutputRenderList` teardown + rebuild
+   * (release+createRenderList) — saves the bulk of resize cost
+   * (pipeline compiles, ScenePreprocessor REBUILD, mesh slab uploads,
+   * texture array reallocation, etc.). Persistent registry +
+   * persistent ScenePreprocessor caches mean none of that work is
+   * actually needed for a pure size change.
+   *
+   * Returns true on success (including the already-at-size no-op).
+   * Returns false (caller should fall back to recreateOutputRenderList) when:
+   *   - newSize is invalid (width or height <= 0)
+   *   - renderers vector is empty (RL not yet initialised)
+   * The caller (Graph::onResize) handles the fallback path.
+   *
+   * Cost: O(1) here; no renderer or GPU resource is touched until the
+   * next render frame's maybeRebuild() recreates the render targets.
+   */
+  bool resizeSwapchainSizedTargets(QSize newSize);
+
   /**
    * @brief Obtain the texture corresponding to an output port.
    *
@@ -120,10 +166,25 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList
   void clearRenderers();
 
   /**
-   * @brief Texture to use when a texture is missing
+   * @brief Texture to use when a texture is missing (2D)
    */
   QRhiTexture& emptyTexture() const noexcept { return *m_emptyTexture; }
 
+  /**
+   * @brief Texture to use when a 3D (sampler3D) texture is missing
+   */
+  QRhiTexture& emptyTexture3D() const noexcept { return *m_emptyTexture3D; }
+
+  /**
+   * @brief Texture to use when a cubemap (samplerCube) is missing
+   */
+  QRhiTexture& emptyTextureCube() const noexcept { return *m_emptyTextureCube; }
+
+  /**
+   * @brief Texture to use when a 2D array (sampler2DArray) is missing
+   */
+  QRhiTexture& emptyTextureArray() const noexcept { return *m_emptyTextureArray; }
+
   /**
    * @brief UBO corresponding to the output parameters:
    *
@@ -132,6 +193,63 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList
    */
   QRhiBuffer& outputUBO() const noexcept { return *m_outputUBO; }
 
+  /**
+   * @brief Per-output GPU arena store for scene-graph source nodes.
+   *
+   * Returns a reference to the registry that owns the Camera / Light /
+   * Material / PerDraw arena buffers. Source nodes (Camera, Light,
+   * PBRMesh, …) allocate a slot from this registry at construction and
+   * write their packed bytes into it at their own update().
+   *
+   * Persist-across-rebuild contract: the registry is owned by the
+   * OutputNode (OutputNode::m_registry) and survives RenderList
+   * rebuilds — the same registry pointer is observed by both the
+   * pre- and post-rebuild RenderList for a given OutputNode. Consumers
+   * that cache the registry pointer (e.g. ScenePreprocessor's
+   * m_registry) can compare against the new RL's registry on init(),
+   * skip cache wipes when unchanged.
+   *
+   * Valid between init() and release().
+   */
+  GpuResourceRegistry& registry() noexcept { return *m_registry; }
+  const GpuResourceRegistry& registry() const noexcept { return *m_registry; }
+
+  /**
+   * @brief Per-RenderList pool of neutral fallback vertex buffers for
+   *        "REQUIRED: false" VERTEX_INPUTS whose upstream geometry does
+   *        not provide a matching attribute.
+   *
+   * Valid between init() and release(). See VertexFallbackPool.hpp.
+   */
+  VertexFallbackPool& vertexFallbackPool() noexcept { return *m_vertexFallbackPool; }
+
+  /**
+   * @brief Per-RenderList GPU-timing collector.
+   *
+   * Renderers wrap their begin/endPass regions in `ScopedGpuTimer` to
+   * attribute the CB-wide lastCompletedGpuTime to the named pass. The
+   * result is one frame stale — see GpuTiming.hpp for details.
+   *
+   * The S6 observability panel reads `gpuTimings().snapshot()` on its
+   * UI tick and displays per-pass rolling means.
+   */
+  GpuTimings& gpuTimings() noexcept { return m_gpuTimings; }
+  const GpuTimings& gpuTimings() const noexcept { return m_gpuTimings; }
+
+  /**
+   * @brief Session-wide asset decode cache.
+   *
+   * Set by Graph::createRenderList from GfxContext's AssetTable.
+   * May be null on test RenderLists or after teardown. Consumers
+   * must guard.
+   *
+   * Plan 09 S1: one decode per asset per session; preprocessor's
+   * texture-decode path checks this first, falls back to decode +
+   * stage otherwise.
+   */
+  Gfx::AssetTable* assetTable() const noexcept { return m_assetTable; }
+  void setAssetTable(Gfx::AssetTable* t) noexcept { m_assetTable = t; }
+
   /**
    * @brief A quad mesh correct for this API
    */
@@ -147,7 +265,7 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList
    * 
    * e.g. it's not needed if we're just doing some generative shaders.
    */
-  bool requiresDepth(score::gfx::Port& p) const noexcept;
+  bool requiresDepth(const score::gfx::Port& p) const noexcept;
   bool anyNodeRequiresDepth() const noexcept { return m_requiresDepth; }
 
   int samples() const noexcept { return m_samples; }
@@ -160,14 +278,82 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList
 
   void createAllInputRenderTargets();
 
+  /**
+   * @brief Mark this render list as fully built.
+   *
+   * Prevents maybeRebuild() from unnecessarily tearing down and
+   * recreating all resources on the first render frame after
+   * createRenderList() has already fully initialized everything.
+   */
+  void markBuilt() noexcept { m_built = true; m_lastSize = state.renderSize; }
+
+  /// Set the "any node requires depth" flag computed from the node graph.
+  /// Mirrors what maybeRebuild() recomputes; called from
+  /// Graph::createRenderList so the freshly-built RL doesn't need a
+  /// first-frame maybeRebuild to populate it.
+  void markRequiresDepth(bool value) noexcept { m_requiresDepth = value; }
+
+  /// Notify that an edge was removed. Notifies renderers, releases RT if unused.
+  ///
+  /// @param preserveSinks Optional set of sink Ports that should keep their
+  ///   input render target even if this edge was their only feed. Used by
+  ///   batched edge updates (see GfxContext::incrementalEdgeUpdate) so that
+  ///   inserting a filter between two nodes doesn't destroy and immediately
+  ///   re-allocate the same RT when the old and new edges share a sink port.
+  void
+  onEdgeRemoved(Edge& edge, const ossia::hash_set<const Port*>* preserveSinks = nullptr);
+
+  /// Remove the render target for a specific input port.
+  void removeInputRenderTarget(const Port* port);
+
+  /**
+   * @brief Resolve the downstream render target size for a node.
+   *
+   * Returns the maximum size across all downstream render targets that
+   * this node renders to. Used as fallback when a node's input port
+   * has no explicit render target size.
+   */
+  QSize resolveDownstreamSize(
+      const Node* node,
+      const ossia::small_flat_map<const Port*, RenderTargetSpecs, 16>& resolvedSpecs)
+      const noexcept;
+
 private:
   OutputUBO m_outputUBOData;
 
   QRhiResourceUpdateBatch* m_initialBatch{};
 
+  // Scene-graph arena store (camera / light / material / per_draw buffers).
+  // Persist-across-rebuild contract: ownership is on the OutputNode
+  // (OutputNode::m_registry), so the registry — and all its arena
+  // buffers, mesh slabs, texture-array channels, ScenePreprocessor
+  // material/env slots — survives `Graph::recreateOutputRenderList`
+  // (viewport resize / fallback rebuild). RenderList::init() either
+  // calls GpuResourceRegistry::init() once (first RL on this output)
+  // or adopts the populated state as-is (every subsequent rebuild).
+  // RenderList::release() does NOT destroy it. OutputNode::releaseRegistry()
+  // tears it down via destroyOwned() when its QRhi goes away.
+  GpuResourceRegistry* m_registry{};
+
+  // Pool of tiny shared vertex buffers used to satisfy "REQUIRED: false"
+  // VERTEX_INPUTS whose upstream geometry is missing an attribute.
+  // Same lifetime as m_registry.
+  std::unique_ptr<VertexFallbackPool> m_vertexFallbackPool;
+
+  // GPU-timing collector. Lives as long as the RenderList — outlives
+  // individual renderers so per-pass measurements survive node churn.
+  GpuTimings m_gpuTimings;
+
+  // Session-wide asset decode cache. Non-owning; GfxContext is the
+  // owner. May be null.
+  Gfx::AssetTable* m_assetTable{};
+
   // Material
   QRhiBuffer* m_outputUBO{};
   QRhiTexture* m_emptyTexture{};
+  QRhiTexture* m_emptyTexture3D{};
+  QRhiTexture* m_emptyTextureCube{};
+  QRhiTexture* m_emptyTextureArray{};
 
   /**
    * @brief Cache of vertex buffers.
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp
index 33299f5a50..2ab3fef624 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp
@@ -55,21 +55,87 @@ struct RenderState
   GraphicsApi api{};
   QShaderVersion version{};
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-  struct
+  struct Caps
   {
+    // Indirect draw — Qt 6.12+; populated only on compatible builds.
     bool drawIndirect{false};
     bool drawIndirectMulti{false};
+
+    // Always queryable.
+    bool multiview{false};
+    bool resolveDepthStencil{false};
+    bool tessellation{false};
+    bool geometryShader{false};
+
+    // Extended set (Plan 09 S0). Drives shader feature gating +
+    // observability.
+    //
+    // baseInstance:
+    //   Lets indirect draws use `firstInstance` as the draw ID via
+    //   `gl_BaseInstance` (ARB_shader_draw_parameters). MDI's per-draw
+    //   lookup table reads this way.
+    //
+    // instanceIndexIncludesBaseInstance:
+    //   Disambiguates whether `gl_InstanceIndex` already contains the
+    //   `firstInstance` offset (Vulkan-like) or not. Shader prepass
+    //   injects a `#define SCORE_INSTANCE_INDEX_INCLUDES_BASE_INSTANCE`
+    //   based on this flag so presets work on both paths.
+    //
+    // variableRateShading:
+    //   Per-tile shading-rate maps (VK_EXT_fragment_shading_rate,
+    //   D3D12 VRS). Feeds the VRS-opt-in path on fullscreen presets.
+    //
+    // timestamps:
+    //   Whether `QRhiCommandBuffer::lastCompletedGpuTime()` returns
+    //   meaningful values. Prereq for the per-pass timing panel.
+    //
+    // pipelineCacheDataLoadSave:
+    //   Backend supports pipeline binary cache round-trip. Used by
+    //   tryLoadPipelineCache / tryStorePipelineCache; surfaced so
+    //   upper layers can skip PSO prewarm when unsupported.
+    //
+    // textureViewFormat:
+    //   R32UI ↔ R32F aliasing. Needed by the visibility buffer preset
+    //   and surfaced early so consumers can feature-detect uniformly.
+    //
+    // depthClamp:
+    //   For reverse-Z shadow passes to avoid near-plane clipping;
+    //   shadow_cascades / point_shadow presets opt in when available.
+    bool baseInstance{false};
+    bool instanceIndexIncludesBaseInstance{false};
+    bool variableRateShading{false};
+    bool timestamps{false};
+    bool pipelineCacheDataLoadSave{false};
+    bool textureViewFormat{false};
+    bool depthClamp{false};
+
+    void populate(QRhi& rhi);
   } caps;
-#endif
 
   // Called after QRhi is destroyed to clean up an imported VkDevice
   std::function<void()> customDeviceCleanup;
 
+  // Called right before the QRhi is destroyed, while its pipeline cache is
+  // still accessible. Used to persist QRhi::pipelineCacheData() to disk.
+  std::function<void()> preRhiDestroy;
+
+  // Mid-session pipeline-cache flush (Plan 09 S6). Same storage path
+  // as preRhiDestroy but callable during normal operation — invoked
+  // from RenderList::render after a PSO-compile burst so the cache
+  // survives crashes / force-quits without a clean shutdown. Null
+  // when the backend doesn't support PipelineCacheDataLoadSave.
+  std::function<void()> savePipelineCache;
+
   void destroy()
   {
     window.reset();
 
+    if(preRhiDestroy)
+    {
+      preRhiDestroy();
+      preRhiDestroy = nullptr;
+    }
+
     delete rhi;
     rhi = nullptr;
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp
index 947a9d5e31..ad9691e6d9 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp
@@ -2,6 +2,8 @@
 
 #include <Gfx/Graph/CommonUBOs.hpp>
 #include <Gfx/Graph/ISFNode.hpp>
+#include <Gfx/Graph/ISFVisitors.hpp>
+#include <Gfx/Graph/IsfBindingsBuilder.hpp>
 #include <Gfx/Graph/RenderedCSFNode.hpp>
 #include <Gfx/Graph/RenderedISFSamplerUtils.hpp>
 #include <Gfx/Graph/RhiComputeBarrier.hpp>
@@ -12,6 +14,8 @@
 
 #include <ossia/dataflow/geometry_port.hpp>
 #include <ossia/detail/algorithms.hpp>
+#include <ossia/detail/hash.hpp>
+#include <ossia/detail/hash_map.hpp>
 #include <ossia/math/math_expression.hpp>
 
 #include <boost/algorithm/string/replace.hpp>
@@ -24,36 +28,63 @@ namespace score::gfx
 static QRhiTexture::Format
 getTextureFormat(const QString& format)  noexcept
 {
-  // Map CSF format strings to Qt RHI texture formats
-  if(format == "RGBA8")
-    return QRhiTexture::RGBA8;
-  else if(format == "BGRA8")
-    return QRhiTexture::BGRA8;
-  else if(format == "R8")
-    return QRhiTexture::R8;
-
+  // Map CSF format strings to Qt RHI texture formats.
+  //
+  // Case-insensitive comparison: libisf emits the FORMAT layout qualifier
+  // lowercased into the GLSL (`layout(r32ui) uniform uimage3D ...`), but
+  // the CSF JSON parser stores `image->format` verbatim — so an author
+  // writing `"FORMAT": "r32ui"` (the lowercase form that matches the
+  // generated GLSL one-to-one) used to silently fall through to the
+  // RGBA8 default at texture creation, while the shader compiled with
+  // r32ui — producing a Vulkan validation error
+  // VUID-vkCmdDispatch-format-07753 ("UINT component type required, bound
+  // descriptor format is VK_FORMAT_R8G8B8A8_UNORM") and undefined values
+  // on every imageLoad / imageStore. Normalise to upper-case once and
+  // dispatch.
+  const QString f = format.toUpper();
+
+  if(f == "RGBA8")    return QRhiTexture::RGBA8;
+  if(f == "BGRA8")    return QRhiTexture::BGRA8;
+  if(f == "R8")       return QRhiTexture::R8;
 #if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
-  else if(format == "RG8")
-    return QRhiTexture::RG8;
+  if(f == "RG8")      return QRhiTexture::RG8;
 #endif
-  else if(format == "R16")
-    return QRhiTexture::R16;
-
+  if(f == "R16")      return QRhiTexture::R16;
 #if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
-  else if(format == "RG16")
-    return QRhiTexture::RG16;
+  if(f == "RG16")     return QRhiTexture::RG16;
 #endif
-  else if(format == "RGBA16F") return QRhiTexture::RGBA16F;
-  else if(format == "RGBA32F") return QRhiTexture::RGBA32F;
-  else if(format == "R16F")
-    return QRhiTexture::R16F;
-  else if(format == "R32F")
-    return QRhiTexture::R32F;
-
+  if(f == "RGBA16F")  return QRhiTexture::RGBA16F;
+  if(f == "RGBA32F")  return QRhiTexture::RGBA32F;
+  if(f == "R16F")     return QRhiTexture::R16F;
+  if(f == "R32F")     return QRhiTexture::R32F;
 #if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
-  else if(format == "RGB10A2")
-    return QRhiTexture::RGB10A2;
+  if(f == "RGB10A2")  return QRhiTexture::RGB10A2;
+#endif
+
+  // Integer formats — required for atomic image ops (imageAtomicOr / Add /
+  // Min / Max / Exchange / CompSwap in GLSL). Atomics in Vulkan, D3D12 and
+  // Metal 3.1+ are only guaranteed on the R32{UI,SI} pair; R8{UI,SI} and the
+  // wider {RG,RGBA}32{UI,SI} variants are load/store-only on most desktop
+  // GPUs but still legal as storage images. Keep symmetry with
+  // QRhiTexture::Format — RG32UI / RGBA32UI are exposed so users who want to
+  // pack two/four values per voxel into one storage image can opt in.
+  //
+  // Added to QRhiTexture::Format in Qt 6.10 — guard so older Qt builds
+  // (6.2 / 6.4 / 6.6 / 6.8) compile. On older Qt, the request silently
+  // falls through to RGBA8 (and a Vulkan validation error if the shader
+  // declared an integer layout qualifier on its image), but the builds
+  // don't break.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0)
+  if(f == "R8UI")     return QRhiTexture::R8UI;
+  if(f == "R32UI")    return QRhiTexture::R32UI;
+  if(f == "RG32UI")   return QRhiTexture::RG32UI;
+  if(f == "RGBA32UI") return QRhiTexture::RGBA32UI;
+  if(f == "R8SI")     return QRhiTexture::R8SI;
+  if(f == "R32SI")    return QRhiTexture::R32SI;
+  if(f == "RG32SI")   return QRhiTexture::RG32SI;
+  if(f == "RGBA32SI") return QRhiTexture::RGBA32SI;
 #endif
+
   // Default to RGBA8 if format not recognized
   return QRhiTexture::RGBA8;
 }
@@ -140,7 +171,7 @@ RenderedCSFNode::RenderedCSFNode(const ISFNode& node) noexcept
 
 RenderedCSFNode::~RenderedCSFNode() { }
 
-void RenderedCSFNode::updateInputTexture(const Port& input, QRhiTexture* tex)
+void RenderedCSFNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
   int sampler_idx = 0;
   for(auto* p : node.input)
@@ -148,21 +179,36 @@ void RenderedCSFNode::updateInputTexture(const Port& input, QRhiTexture* tex)
     if(p == &input)
       break;
     if(p->type == Types::Image)
+    {
       sampler_idx++;
+      if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+        sampler_idx++;
+    }
   }
 
-  if(sampler_idx < (int)m_inputSamplers.size())
+  auto replaceSampler = [&](Sampler& sampl, QRhiTexture* t)
   {
-    auto& sampl = m_inputSamplers[sampler_idx];
-    if(sampl.texture != tex)
+    if(sampl.texture != t)
     {
-      sampl.texture = tex;
+      sampl.texture = t;
       for(auto& [e, cp] : m_computePasses)
         if(cp.srb)
-          score::gfx::replaceTexture(*cp.srb, sampl.sampler, tex);
+          score::gfx::replaceTexture(*cp.srb, sampl.sampler, t);
       for(auto& [e, gp] : m_graphicsPasses)
         if(gp.pipeline.srb)
-          score::gfx::replaceTexture(*gp.pipeline.srb, sampl.sampler, tex);
+          score::gfx::replaceTexture(*gp.pipeline.srb, sampl.sampler, t);
+    }
+  };
+
+  if(sampler_idx < (int)m_inputSamplers.size())
+  {
+    replaceSampler(m_inputSamplers[sampler_idx], tex);
+
+    if(depthTex
+       && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth
+       && sampler_idx + 1 < (int)m_inputSamplers.size())
+    {
+      replaceSampler(m_inputSamplers[sampler_idx + 1], depthTex);
     }
   }
 }
@@ -204,57 +250,21 @@ struct is_output
   bool operator()(const auto& v) { return false; }
 };
 
+// Thin adapter over the canonical isf_input_port_count_vis (ISFVisitors.hpp) so
+// the existing call sites that do `ossia::visit(p, input.data)` keep working.
+// Use walk_descriptor_inputs() in new code; this shim preserves the
+// "inlet_i / outlet_i mid-loop" pattern these consumers rely on.
 struct port_indices
 {
   int inlet_i = 0;
   int outlet_i = 0;
-  void operator()(const isf::storage_input& v)
+  template <typename T>
+  void operator()(const T& v) noexcept
   {
-    if(v.access == "read_only")
-      inlet_i++;
-    else
-    {
-      inlet_i++;
-      outlet_i++;
-    }
-  }
-  void operator()(const isf::csf_image_input& v)
-  {
-    if(v.access == "read_only")
-      inlet_i++;
-    else
-      outlet_i++;
-  }
-  void operator()(const isf::geometry_input& v)
-  {
-    if(v.attributes.empty())
-    {
-      // Pure pass-through: one inlet + one outlet
-      inlet_i++;
-      outlet_i++;
-    }
-    else
-    {
-      // Inlet if any attribute needs upstream data (read_only or read_write)
-      for(const auto& attr : v.attributes)
-        if(attr.access == "read_only" || attr.access == "read_write") { inlet_i++; break; }
-      // Outlet if any attribute is writable (write_only or read_write)
-      for(const auto& attr : v.attributes)
-      {
-        if(attr.access == "write_only" || attr.access == "read_write")
-        {
-          outlet_i++; // one geometry output port if any attribute is writable
-          break;
-        }
-      }
-    }
-    // $USER ports for vertex_count, instance_count, aux.size
-    if(v.vertex_count.find("$USER") != std::string::npos) inlet_i++;
-    if(v.instance_count.find("$USER") != std::string::npos) inlet_i++;
-    for(const auto& aux : v.auxiliary)
-      if(aux.size.find("$USER") != std::string::npos) inlet_i++;
+    auto c = isf_input_port_count_vis{}(v);
+    inlet_i += c.inlets;
+    outlet_i += c.outlets;
   }
-  void operator()(const auto& v) { inlet_i++; }
 };
 QSize RenderedCSFNode::computeTextureSize(
     const isf::csf_image_input& pass) const noexcept
@@ -300,13 +310,32 @@ int RenderedCSFNode::resolveCountExpression(
   if(expr.empty())
     return 0;
 
-  // Try fixed integer first
-  try
-  {
-    return std::max(1, std::stoi(expr));
-  }
-  catch(...)
+  // Try fixed integer first — but only when the whole string is a pure
+  // integer literal. std::stoi greedily parses the leading digits and
+  // silently stops at the first non-digit, so "6 * $x * $x" would
+  // otherwise be accepted as the integer 6 and the expression evaluator
+  // never runs. Require every character after optional leading whitespace
+  // to be a digit before taking the fast path.
   {
+    std::size_t i = 0;
+    while(i < expr.size() && std::isspace((unsigned char)expr[i]))
+      ++i;
+    const std::size_t first_digit = i;
+    while(i < expr.size() && std::isdigit((unsigned char)expr[i]))
+      ++i;
+    std::size_t last_digit = i;
+    while(i < expr.size() && std::isspace((unsigned char)expr[i]))
+      ++i;
+    if(first_digit < last_digit && i == expr.size())
+    {
+      try
+      {
+        return std::max(1, std::stoi(expr));
+      }
+      catch(...)
+      {
+      }
+    }
   }
 
   // Build expression evaluator
@@ -387,38 +416,81 @@ void RenderedCSFNode::registerCommonExpressionVariables(
 {
   const auto& desc = n.descriptor();
 
-  // Register texture dimensions ($WIDTH_<name>, $HEIGHT_<name>)
+  // Register full geometry of each input image/texture:
+  //   $WIDTH_<name>, $HEIGHT_<name>, $DEPTH_<name>, $LAYERS_<name>
+  //
+  // DEPTH/LAYERS are sourced from the live QRhiTexture when available
+  // (tex->depth() for 3D, tex->arraySize() for arrays). Both fall back to 1
+  // for plain 2D textures so expressions like "$DEPTH_vol" remain defined
+  // regardless of whether the bound texture is actually volumetric — lets
+  // shaders write one size formula and have it parse cleanly in both cases.
+  //
+  // The first input image also exposes un-suffixed $WIDTH/$HEIGHT/$DEPTH/
+  // $LAYERS for the common "filter that inherits its input's size" case.
+  auto register_texture_size = [&](const std::string& name, QRhiTexture* tex,
+                                   bool& first) {
+    QSize px = tex ? tex->pixelSize() : QSize{1280, 720};
+    int depth = 1;
+    int layers = 1;
+    if(tex)
+    {
+      if((int)(tex->flags() & QRhiTexture::ThreeDimensional))
+        depth = std::max(1, tex->depth());
+      if((int)(tex->flags() & QRhiTexture::TextureArray))
+        layers = std::max(1, tex->arraySize());
+    }
+    if(px.width() <= 0)
+      px.setWidth(1280);
+    if(px.height() <= 0)
+      px.setHeight(720);
+
+    e.add_constant(fmt::format("var_WIDTH_{}", name), data.emplace_back(px.width()));
+    e.add_constant(fmt::format("var_HEIGHT_{}", name), data.emplace_back(px.height()));
+    e.add_constant(fmt::format("var_DEPTH_{}", name), data.emplace_back(depth));
+    e.add_constant(fmt::format("var_LAYERS_{}", name), data.emplace_back(layers));
+    if(first)
+    {
+      e.add_constant("var_WIDTH", data.emplace_back(px.width()));
+      e.add_constant("var_HEIGHT", data.emplace_back(px.height()));
+      e.add_constant("var_DEPTH", data.emplace_back(depth));
+      e.add_constant("var_LAYERS", data.emplace_back(layers));
+      first = false;
+    }
+  };
+
+  bool first_image = true;
   int input_image_index = 0;
   for(const auto& img : desc.inputs)
   {
     if(ossia::get_if<isf::texture_input>(&img.data))
     {
+      QRhiTexture* t = nullptr;
       if(input_image_index < (int)m_inputSamplers.size())
-      {
-        auto [s, t] = this->m_inputSamplers[input_image_index];
-        QSize tex_sz = t ? t->pixelSize() : QSize{1280, 720};
-        e.add_constant(
-            fmt::format("var_WIDTH_{}", img.name), data.emplace_back(tex_sz.width()));
-        e.add_constant(
-            fmt::format("var_HEIGHT_{}", img.name), data.emplace_back(tex_sz.height()));
-      }
+        t = this->m_inputSamplers[input_image_index].texture;
+      register_texture_size(img.name, t, first_image);
       input_image_index++;
     }
     else if(auto* img_input = ossia::get_if<isf::csf_image_input>(&img.data))
     {
+      // Resolve dimensions for ALL csf_image_input access modes:
+      //   - read_only: bound as sampled texture in m_inputSamplers
+      //   - write_only / read_write: bound as storage image in m_storageImages
+      QRhiTexture* t = nullptr;
       if(img_input->access == "read_only")
       {
         if(input_image_index < (int)m_inputSamplers.size())
-        {
-          auto [s, t] = this->m_inputSamplers[input_image_index];
-          QSize tex_sz = t ? t->pixelSize() : QSize{1280, 720};
-          e.add_constant(
-              fmt::format("var_WIDTH_{}", img.name), data.emplace_back(tex_sz.width()));
-          e.add_constant(
-              fmt::format("var_HEIGHT_{}", img.name), data.emplace_back(tex_sz.height()));
-        }
+          t = this->m_inputSamplers[input_image_index].texture;
         input_image_index++;
       }
+      else
+      {
+        auto it = std::find_if(
+            m_storageImages.begin(), m_storageImages.end(),
+            [&](const StorageImage& si) { return si.name.toStdString() == img.name; });
+        if(it != m_storageImages.end())
+          t = it->texture;
+      }
+      register_texture_size(img.name, t, first_image);
     }
   }
 
@@ -444,36 +516,151 @@ void RenderedCSFNode::registerCommonExpressionVariables(
 
   // Register named geometry vertex/instance counts
   // ($VERTEX_COUNT_<name>, $INSTANCE_COUNT_<name>, and first one as $VERTEX_COUNT, $INSTANCE_COUNT)
+  //
+  // Always register the symbol so the expression parses, even on the very
+  // first frame when no upstream geometry has flowed yet — fall back to the
+  // descriptor's static vertex_count/instance_count when that string is a
+  // whole positive integer literal ("6 * $x" is not one), else to 1. Without
+  // it $VERTEX_COUNT_<name> raises ERR232 - Undefined symbol on each dispatch
+  // evaluated before updateGeometryBindings has populated geo_bind, breaking
+  // csf-copy-from / csf-geo-read-write and CSFs using a not-yet-bound geometry.
+  auto parse_static_count = [](const std::string& s, int fallback) -> int {
+    try
+    {
+      std::size_t used = 0;
+      const int v = std::stoi(s, &used); // throws on "" and on "$USER"
+      // Only trailing blanks may follow the digits, otherwise it is an expression.
+      const bool literal = s.find_first_not_of(" \t\r\n", used) == std::string::npos;
+      return (literal && v > 0) ? v : fallback;
+    }
+    catch(...)
+    {
+      return fallback;
+    }
+  };
+
   int geo_idx = 0;
   bool first_geo = true;
   for(const auto& input : desc.inputs)
   {
-    if(ossia::get_if<isf::geometry_input>(&input.data))
+    if(auto* geo = ossia::get_if<isf::geometry_input>(&input.data))
     {
+      int vertex_count = 0;
+      int instance_count = 0;
       if(geo_idx < (int)m_geometryBindings.size())
       {
         const auto& geo_bind = m_geometryBindings[geo_idx];
-        if(geo_bind.vertex_count > 0)
-        {
-          e.add_constant(
-              fmt::format("var_VERTEX_COUNT_{}", input.name),
-              data.emplace_back(geo_bind.vertex_count));
-          if(first_geo)
-            e.add_constant("var_VERTEX_COUNT", data.emplace_back(geo_bind.vertex_count));
-        }
-        if(geo_bind.instance_count > 0)
-        {
-          e.add_constant(
-              fmt::format("var_INSTANCE_COUNT_{}", input.name),
-              data.emplace_back(geo_bind.instance_count));
-          if(first_geo)
-            e.add_constant("var_INSTANCE_COUNT", data.emplace_back(geo_bind.instance_count));
-        }
+        vertex_count = geo_bind.vertex_count;
+        instance_count = geo_bind.instance_count;
+      }
+      if(vertex_count <= 0)
+        vertex_count = parse_static_count(geo->vertex_count, 1);
+      if(instance_count <= 0)
+        instance_count = parse_static_count(geo->instance_count, 1);
+
+      e.add_constant(
+          fmt::format("var_VERTEX_COUNT_{}", input.name),
+          data.emplace_back(vertex_count));
+      e.add_constant(
+          fmt::format("var_INSTANCE_COUNT_{}", input.name),
+          data.emplace_back(instance_count));
+      if(first_geo)
+      {
+        e.add_constant("var_VERTEX_COUNT", data.emplace_back(vertex_count));
+        e.add_constant("var_INSTANCE_COUNT", data.emplace_back(instance_count));
         first_geo = false;
       }
       geo_idx++;
     }
   }
+
+  // Register $COUNT_<name> and $BYTESIZE_<name> for every addressable SSBO /
+  // UBO the node binds, input or output. Lets SIZE / TARGET / WIDTH / HEIGHT
+  // expressions size themselves to upstream buffer extents by name —
+  // removes the need for user-visible "max N" scalar inputs on filters
+  // whose output should always mirror their input size.
+  //
+  // Registration order matters when names collide (e.g. an upstream-
+  // provided nested aux shadowed by a top-level AUXILIARY of the same
+  // name in a replace-style shader): the nested (input-side) binding
+  // is registered first so the top-level (output-side) redundant
+  // re-registration is suppressed — semantically, when a user writes
+  // `$COUNT_scene_lights` they mean the upstream count, not the size
+  // of the output buffer they're about to overwrite.
+  //
+  // For UBOs, COUNT always resolves to 1 (a UBO is one struct instance);
+  // BYTESIZE resolves to the struct byte size. For SSBOs with a flexible
+  // array, stride is inferred from `calculateStorageBufferSize(layout, 1)
+  // - calculateStorageBufferSize(layout, 0)` and COUNT is the allocation's
+  // element count. For SSBOs without a flexible array, COUNT resolves to 1.
+  {
+    ossia::hash_set<std::string> registered;
+    const auto& eff_desc = n.descriptor();
+
+    auto register_buffer
+        = [&](const std::string& name, int64_t byte_size, bool is_uniform,
+              std::span<const isf::storage_input::layout_field> layout) {
+      if(name.empty() || registered.contains(name))
+        return;
+      int64_t element_count = 1;
+      if(is_uniform)
+      {
+        // UBO: single struct. $COUNT = 1, $BYTESIZE = struct size.
+        element_count = 1;
+      }
+      else
+      {
+        const int64_t fixed_part
+            = score::gfx::calculateStorageBufferSize(layout, 0, eff_desc);
+        const int64_t with_one
+            = score::gfx::calculateStorageBufferSize(layout, 1, eff_desc);
+        const int64_t stride = with_one - fixed_part;
+        if(stride > 0 && byte_size > fixed_part)
+          element_count = (byte_size - fixed_part) / stride;
+        else
+          element_count = 1;
+        if(element_count < 1)
+          element_count = 1;
+      }
+      e.add_constant(
+          fmt::format("var_COUNT_{}", name),
+          data.emplace_back((double)element_count));
+      e.add_constant(
+          fmt::format("var_BYTESIZE_{}", name),
+          data.emplace_back((double)byte_size));
+      registered.insert(name);
+    };
+
+    // Pass 1 — nested auxiliaries on every geometry input (the "upstream
+    // side" of filters; these are the buffers whose counts the user most
+    // often wants to size against). Registered first so collisions with
+    // top-level same-name overrides in Pass 2 fall through.
+    for(const auto& binding : m_geometryBindings)
+    {
+      for(const auto& aux : binding.auxiliary_ssbos)
+      {
+        register_buffer(aux.name, aux.size, aux.is_uniform, aux.layout);
+      }
+    }
+
+    // Pass 2 — top-level storage buffers (INPUTS storage_input +
+    // top-level AUXILIARY writes).
+    for(const auto& sb : m_storageBuffers)
+    {
+      // Whether this top-level buffer is a UBO or SSBO depends on the
+      // descriptor input it came from; look up by name. The QString →
+      // std::string conversion is done once per buffer, not once per input.
+      const std::string sb_name = sb.name.toStdString();
+      bool is_uniform = false;
+      for(const auto& inp : eff_desc.inputs)
+      {
+        if(inp.name != sb_name)
+          continue;
+        is_uniform = ossia::get_if<isf::uniform_input>(&inp.data) != nullptr;
+        break;
+      }
+      register_buffer(sb_name, sb.size, is_uniform, sb.layout);
+    }
+  }
 }
 
 int RenderedCSFNode::resolveDispatchExpression(const std::string& expr) const
@@ -481,13 +668,28 @@ int RenderedCSFNode::resolveDispatchExpression(const std::string& expr) const
   if(expr.empty())
     return 1;
 
-  // Try fixed integer first
-  try
-  {
-    return std::max(1, std::stoi(expr));
-  }
-  catch(...)
+  // Pure integer literal fast-path. Same guard as resolveCountExpression:
+  // std::stoi would otherwise silently accept "6 * $x" as 6.
   {
+    std::size_t i = 0;
+    while(i < expr.size() && std::isspace((unsigned char)expr[i]))
+      ++i;
+    const std::size_t first_digit = i;
+    while(i < expr.size() && std::isdigit((unsigned char)expr[i]))
+      ++i;
+    std::size_t last_digit = i;
+    while(i < expr.size() && std::isspace((unsigned char)expr[i]))
+      ++i;
+    if(first_digit < last_digit && i == expr.size())
+    {
+      try
+      {
+        return std::max(1, std::stoi(expr));
+      }
+      catch(...)
+      {
+      }
+    }
   }
 
   // Build expression evaluator
@@ -537,8 +739,6 @@ BufferView RenderedCSFNode::createStorageBuffer(
   QRhi& rhi = *renderer.state.rhi;
   QRhiBuffer* buffer = rhi.newBuffer(
       QRhiBuffer::Static, QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer, size);
-  qWarning() << "CSF ALLOC [createStorageBuffer]" << name << "size=" << size;
-
   if(buffer)
   {
     buffer->setName(QStringLiteral("CSF_StorageBuffer_%1").arg(name).toLocal8Bit());
@@ -597,6 +797,8 @@ int RenderedCSFNode::getArraySizeFromUI(const QString& bufferName) const
   }
   
   // Default array size if not found
+  qWarning() << "RenderedCSFNode: storage size port not resolved (storageSizeInputIndex="
+             << storageSizeInputIndex << "); falling back to 1024.";
   return 1024;
 }
 
@@ -630,7 +832,7 @@ void RenderedCSFNode::updateStorageBuffers(RenderList& renderer, QRhiResourceUpd
     // Search all port geometries since storage buffers aren't tied to a specific port.
     const auto stdName = storageBuffer.name.toStdString();
     bool found_aux = false;
-    for(const auto& [port_idx, geo_spec] : m_portGeometries)
+    for(const auto& [port_key, geo_spec] : m_portGeometries)
     {
       if(!geo_spec.meshes || geo_spec.meshes->meshes.empty())
         continue;
@@ -711,7 +913,6 @@ void RenderedCSFNode::updateStorageBuffers(RenderList& renderer, QRhiResourceUpd
               QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer
                   | QRhiBuffer::IndirectBuffer,
               requiredSize);
-          qWarning() << "CSF ALLOC [updateStorage/indirect]" << storageBuffer.name << "size=" << requiredSize;
           if(storageBuffer.buffer)
           {
             storageBuffer.buffer->setName(
@@ -753,27 +954,21 @@ void RenderedCSFNode::updateStorageBuffers(RenderList& renderer, QRhiResourceUpd
   // intermediate SRBs that reference stale/dangling buffer pointers.
 }
 
-// Returns the byte size of a GLSL type for SoA SSBO element stride
-static int glslTypeSizeBytes(const std::string& type) noexcept
-{
-  if(type == "float" || type == "int" || type == "uint")
-    return 4;
-  if(type == "vec2" || type == "ivec2" || type == "uvec2")
-    return 8;
-  if(type == "vec3" || type == "ivec3" || type == "uvec3")
-    return 12;
-  if(type == "vec4" || type == "ivec4" || type == "uvec4")
-    return 16;
-  if(type == "mat4")
-    return 64;
-  return 16; // fallback
-}
-
-// Returns the byte size of an ossia::geometry attribute format
-static int geometryFormatSizeBytes(int format) noexcept
+// GLSL type → byte size lives in IsfBindingsBuilder.hpp
+// (score::gfx::glslTypeSizeBytes for the bare type, std430ArrayStride for
+// the per-element stride inside an std430 SSBO array — these differ for
+// vec3, see header doc for the rationale). All call sites below resolve
+// by plain unqualified lookup, as they sit inside `namespace score::gfx`.
+
+// Returns the byte size of one upstream-side element of an
+// ossia::geometry attribute. For the user_struct format the producer
+// carries the size out-of-line on `element_byte_size` (sizeof of the
+// user-defined struct in std430); otherwise dispatches on the format
+// enum.
+static int geometryFormatSizeBytes(const ossia::geometry::attribute& a) noexcept
 {
   using F = ossia::geometry::attribute;
-  switch(format)
+  switch(a.format)
   {
     case F::float4: return 16;
     case F::float3: return 12;
@@ -794,6 +989,7 @@ static int geometryFormatSizeBytes(int format) noexcept
     case F::half3: return 6;
     case F::half2: return 4;
     case F::half1: return 2;
+    case F::user_struct: return (int)a.element_byte_size;
     default: return 4;
   }
 }
@@ -815,13 +1011,12 @@ void RenderedCSFNode::updateGeometryBindings(
       auto& binding = m_geometryBindings[pre_idx];
       if(binding.input_port_index >= 0 && !binding.has_vertex_count_spec)
       {
-        auto it = m_portGeometries.find(binding.input_port_index);
-        if(it != m_portGeometries.end()
-           && it->second.meshes && !it->second.meshes->meshes.empty())
+        if(auto* geo = findGeometryByPort(binding.input_port_index);
+           geo && geo->meshes && !geo->meshes->meshes.empty())
         {
-          binding.vertex_count = it->second.meshes->meshes[0].vertices;
-          if(it->second.meshes->meshes[0].instances > 0)
-            binding.instance_count = it->second.meshes->meshes[0].instances;
+          binding.vertex_count = geo->meshes->meshes[0].vertices;
+          if(geo->meshes->meshes[0].instances > 0)
+            binding.instance_count = geo->meshes->meshes[0].instances;
         }
       }
       pre_idx++;
@@ -846,12 +1041,11 @@ void RenderedCSFNode::updateGeometryBindings(
     const ossia::geometry* upstream_mesh = nullptr;
     if(binding.input_port_index >= 0)
     {
-      auto it = m_portGeometries.find(binding.input_port_index);
-      if(it != m_portGeometries.end()
-         && it->second.meshes && !it->second.meshes->meshes.empty())
+      if(auto* geo = findGeometryByPort(binding.input_port_index);
+         geo && geo->meshes && !geo->meshes->meshes.empty())
       {
         binding_has_upstream = true;
-        upstream_mesh = &it->second.meshes->meshes[0];
+        upstream_mesh = &geo->meshes->meshes[0];
       }
     }
 
@@ -898,7 +1092,6 @@ void RenderedCSFNode::updateGeometryBindings(
           auto* buf = renderer.state.rhi->newBuffer(
               QRhiBuffer::Static,
               QRhiBuffer::StorageBuffer, requiredSize);
-          qWarning() << "CSF ALLOC [auxResize]" << aux.name.c_str() << "size=" << requiredSize;
           buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str());
           buf->create();
           aux.buffer = buf;
@@ -907,6 +1100,15 @@ void RenderedCSFNode::updateGeometryBindings(
         QByteArray zero(requiredSize, 0);
         res.uploadStaticBuffer(aux.buffer, 0, requiredSize, zero.constData());
         aux.size = requiredSize;
+
+        // Keep read_buffer in sync for feedback receivers
+        if(aux.read_buffer)
+        {
+          aux.read_buffer->destroy();
+          aux.read_buffer->setSize(requiredSize);
+          aux.read_buffer->create();
+          res.uploadStaticBuffer(aux.read_buffer, 0, requiredSize, zero.constData());
+        }
       }
     }
 
@@ -962,15 +1164,14 @@ void RenderedCSFNode::updateGeometryBindings(
           auto& ssbo = binding.attribute_ssbos[attr_idx];
           if(req.access == "read_write" && !ssbo.read_buffer)
           {
-            const int elem_size = glslTypeSizeBytes(req.type);
+            const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor);
             const int count = ssbo.per_instance ? binding.instance_count : binding.vertex_count;
-            const int64_t buf_size = (int64_t)elem_size * count;
+            const int64_t buf_size = elem_stride * count;
             if(buf_size > 0)
             {
               auto* buf = renderer.state.rhi->newBuffer(
                   QRhiBuffer::Static,
                   QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, buf_size);
-              qWarning() << "CSF ALLOC [feedbackPingPong]" << req.name.c_str() << "size=" << buf_size;
               buf->setName(QByteArray("CSF_GeomPP_") + req.name.c_str());
               buf->create();
               QByteArray zero(buf_size, 0);
@@ -990,7 +1191,6 @@ void RenderedCSFNode::updateGeometryBindings(
             auto* buf = renderer.state.rhi->newBuffer(
                 QRhiBuffer::Static,
                 QRhiBuffer::StorageBuffer, aux.size);
-            qWarning() << "CSF ALLOC [feedbackPingPongAux]" << aux.name.c_str() << "size=" << aux.size;
             buf->setName(QByteArray("CSF_GeomPPAux_") + aux.name.c_str());
             buf->create();
             QByteArray zero(aux.size, 0);
@@ -1040,25 +1240,28 @@ void RenderedCSFNode::updateGeometryBindings(
         const auto& req = geo_input->attributes[attr_idx];
         auto& ssbo = binding.attribute_ssbos[attr_idx];
 
-        // Match by semantic
-        const ossia::attribute_semantic sem = ossia::name_to_semantic(req.semantic);
-        const ossia::geometry::attribute* geo_attr = nullptr;
-        if(sem != ossia::attribute_semantic::custom)
-          geo_attr = mesh.find(sem);
-        else
-          geo_attr = mesh.find(req.name);
+        // Match against upstream geometry — same 3-stage cascade as raw
+        // raster (findGeometryAttribute in Utils.cpp). The display_name
+        // stage handles `{ NAME: "position", SEMANTIC: "custom" }` falling
+        // back to the real position attribute when no shadowing custom one
+        // exists.
+        const ossia::geometry::attribute* geo_attr
+            = score::gfx::findGeometryAttribute(mesh, req.name, req.semantic);
 
         if(!geo_attr)
         {
-          // Create or keep a zero-filled fallback buffer
-          const int elem_size = glslTypeSizeBytes(req.type);
+          // Create or keep a zero-filled fallback buffer. std430ArrayStride
+          // ensures vec3 attributes get 16-byte stride to match what the
+          // shader's `T array[]` SSBO actually reads in std430.
+          const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor);
           const int fallback_count = ssbo.per_instance ? std::max(1, mesh.instances) : std::max(1, mesh.vertices);
-          const int64_t needed = (int64_t)elem_size * fallback_count;
+          const int64_t needed = elem_stride * fallback_count;
           if(!ssbo.buffer || ssbo.size < needed)
           {
             if(req.required && req.access == "read_only")
-              qWarning() << "CSF geometry: required read_only attribute" << req.name.c_str() << "not found"
-                         << "(semantic=" << (int)sem << ")";
+              qWarning() << "CSF geometry: required read_only attribute"
+                         << req.name.c_str() << "not found"
+                         << "(semantic=" << req.semantic.c_str() << ")";
             else
               qDebug() << "  attr" << req.name.c_str() << "not in upstream — creating fallback buffer";
 
@@ -1069,7 +1272,6 @@ void RenderedCSFNode::updateGeometryBindings(
             auto* buf = renderer.state.rhi->newBuffer(
                 QRhiBuffer::Static,
                 QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed);
-            qWarning() << "CSF ALLOC [geomFallback]" << req.name.c_str() << "size=" << needed;
             buf->setName(QByteArray("CSF_GeomFallback_") + req.name.c_str());
             buf->create();
             QByteArray zero(needed, 0);
@@ -1077,6 +1279,15 @@ void RenderedCSFNode::updateGeometryBindings(
             ssbo.buffer = buf;
             ssbo.size = needed;
             ssbo.owned = true;
+
+            // Keep read_buffer in sync for feedback receivers
+            if(ssbo.read_buffer)
+            {
+              ssbo.read_buffer->destroy();
+              ssbo.read_buffer->setSize(needed);
+              ssbo.read_buffer->create();
+              res.uploadStaticBuffer(ssbo.read_buffer, 0, needed, zero.constData());
+            }
           }
           continue;
         }
@@ -1099,14 +1310,21 @@ void RenderedCSFNode::updateGeometryBindings(
                                    ? mesh.bindings[binding_idx]
                                    : mesh.bindings[0];
 
-        const int attr_size = geometryFormatSizeBytes(geo_attr->format);
+        const int attr_size = geometryFormatSizeBytes(*geo_attr);
+        const int64_t csf_elem_stride = std430ArrayStride(req.type, n.m_descriptor);
         const int stride = geo_bind.byte_stride;
-        const bool is_soa = (stride == 0 || stride == attr_size);
+        // SoA upstream is bindable directly when the binding stride matches
+        // either the bare attribute size (tightly-packed mesh vertex buffer)
+        // or the std430 element stride (CSF SSBO output, vec3-padded). Both
+        // shapes are valid sources for an std430 SSBO consumer.
+        const bool is_soa = (stride == 0 || stride == attr_size
+                             || stride == (int)csf_elem_stride);
 
         if(auto* gpu = ossia::get_if<ossia::geometry::gpu_buffer>(&geo_buf.data))
         {
-          const int elem_size = glslTypeSizeBytes(req.type);
-          if(is_soa && gpu->handle && attr_size == elem_size)
+          const int elem_size = glslTypeSizeBytes(req.type, n.m_descriptor);
+          if(is_soa && gpu->handle
+             && (attr_size == elem_size || stride == (int)csf_elem_stride))
           {
             // SoA GPU buffer with matching element size: bind directly (zero-copy)
             auto* rhi_buf = static_cast<QRhiBuffer*>(gpu->handle);
@@ -1117,9 +1335,8 @@ void RenderedCSFNode::updateGeometryBindings(
             // feedback loop when the downstream node hasn't produced data yet).
             if(binding.has_vertex_count_spec && ssbo.owned && ssbo.buffer)
             {
-              const int elem_size = glslTypeSizeBytes(req.type);
               const int attr_count = ssbo.per_instance ? binding.instance_count : binding.vertex_count;
-              const int64_t needed = (int64_t)elem_size * attr_count;
+              const int64_t needed = csf_elem_stride * attr_count;
               if(needed > 0 && gpu->byte_size < needed)
               {
                 continue;
@@ -1163,9 +1380,10 @@ void RenderedCSFNode::updateGeometryBindings(
             continue;
 
           const auto* src = static_cast<const char*>(cpu->raw_data.get());
-          const int64_t elem_size = glslTypeSizeBytes(req.type);
+          const int64_t elem_size = glslTypeSizeBytes(req.type, n.m_descriptor);
+          const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor);
           const int data_count = ssbo.per_instance ? mesh.instances : mesh.vertices;
-          const int64_t needed = elem_size * data_count;
+          const int64_t needed = elem_stride * data_count;
 
           // Skip re-upload if we already own a correctly-sized buffer
           // and the upstream data hasn't changed (same CPU pointer as last upload).
@@ -1183,20 +1401,35 @@ void RenderedCSFNode::updateGeometryBindings(
             auto* buf = renderer.state.rhi->newBuffer(
                 QRhiBuffer::Static,
                 QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed);
-            qWarning() << "CSF ALLOC [geomUpload]" << req.name.c_str() << "size=" << needed;
             buf->setName(QByteArray("CSF_Geom_") + req.name.c_str());
             buf->create();
             ssbo.buffer = buf;
             ssbo.size = needed;
             ssbo.owned = true;
+
+            // For feedback receivers, also resize read_buffer to keep both
+            // ping-pong buffers the same size. Otherwise after the swap,
+            // ssbo.buffer would be the old undersized read_buffer while
+            // ssbo.size reflects the new size, causing buffer overruns.
+            if(ssbo.read_buffer)
+            {
+              ssbo.read_buffer->destroy();
+              ssbo.read_buffer->setSize(needed);
+              ssbo.read_buffer->create();
+            }
           }
 
           // Total byte offset into the buffer: input entry offset + attribute offset within stride
           const int64_t base_offset = input_byte_offset + geo_attr->byte_offset;
 
-          if(is_soa && attr_size == (int)elem_size)
+          // Direct upload only when source and destination strides match
+          // exactly. For vec3 attributes, that means upstream must already
+          // be std430-strided (16 bytes per element) — a tightly-packed
+          // upstream vec3 (stride 12) routes through scatter so the
+          // destination's 4-byte trailing padding stays zeroed.
+          if(is_soa && (int64_t)stride == elem_stride)
           {
-            // SoA CPU buffer with matching element size: upload directly
+            // SoA CPU buffer with matching stride: upload directly
             const int64_t upload_size = std::min(needed, cpu->byte_size - base_offset);
             if(upload_size > 0)
               res.uploadStaticBuffer(ssbo.buffer, 0, upload_size, src + base_offset);
@@ -1222,7 +1455,13 @@ void RenderedCSFNode::updateGeometryBindings(
             const int64_t upload_size = std::min(staging_needed, cpu->byte_size);
             res.uploadStaticBuffer(ssbo.scatterStaging, 0, upload_size, src);
 
-            // Prepare the scatter dispatch (will execute in runInitialPasses)
+            // The scatter compute writes dst_components floats per element
+            // (3 for vec3, i.e. 12 bytes of data); in std430 each vec3 slot
+            // is 16 bytes, so the trailing 4 padding bytes are never written
+            // and keep their zero-initialised value, matching what a
+            // well-behaved compute shader would produce.
+            // NOTE(review): confirm the scatter shader steps the destination
+            // by the std430 stride, not dst_components * sizeof(float).
             ssbo.scatterParams = GPUBufferScatter::Params{
                 .staging = ssbo.scatterStaging,
                 .output = ssbo.buffer,
@@ -1241,14 +1480,16 @@ void RenderedCSFNode::updateGeometryBindings(
           else
           {
             // CPU fallback: scatter per-element with format conversion
-            // (used when compute shaders are not available)
+            // (used when compute shaders are not available). Destination
+            // slots are elem_stride bytes apart; the first elem_size bytes
+            // hold the data, any trailing std430 padding stays zero.
             QByteArray scattered(needed, 0);
 
             if(elem_size > attr_size && elem_size >= (int)sizeof(float))
             {
               const float one = 1.0f;
               for(int i = 0; i < data_count; i++)
-                std::memcpy(scattered.data() + (int64_t)i * elem_size + elem_size - sizeof(float),
+                std::memcpy(scattered.data() + (int64_t)i * elem_stride + elem_size - sizeof(float),
                             &one, sizeof(float));
             }
 
@@ -1257,7 +1498,7 @@ void RenderedCSFNode::updateGeometryBindings(
             {
               const int64_t src_off = (int64_t)i * stride + base_offset;
               if(src_off + copy_size <= cpu->byte_size)
-                std::memcpy(scattered.data() + (int64_t)i * elem_size, src + src_off, copy_size);
+                std::memcpy(scattered.data() + (int64_t)i * elem_stride, src + src_off, copy_size);
             }
             res.uploadStaticBuffer(ssbo.buffer, 0, needed, scattered.constData());
           }
@@ -1319,10 +1560,12 @@ void RenderedCSFNode::updateGeometryBindings(
             {
               renderer.releaseBuffer(aux.buffer);
             }
+            // Usage flag matches the aux kind so the created buffer can
+            // be bound as the intended descriptor type.
+            const auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer
+                                              : QRhiBuffer::StorageBuffer;
             auto* buf = renderer.state.rhi->newBuffer(
-                QRhiBuffer::Static,
-                QRhiBuffer::StorageBuffer, requiredSize);
-            qWarning() << "CSF ALLOC [geoAuxNoMatch]" << aux.name.c_str() << "size=" << requiredSize;
+                QRhiBuffer::Static, usage, requiredSize);
             buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str());
             buf->create();
             QByteArray zero(requiredSize, 0);
@@ -1334,6 +1577,29 @@ void RenderedCSFNode::updateGeometryBindings(
         }
       }
 
+      // Auxiliary textures: match by name against the mesh's
+      // auxiliary_textures list. Fall back to the shape-matched
+      // placeholder when no match — same safety model as the raster
+      // path (never leave a stale upstream handle that may have been
+      // freed). SRB rebuild on handle change is driven by the existing
+      // initComputeSRBAndPasses / recreateSRB cycle; we only update
+      // the cached texture pointer here.
+      for(auto& at : binding.auxiliary_textures)
+      {
+        // Owned textures (auto-allocated writable storage images) are
+        // never overwritten by upstream resolution — we own the data,
+        // there's no upstream contributor.
+        if(at.owned)
+          continue;
+        const auto* aux = mesh.find_auxiliary_texture(at.name);
+        auto* tex = aux
+            ? static_cast<QRhiTexture*>(aux->native_handle)
+            : nullptr;
+        if(!tex)
+          tex = at.placeholder;
+        at.texture = tex;
+      }
+
       // When has_vertex_count_spec AND the upstream is a feedback loop (our own
       // SSBOs came back as gpu handles, identity check kept them owned), we must
       // still resize if $USER changed. Without this, the SSBOs stay at whatever
@@ -1350,9 +1616,9 @@ void RenderedCSFNode::updateGeometryBindings(
           {
             continue;
           }
-          const int elem_size = glslTypeSizeBytes(geo_input->attributes[attr_idx].type);
+          const int64_t elem_stride = std430ArrayStride(geo_input->attributes[attr_idx].type, n.m_descriptor);
           const int attr_count = ssbo.per_instance ? binding.instance_count : binding.vertex_count;
-          const int64_t needed = (int64_t)elem_size * attr_count;
+          const int64_t needed = elem_stride * attr_count;
           if(needed > 0 && ssbo.size != needed)
           {
             ssbo.buffer->destroy();
@@ -1410,8 +1676,8 @@ void RenderedCSFNode::updateGeometryBindings(
           const int count = ssbo.per_instance ? binding.instance_count : binding.vertex_count;
           if(count <= 0)
             continue;
-          const int elem_size = glslTypeSizeBytes(req.type);
-          const int64_t needed = (int64_t)elem_size * count;
+          const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor);
+          const int64_t needed = elem_stride * count;
 
           if(!ssbo.buffer || ssbo.size != needed)
           {
@@ -1426,7 +1692,6 @@ void RenderedCSFNode::updateGeometryBindings(
               auto* buf = renderer.state.rhi->newBuffer(
                   QRhiBuffer::Static,
                   QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed);
-              qWarning() << "CSF ALLOC [geomSpecResize]" << req.name.c_str() << "size=" << needed;
               buf->setName(QByteArray("CSF_GeomSpec_") + req.name.c_str());
               buf->create();
               ssbo.buffer = buf;
@@ -1496,11 +1761,10 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
     const ossia::geometry* binding_upstream = nullptr;
     if(binding.input_port_index >= 0)
     {
-      auto it = m_portGeometries.find(binding.input_port_index);
-      if(it != m_portGeometries.end()
-         && it->second.meshes && !it->second.meshes->meshes.empty())
+      if(auto* geo = findGeometryByPort(binding.input_port_index);
+         geo && geo->meshes && !geo->meshes->meshes.empty())
       {
-        binding_upstream = &it->second.meshes->meshes[0];
+        binding_upstream = &geo->meshes->meshes[0];
       }
     }
 
@@ -1556,10 +1820,18 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
       if(binding_upstream)
       {
         out_geo.bounds = binding_upstream->bounds;
-        // Inherit topology from upstream for filter-type nodes
-        out_geo.topology = (decltype(out_geo.topology))binding_upstream->topology;
-        out_geo.cull_mode = (decltype(out_geo.cull_mode))binding_upstream->cull_mode;
+        // Inherit topology / cull / face / blend / depth-write / filter
+        // metadata from upstream for filter-type nodes. Anything the CSF
+        // doesn't explicitly produce on its own should pass through —
+        // otherwise inserting a CSF between ScenePreprocessor and a
+        // rasterizer silently drops state the rasterizer relies on.
+        out_geo.topology   = (decltype(out_geo.topology))binding_upstream->topology;
+        out_geo.cull_mode  = (decltype(out_geo.cull_mode))binding_upstream->cull_mode;
         out_geo.front_face = (decltype(out_geo.front_face))binding_upstream->front_face;
+        out_geo.blend       = binding_upstream->blend;
+        out_geo.depth_write = binding_upstream->depth_write;
+        out_geo.filter_tag            = binding_upstream->filter_tag;
+        out_geo.filter_material_index = binding_upstream->filter_material_index;
       }
 
       for(int attr_idx = 0; attr_idx < (int)geo_input->attributes.size(); attr_idx++)
@@ -1574,7 +1846,12 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
           continue;
 
         const int buf_index = (int)out_geo.buffers.size();
-        const int elem_size = glslTypeSizeBytes(req.type);
+        // The buffer underneath is sized at std430 stride (16 bytes per
+        // vec3 element); declaring the binding stride to match is what
+        // lets a downstream raw-raster vertex shader read these
+        // attributes without the silent vec3-padding drift that left
+        // every fourth splat misaligned.
+        const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor);
 
         ossia::geometry::buffer buf{
             .data = ossia::geometry::gpu_buffer{ssbo.buffer, ssbo.size},
@@ -1582,7 +1859,7 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
         out_geo.buffers.push_back(std::move(buf));
 
         ossia::geometry::binding bind;
-        bind.byte_stride = elem_size;
+        bind.byte_stride = (uint32_t)elem_stride;
         bind.classification = ssbo.per_instance
             ? ossia::geometry::binding::per_instance
             : ossia::geometry::binding::per_vertex;
@@ -1720,6 +1997,62 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
                 .byte_offset = in_aux.byte_offset, .byte_size = in_aux.byte_size});
           }
         }
+
+        // First: publish THIS CSF's own writable storage images so they
+        // ride the geometry cable downstream and ExtractTexture / flat
+        // AUXILIARY rasterizer reads can resolve them by name. Mirrors
+        // the m_storageBuffers → out_geo.buffers forward done above.
+        for(const auto& si : m_storageImages)
+        {
+          if(si.access == "read_only" || !si.texture)
+            continue;
+          out_geo.auxiliary_textures.push_back(
+              ossia::geometry::auxiliary_texture{
+                  .name = si.name.toStdString(),
+                  .native_handle = si.texture,
+                  .sampler_handle = nullptr});
+        }
+
+        // Same forward for nested-aux storage images this binding
+        // auto-allocated (at.owned == true). Lets a CSF declare its
+        // writable storage image under the geometry-input AUXILIARY
+        // block and have it published to downstream consumers
+        // identically to the top-level csf_image_input case.
+        for(const auto& at : binding.auxiliary_textures)
+        {
+          if(!at.owned || !at.texture)
+            continue;
+          bool already_present = false;
+          for(const auto& existing : out_geo.auxiliary_textures)
+            if(existing.name == at.name) { already_present = true; break; }
+          if(already_present)
+            continue;
+          out_geo.auxiliary_textures.push_back(
+              ossia::geometry::auxiliary_texture{
+                  .name = at.name,
+                  .native_handle = at.texture,
+                  .sampler_handle = nullptr});
+        }
+
+        // Forward upstream auxiliary TEXTURES (skybox, irradiance_map,
+        // baseColorArray*, normalArray*, shadow_map_array, …). Without
+        // this, classic_pbr_full / classic_pbr_openpbr / any rasterizer
+        // that samples material texture arrays via sample_slot_* finds
+        // the bindings empty (or fallback-placeholder), every textureRef
+        // resolves to placeholder-black, and every textured fragment
+        // renders fully black. Same name-collision skip rule as the
+        // buffer forward — if THIS CSF declared an aux texture of the
+        // same name (RESOURCES.auxiliary_textures or similar), keep its
+        // binding and skip the upstream re-add.
+        for(const auto& in_atx : binding_upstream->auxiliary_textures)
+        {
+          bool already_present = false;
+          for(const auto& existing : out_geo.auxiliary_textures)
+            if(existing.name == in_atx.name) { already_present = true; break; }
+          if(already_present)
+            continue;
+          out_geo.auxiliary_textures.push_back(in_atx);
+        }
       }
 
       // Explicit COPY_FROM: forward auxiliary buffers from other geometries
@@ -1741,11 +2074,13 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
             = aux_req.forward->auxiliary.empty() ? aux_req.name : aux_req.forward->auxiliary;
 
         // Search all input port geometries for the source
-        for(const auto& [port_idx, geo_spec] : m_portGeometries)
+        for(const auto& [port_key, geo_spec] : m_portGeometries)
         {
           if(!geo_spec.meshes || geo_spec.meshes->meshes.empty())
             continue;
 
+          const int port_idx = port_key.first;
+
           // Match by geometry resource name → find the binding with that name
           int src_binding_idx = 0;
           bool found_geo = false;
@@ -1806,11 +2141,13 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
         const std::string& src_geo_name = attr_req.forward->geometry;
         const std::string& src_attr_name = attr_req.forward->attribute;
 
-        for(const auto& [port_idx, geo_spec] : m_portGeometries)
+        for(const auto& [port_key, geo_spec] : m_portGeometries)
         {
           if(!geo_spec.meshes || geo_spec.meshes->meshes.empty())
             continue;
 
+          const int port_idx = port_key.first;
+
           // Find the matching source geometry binding
           int src_binding_idx = 0;
           bool found_geo = false;
@@ -1945,16 +2282,51 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
         out_geo.indices = binding_upstream->indices;
       }
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-      if(binding.uses_indirect_draw && binding.indirectDrawBuffer)
+      if(binding.uses_indirect_draw && binding.indirectBuffer)
       {
         out_geo.indirect_count = ossia::geometry::gpu_buffer{
-            binding.indirectDrawBuffer,
-            binding.indirect_draw_indexed
-                ? (int64_t)sizeof(QRhiIndexedIndirectDrawCommand)
-                : (int64_t)sizeof(QRhiIndirectDrawCommand)};
+            binding.indirectBuffer,
+            binding.indirectBufferSize};
+      }
+      else if(binding_upstream
+              && binding_upstream->indirect_count.handle)
+      {
+        // Forward upstream's indirect-draw buffer when this CSF doesn't
+        // produce its own. ScenePreprocessor sets indirect_count to the
+        // MDI indirect_draw_cmds buffer (ScenePreprocessorNode.cpp:2329);
+        // an MDI rasterizer downstream reads from out_geo.indirect_count
+        // for vkCmdDrawIndexedIndirect dispatch. Without this forward,
+        // every passthrough CSF inserted between Preprocessor and an MDI
+        // rasterizer hands the rasterizer a null indirect buffer →
+        // garbage indexCount / firstIndex / baseVertex → triangles
+        // render at wild positions / wrong index ranges.
+        out_geo.indirect_count = binding_upstream->indirect_count;
+      }
+
+      // Forward CPU-side draw commands too. ScenePreprocessor populates
+      // these (`cpu_draw_commands`, ScenePreprocessorNode.cpp:2334) for
+      // the Qt < 6.12 / non-GPU-indirect fallback path. Without this
+      // forward, CustomMesh::update sees an empty vector and skips the
+      // assign() at line 370 — leaving `output_meshbuf.cpuDrawCommands`
+      // with stale data from a previous frame OR uninitialised
+      // small-vector contents, which the CPU draw fallback then issues
+      // as drawIndexed(garbage, garbage, ...). Symptom: Vulkan
+      // VUID-vkCmdDrawIndexed-robustBufferAccess2-08798 with huge
+      // firstIndex/indexCount values that look like pointer low bits.
+      if(binding_upstream && !binding_upstream->cpu_draw_commands.empty())
+      {
+        out_geo.cpu_draw_commands.assign(
+            binding_upstream->cpu_draw_commands.begin(),
+            binding_upstream->cpu_draw_commands.end());
       }
-#endif
+
+      // Stamp format_id from the descriptor's RESOURCES[geoOut] so a
+      // CSF that produces a primitive-cloud-shaped output declares its
+      // format identity in JSON and downstream FlattenedSceneFilter
+      // mode-12 can route it. Same hash + truncation as the
+      // ScenePreprocessor splat-bucket stamp.
+      if(!geo_input->format_id.empty())
+        out_geo.filter_tag = (uint32_t)ossia::hash_string(geo_input->format_id);
 
       meshes->meshes.push_back(std::move(out_geo));
       meshes->dirty_index = 1; // Initial structural build
@@ -2125,16 +2497,117 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
         }
       }
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-      if(binding.uses_indirect_draw && binding.indirectDrawBuffer)
+      if(binding.uses_indirect_draw && binding.indirectBuffer)
       {
         out_geo.indirect_count = ossia::geometry::gpu_buffer{
-            binding.indirectDrawBuffer,
-            binding.indirect_draw_indexed
-                ? (int64_t)sizeof(QRhiIndexedIndirectDrawCommand)
-                : (int64_t)sizeof(QRhiIndirectDrawCommand)};
+            binding.indirectBuffer,
+            binding.indirectBufferSize};
+      }
+      else if(binding_upstream
+              && binding_upstream->indirect_count.handle)
+      {
+        // Mirror the full-rebuild path: forward upstream's indirect-
+        // draw buffer when this CSF doesn't produce its own. Required
+        // for any passthrough CSF inserted in front of an MDI
+        // rasterizer (ScenePreprocessor → CSF → classic_pbr_mdi /
+        // openpbr / debug_lights). Without this, the fast path keeps
+        // the previously-published indirect_count handle, which is
+        // empty for compute passes that never set it themselves.
+        if(out_geo.indirect_count.handle != binding_upstream->indirect_count.handle
+           || out_geo.indirect_count.byte_size != binding_upstream->indirect_count.byte_size)
+        {
+          out_geo.indirect_count = binding_upstream->indirect_count;
+          any_handle_changed = true;
+        }
+      }
+
+      // Re-forward upstream's CPU draw commands every frame. The vector
+      // contents are immutable in the typical scene flow but the
+      // binding's outputGeometry mesh holds a copy that can drift if
+      // upstream rebuilds (e.g. a scene reload). Cheap re-assign each
+      // frame; ScenePreprocessor's command list is at most ~1k entries.
+      if(binding_upstream && !binding_upstream->cpu_draw_commands.empty())
+      {
+        out_geo.cpu_draw_commands.assign(
+            binding_upstream->cpu_draw_commands.begin(),
+            binding_upstream->cpu_draw_commands.end());
+      }
+
+      // Re-forward upstream metadata that the rasterizer reads but the
+      // CSF doesn't override: pipeline-state hints (blend, depth_write)
+      // and filter metadata (filter_tag, filter_material_index).
+      // Identity assignments — the upstream values either stayed the
+      // same since the structural pass or shifted (scene reload), and
+      // we want the latter to propagate.
+      if(binding_upstream)
+      {
+        out_geo.blend                 = binding_upstream->blend;
+        out_geo.depth_write           = binding_upstream->depth_write;
+        out_geo.filter_tag            = binding_upstream->filter_tag;
+        out_geo.filter_material_index = binding_upstream->filter_material_index;
+
+        // Re-forward upstream auxiliary TEXTURES (skybox, baseColorArray,
+        // shadow_map_array, …). Same forward as the structural-rebuild
+        // path; needed every frame in case upstream rebakes (CubemapLoader
+        // refresh, IBL bake, etc.). Skip names already declared by this
+        // CSF or already pushed in this frame.
+        out_geo.auxiliary_textures.clear();
+
+        // Publish THIS CSF's own writable storage images (write_only and
+        // read_write csf_image_input declarations) into the geometry
+        // cable's auxiliary_textures so downstream consumers (ExtractTexture
+        // node, rasterizers reading them as flat AUXILIARY) can resolve
+        // them by name. Without this push, the texture exists in this
+        // CSF's m_storageImages but is invisible to the world — the
+        // mirror of how m_storageBuffers is forwarded into out_geo.buffers
+        // a few lines above.
+        for(const auto& si : m_storageImages)
+        {
+          if(si.access == "read_only" || !si.texture)
+            continue;
+          out_geo.auxiliary_textures.push_back(
+              ossia::geometry::auxiliary_texture{
+                  .name = si.name.toStdString(),
+                  .native_handle = si.texture,
+                  .sampler_handle = nullptr});
+        }
+
+        // Same forward for write_only / read_write storage images
+        // declared as nested aux on the geometry input (auto-allocated
+        // in the binding setup with at.owned = true). Required for
+        // voxelize_scene_aabb.csf's `voxel_grid` to ship downstream
+        // when declared as a nested aux on the scene geometry input
+        // rather than as a top-level csf_image_input.
+        for(const auto& at : binding.auxiliary_textures)
+        {
+          if(!at.owned || !at.texture)
+            continue;
+          bool already_present = false;
+          for(const auto& existing : out_geo.auxiliary_textures)
+            if(existing.name == at.name) { already_present = true; break; }
+          if(already_present)
+            continue;
+          out_geo.auxiliary_textures.push_back(
+              ossia::geometry::auxiliary_texture{
+                  .name = at.name,
+                  .native_handle = at.texture,
+                  .sampler_handle = nullptr});
+        }
+
+        // Then forward upstream auxiliary textures, skipping any name
+        // this CSF already published above so producer-side overrides
+        // win over upstream defaults (consistent with the buffer-forward
+        // shadowing rule).
+        for(const auto& in_atx : binding_upstream->auxiliary_textures)
+        {
+          bool already_present = false;
+          for(const auto& existing : out_geo.auxiliary_textures)
+            if(existing.name == in_atx.name) { already_present = true; break; }
+          if(already_present)
+            continue;
+          out_geo.auxiliary_textures.push_back(in_atx);
+        }
       }
-#endif
 
       // Only bump dirty_index if any handle actually changed,
       // so downstream acquireMesh picks up the new buffers.
@@ -2188,7 +2661,7 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
           continue;
 
         int port_idx = it - sink->node->input.begin();
-        rendered->second->process(port_idx, binding.outputGeometry);
+        rendered->second->process(port_idx, binding.outputGeometry, out_edge->source);
       }
     }
 
@@ -2197,79 +2670,284 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat
   }
 }
 
-void RenderedCSFNode::initComputePass(
+
+void RenderedCSFNode::createGraphicsPass(
     const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
 {
-  QRhi& rhi = *renderer.state.rhi;
-  
-  if(!m_computePipeline)
-  {
-    createComputePipeline(renderer);
-  }
-  
-  if(!m_computePipeline)
-    return;
-    
-  // Ensure storage buffers are created before setting up bindings
-  updateStorageBuffers(renderer, res);
-
-  // Eagerly populate geometry bindings so we can detect buffer aliasing across
-  // attribute/auxiliary SSBOs (caused by feedback edges sharing the same
-  // physical buffer with conflicting access modes) BEFORE we emit any binding.
-  updateGeometryBindings(renderer, res);
+  // Create a graphics pass that samples this node's output texture for `edge` and draws it into `rt`.
+  static const constexpr auto vertex_shader = R"_(#version 450
+layout(location = 0) in vec2 position;
+layout(location = 1) in vec2 texcoord;
 
-  // Pre-pass: collect physical buffers used with conflicting access modes
-  // (read on one binding, write on another) so we can promote them to
-  // bufferLoadStore. The Qt RHI / Vulkan validation layer rejects bindings
-  // that reference the same buffer with different access flags within a pass.
-  std::unordered_set<QRhiBuffer*> aliased_buffers;
-  {
-    std::unordered_map<QRhiBuffer*, int> access_flags; // 1=read, 2=write, 3=both
-    int gb_idx = 0;
-    for(const auto& inp : n.m_descriptor.inputs)
-    {
-      auto* g = ossia::get_if<isf::geometry_input>(&inp.data);
-      if(!g)
-        continue;
-      if(gb_idx >= (int)m_geometryBindings.size())
-        break;
-      const auto& gb = m_geometryBindings[gb_idx++];
+layout(location = 0) out vec2 v_texcoord;
 
-      for(int ai = 0; ai < (int)g->attributes.size() && ai < (int)gb.attribute_ssbos.size(); ai++)
-      {
-        const auto& req = g->attributes[ai];
-        const auto& ssbo = gb.attribute_ssbos[ai];
-        if(req.access == "none" || !ssbo.buffer)
-          continue;
-        int f = (req.access == "read_only") ? 1 : (req.access == "write_only") ? 2 : 3;
-        access_flags[ssbo.buffer] |= f;
-        if(req.access == "read_write" && ssbo.read_buffer && ssbo.read_buffer != ssbo.buffer)
-          access_flags[ssbo.read_buffer] |= 1;
-      }
-      for(const auto& aux : gb.auxiliary_ssbos)
-      {
-        if(!aux.buffer)
-          continue;
-        int f = (aux.access == "read_only") ? 1 : (aux.access == "write_only") ? 2 : 3;
-        access_flags[aux.buffer] |= f;
-        if(aux.read_buffer && aux.read_buffer != aux.buffer)
-          access_flags[aux.read_buffer] |= 1;
-      }
-    }
-    for(const auto& [buf, flags] : access_flags)
-      if(flags == 3)
-        aliased_buffers.insert(buf);
-  }
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
 
-  // Create shader resource bindings
-  QList<QRhiShaderResourceBinding> bindings;
+out gl_PerVertex { vec4 gl_Position; };
 
-  // Binding 0: Renderer UBO (part of ProcessUBO in defaultUniforms)
+void main()
+{
+  v_texcoord = texcoord;
+  gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.);
+#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  gl_Position.y = - gl_Position.y;
+#endif
+}
+)_";
+
+  static const constexpr auto fragment_shader_rgba = R"_(#version 450
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
+
+layout(binding = 3) uniform sampler2D outputTexture;
+
+layout(location = 0) in vec2 v_texcoord;
+layout(location = 0) out vec4 fragColor;
+
+void main() { fragColor = texture(outputTexture, v_texcoord); }
+)_";
+  static const constexpr auto fragment_shader_r = R"_(#version 450
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
+
+layout(binding = 3) uniform sampler2D outputTexture;
+
+layout(location = 0) in vec2 v_texcoord;
+layout(location = 0) out vec4 fragColor;
+
+void main() { fragColor = vec4(texture(outputTexture, v_texcoord).rrr, 1.0); }
+)_";
+
+  // Geometry for the pass: the renderer's default triangle mesh (a triangle, not a quad)
+  const auto& mesh = renderer.defaultTriangle();
+
+  // Find the texture for the specific output port this edge is connected to
+  QRhiTexture* textureToRender = textureForOutput(*edge.source);
+  // Without a texture there is nothing to sample: warn and skip creating the pass
+  if(!textureToRender)
+  {
+    qWarning() << "No output texture available for graphics pass";
+    return;
+  }
+
+  auto fmt = textureToRender->format();
+  const char* fragment_shader{};
+  switch(fmt) // the listed single-channel / depth formats get the .rrr shader; everything else passes RGBA through
+  {
+    case QRhiTexture::Format::R8:
+    case QRhiTexture::Format::RED_OR_ALPHA8:
+#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
+    case QRhiTexture::Format::R8UI:
+    case QRhiTexture::Format::R32UI:
+#endif
+    case QRhiTexture::Format::R16:
+    case QRhiTexture::Format::R16F:
+    case QRhiTexture::Format::R32F:
+    case QRhiTexture::Format::D16:
+#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
+    case QRhiTexture::Format::D24:
+    case QRhiTexture::Format::D24S8:
+#endif
+    case QRhiTexture::Format::D32F:
+#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
+    case QRhiTexture::Format::D32FS8:
+#endif
+      fragment_shader = fragment_shader_r;
+      break;
+    default:
+      fragment_shader = fragment_shader_rgba;
+      break;
+  }
+
+  // Compile shaders
+  auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, vertex_shader, fragment_shader);
+
+  // Create a sampler for our output texture (linear mag/min filtering, no mipmapping, clamp-to-edge on both axes)
+  QRhiSampler* outputSampler = renderer.state.rhi->newSampler(
+    QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
+    QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
+  outputSampler->setName("RenderedCSFNode::OutputSampler");
+  outputSampler->create(); // NOTE(review): the result of create() is not checked here
+    
+  // Initialize mesh buffers
+  MeshBuffers meshBuffers = renderer.initMeshBuffer(mesh, res);
+  
+  // Build the pipeline to render our compute result
+  auto pip = score::gfx::buildPipeline(
+      renderer, mesh, vertexS, fragmentS, rt, nullptr, nullptr, 
+      std::array<Sampler, 1>{Sampler{outputSampler, textureToRender}});
+      
+  if(pip.pipeline)
+  {
+    m_graphicsPasses.emplace_back(&edge, GraphicsPass{pip, outputSampler, meshBuffers}); // pass is keyed by its edge
+  }
+  else
+  {
+    delete outputSampler; // pipeline creation failed: the sampler was not stored anywhere, so free it here
+  }
+}
+
+// Rewrites the `rgba8` layout qualifier of every read_only image2D declaration
+// in `current` to the format of the texture bound at the matching sampler
+// slot. An input whose texture is not available keeps its default qualifier;
+// later inputs are still processed. Returns the patched shader source.
+QString RenderedCSFNode::updateShaderWithImageFormats(QString current)
+{
+  // Index into m_inputSamplers: texture inputs and read_only images share it.
+  int sampler_index = 0;
+  for(const auto& input : n.m_descriptor.inputs)
+  {
+    if(ossia::get_if<isf::texture_input>(&input.data))
+    {
+      sampler_index++;
+      continue;
+    }
+    auto image = ossia::get_if<isf::csf_image_input>(&input.data);
+    if(!image || image->access != "read_only")
+      continue;
+
+    SCORE_ASSERT(sampler_index < m_inputSamplers.size());
+    // Consume the slot before any skip so the following inputs stay aligned.
+    auto tex_n = m_inputSamplers[sampler_index++].texture;
+    if(!tex_n)
+      continue;
+
+    const auto layout_fmt = rhiTextureFormatToShaderLayoutFormatString(tex_n->format());
+    const auto before = QStringLiteral(", rgba8) readonly uniform image2D %1;").arg(input.name.c_str());
+    const auto after = QStringLiteral(", %1) readonly uniform image2D %2;").arg(layout_fmt).arg(input.name.c_str());
+    current.replace(before, after);
+  }
+  return current;
+}
+
+// Builds one compute pipeline per distinct LOCAL_SIZE used by the CSF passes,
+// filling m_perPassPipelines, m_ownedPipelines and m_computePipeline. If shader
+// compilation throws, the pipelines allocated by this call are destroyed and
+// m_perPassPipelines is emptied, so nothing is leaked or left dangling.
+void RenderedCSFNode::createComputePipeline(RenderList& renderer)
+{
+  QRhi& rhi = *renderer.state.rhi;
+  if(!rhi.isFeatureSupported(QRhi::Compute))
+  {
+    qWarning() << "Compute shaders not supported on this backend";
+    return;
+  }
+
+  // Declared outside the try block so the catch handler can roll back.
+  std::map<std::array<int,3>, QRhiComputePipeline*> pipelineCache;
+  try
+  {
+    // Prepare the shader template with image format substitution.
+    // LOCAL_SIZE placeholders will be substituted per-pass below.
+    m_computeShaderSource = updateShaderWithImageFormats(n.m_computeS);
+
+    m_perPassPipelines.clear();
+    for(const auto& passDesc : n.m_descriptor.csf_passes)
+    {
+      const auto key = passDesc.local_size;
+      auto it = pipelineCache.find(key);
+      if(it == pipelineCache.end())
+      {
+        // First pass with this local_size: compile a dedicated pipeline.
+        QString src = m_computeShaderSource;
+        src.replace("ISF_LOCAL_SIZE_X", QString::number(key[0]));
+        src.replace("ISF_LOCAL_SIZE_Y", QString::number(key[1]));
+        src.replace("ISF_LOCAL_SIZE_Z", QString::number(key[2]));
+
+        // Presumably the call that throws on a compile error (see catch below).
+        QShader compiled = score::gfx::makeCompute(renderer.state, src);
+
+        auto* pipeline = rhi.newComputePipeline();
+        it = pipelineCache.emplace(key, pipeline).first;
+        pipeline->setShaderStage(QRhiShaderStage(QRhiShaderStage::Compute, compiled));
+      }
+      m_perPassPipelines.push_back(it->second);
+    }
+
+    // Store unique pipelines for cleanup
+    m_ownedPipelines.clear();
+    for(auto& [k, v] : pipelineCache)
+      m_ownedPipelines.push_back(v);
+
+    // For backward compat
+    m_computePipeline = m_perPassPipelines.empty() ? nullptr : m_perPassPipelines[0];
+    if(!m_perPassPipelines.empty())
+      m_computeShader = m_perPassPipelines[0]->shaderStage().shader();
+  }
+  catch(const std::exception& e)
+  {
+    qWarning() << "Failed to create compute shader:" << e.what();
+    // Roll back: a compile failure happens before the new pipelines reach
+    // m_ownedPipelines, so free them here and drop the per-pass pointers.
+    for(auto& [k, v] : pipelineCache)
+      delete v;
+    m_perPassPipelines.clear();
+    m_computePipeline = nullptr;
+  }
+}
+
+void RenderedCSFNode::buildComputeSrbBindings(
+    RenderList& renderer, QRhiResourceUpdateBatch& res,
+    QList<QRhiShaderResourceBinding>& bindings)
+{
+  QRhi& rhi = *renderer.state.rhi;
+
+  // Pre-pass: collect physical buffers used with conflicting access modes
+  // (read on one binding, write on another) so we can promote them to
+  // bufferLoadStore. The Qt RHI / Vulkan validation layer rejects bindings
+  // that reference the same buffer with different access flags within a pass.
+  std::unordered_set<QRhiBuffer*> aliased_buffers;
+  {
+    std::unordered_map<QRhiBuffer*, int> access_flags; // 1=read, 2=write, 3=both
+    int gb_idx = 0;
+    for(const auto& inp : n.m_descriptor.inputs)
+    {
+      auto* g = ossia::get_if<isf::geometry_input>(&inp.data);
+      if(!g)
+        continue;
+      if(gb_idx >= (int)m_geometryBindings.size())
+        break;
+      const auto& gb = m_geometryBindings[gb_idx++];
+
+      for(int ai = 0; ai < (int)g->attributes.size() && ai < (int)gb.attribute_ssbos.size(); ai++)
+      {
+        const auto& req = g->attributes[ai];
+        const auto& ssbo = gb.attribute_ssbos[ai];
+        if(req.access == "none" || !ssbo.buffer)
+          continue;
+        int f = (req.access == "read_only") ? 1 : (req.access == "write_only") ? 2 : 3;
+        access_flags[ssbo.buffer] |= f;
+        if(req.access == "read_write" && ssbo.read_buffer && ssbo.read_buffer != ssbo.buffer)
+          access_flags[ssbo.read_buffer] |= 1;
+      }
+      for(const auto& aux : gb.auxiliary_ssbos)
+      {
+        if(!aux.buffer)
+          continue;
+        int f = (aux.access == "read_only") ? 1 : (aux.access == "write_only") ? 2 : 3;
+        access_flags[aux.buffer] |= f;
+        if(aux.read_buffer && aux.read_buffer != aux.buffer)
+          access_flags[aux.read_buffer] |= 1;
+      }
+    }
+    for(const auto& [buf, flags] : access_flags)
+      if(flags == 3)
+        aliased_buffers.insert(buf);
+  }
+
+  // Binding 0: Renderer UBO (part of ProcessUBO in defaultUniforms)
   bindings.append(QRhiShaderResourceBinding::uniformBuffer(
       0, QRhiShaderResourceBinding::ComputeStage, &renderer.outputUBO()));
 
   // Binding 1: Process UBO (time, passIndex, etc.)
-  // Per-pass: actual pointer will be set later
+  // Per-pass: actual pointer is patched by each caller after this returns.
   bindings.append(
       QRhiShaderResourceBinding::uniformBuffer(
           1, QRhiShaderResourceBinding::ComputeStage, nullptr));
@@ -2292,14 +2970,14 @@ void RenderedCSFNode::initComputePass(
   for(const auto& input : n.m_descriptor.inputs)
   {
     // Storage buffers
-    if(ossia::get_if<isf::storage_input>(&input.data))
+    if(auto* storage_in = ossia::get_if<isf::storage_input>(&input.data))
     {
       // Find the corresponding storage buffer
       auto it = std::find_if(m_storageBuffers.begin(), m_storageBuffers.end(),
-          [&input](const StorageBuffer& sb) { 
-            return sb.name == QString::fromStdString(input.name); 
+          [&input](const StorageBuffer& sb) {
+            return sb.name == QString::fromStdString(input.name);
           });
-      
+
       if(it != m_storageBuffers.end() && it->buffer)
       {
         if(it->access == "read_only")
@@ -2322,42 +3000,73 @@ void RenderedCSFNode::initComputePass(
         else if(it->access == "write_only")
         {
           bindings.append(QRhiShaderResourceBinding::bufferStore(
-              bindingIndex++, QRhiShaderResourceBinding::ComputeStage, 
+              bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
               it->buffer));
           output_port_index++;
         }
         else // read_write
         {
           bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-              bindingIndex++, QRhiShaderResourceBinding::ComputeStage, 
+              bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
               it->buffer));
           output_port_index++;
         }
       }
-      else if(it != m_storageBuffers.end())
+      else
       {
-        if(!it->buffer) {
-          qDebug() << "CSF: cannot bind null buffer";
-        }
+        // Missing storage buffer: warn (used to be silent on the recreate
+        // path / qDebug on the init path — unify to qWarning) and bump
+        // bindingIndex so the rest of the layout stays in sync with the
+        // shader's expected slots.
+        if(it == m_storageBuffers.end())
+          qWarning() << "CSF: storage buffer not found for input"
+                     << QString::fromStdString(input.name);
+        else
+          qWarning() << "CSF: cannot bind null buffer for input"
+                     << QString::fromStdString(input.name);
         bindingIndex++;
       }
-      else
+
+      // Write-access buffers whose layout ends in a flexible-array member get a
+      // synthesized "size" INPUT port on the model (setupCSF / isf_input_port_-
+      // vis). The read_only branch advanced input_port_index for its own inlet,
+      // but the write branches above only touched output_port_index — so this
+      // sizing inlet was never skipped and every later storage input resolved
+      // the wrong port (its upstream buffer silently never bound). The geometry
+      // branch already does the equivalent for its $USER ports. Advance here
+      // under the SAME flex-array condition used everywhere else.
+      if(storage_in->access.contains("write") && !storage_in->layout.empty()
+         && storage_in->layout.back().type.find("[]") != std::string::npos)
       {
-        qDebug() << "CSF: storage buffer not found";
-        bindingIndex++;
+        input_port_index++;
       }
     }
     // Regular textures (sampled)
     else if(ossia::get_if<isf::texture_input>(&input.data))
     {
       // Regular sampled textures from m_inputSamplers
-      SCORE_ASSERT(input_image_index < m_inputSamplers.size());
-      auto [sampler, tex] = m_inputSamplers[input_image_index];
-      SCORE_ASSERT(sampler);
-      SCORE_ASSERT(tex);
-      bindings.append(
-          QRhiShaderResourceBinding::sampledTexture(
-              bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, sampler));
+      if(input_image_index < m_inputSamplers.size())
+      {
+        auto [sampler, tex, fb_] = m_inputSamplers[input_image_index];
+        if(sampler && tex)
+        {
+          bindings.append(
+              QRhiShaderResourceBinding::sampledTexture(
+                  bindingIndex, QRhiShaderResourceBinding::ComputeStage, tex, sampler));
+        }
+        else
+        {
+          qWarning() << "CSF: sampler/texture missing for texture_input"
+                     << QString::fromStdString(input.name);
+        }
+      }
+      else
+      {
+        qWarning() << "CSF: input_samplers under-allocated for texture_input"
+                   << QString::fromStdString(input.name);
+      }
+      // Always bump bindingIndex to keep the shader-layout slot count stable.
+      bindingIndex++;
       input_port_index++;
       input_image_index++;
     }
@@ -2366,23 +3075,35 @@ void RenderedCSFNode::initComputePass(
     {
       // Find the corresponding storage image
       auto it = std::find_if(m_storageImages.begin(), m_storageImages.end(),
-          [&input](const StorageImage& si) { 
-            return si.name == QString::fromStdString(input.name); 
+          [&input](const StorageImage& si) {
+            return si.name == QString::fromStdString(input.name);
           });
-      
+
       if(it != m_storageImages.end())
       {
         if(it->access == "read_only")
         {
-          SCORE_ASSERT(input_image_index < m_inputSamplers.size());
-          auto [sampler, tex] = m_inputSamplers[input_image_index];
-          SCORE_ASSERT(sampler);
-          SCORE_ASSERT(tex);
-
-          bindings.append(
-              QRhiShaderResourceBinding::imageLoad(
-                  bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, 0));
-
+          if(input_image_index < m_inputSamplers.size())
+          {
+            auto [sampler, tex, fb_] = m_inputSamplers[input_image_index];
+            if(tex)
+            {
+              bindings.append(
+                  QRhiShaderResourceBinding::imageLoad(
+                      bindingIndex, QRhiShaderResourceBinding::ComputeStage, tex, 0));
+            }
+            else
+            {
+              qWarning() << "CSF: missing read_only image texture for"
+                         << QString::fromStdString(input.name);
+            }
+          }
+          else
+          {
+            qWarning() << "CSF: input_samplers under-allocated for csf_image_input"
+                       << QString::fromStdString(input.name);
+          }
+          bindingIndex++;
           input_port_index++;
           input_image_index++;
         }
@@ -2396,48 +3117,69 @@ void RenderedCSFNode::initComputePass(
           if(imageSize.width() < 1 || imageSize.height() < 1)
             imageSize = renderer.state.renderSize;
 
-          if(!it->texture)
-          {
-            QRhiTexture* texture{};
-            if(image->is3D())
+          // Lazy-allocate the storage-image texture (and its persistent
+          // _prev twin) on first emission. After init this branch is a
+          // no-op (it->texture is already set), so the recreate path
+          // re-emits against the existing handle.
+          auto make_tex = [&](const char* suffix) -> QRhiTexture* {
+            QRhiTexture* t{};
+            if(image->isCube())
+            {
+              const int edge
+                  = std::max(imageSize.width(), imageSize.height());
+              QRhiTexture::Flags flags
+                  = QRhiTexture::CubeMap | QRhiTexture::UsedWithLoadStore;
+              t = rhi.newTexture(format, QSize(edge, edge), 1, flags);
+            }
+            else if(image->is3D())
             {
-              // 3D texture
               int depth = !image->depth_expression.empty()
                   ? resolveDispatchExpression(image->depth_expression)
-                  : imageSize.height(); // Default: cubic if only DIMENSIONS:3
-
+                  : imageSize.height();
               QRhiTexture::Flags flags
                   = QRhiTexture::ThreeDimensional | QRhiTexture::UsedWithLoadStore;
-              texture = rhi.newTexture(format, imageSize.width(), imageSize.height(), depth, 1, flags);
-              qWarning() << "CSF ALLOC [storageImage3D]" << input.name.c_str() << "size=" << imageSize.width() << "x" << imageSize.height() << "x" << depth;
+              t = rhi.newTexture(
+                  format, imageSize.width(), imageSize.height(), depth, 1, flags);
+            }
+            else if(image->is_array)
+            {
+              int layers = !image->layers_expression.empty()
+                  ? resolveDispatchExpression(image->layers_expression)
+                  : 1;
+              if(layers < 1) layers = 1;
+              QRhiTexture::Flags flags = QRhiTexture::UsedWithLoadStore;
+              t = rhi.newTextureArray(format, layers, imageSize, 1, flags);
             }
             else
             {
-              // 2D texture
               QRhiTexture::Flags flags
                   = QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore
                     | QRhiTexture::MipMapped | QRhiTexture::UsedWithGenerateMips;
-              texture = rhi.newTexture(format, imageSize, 1, flags);
-              qWarning() << "CSF ALLOC [storageImage2D]" << input.name.c_str() << "size=" << imageSize;
+              t = rhi.newTexture(format, imageSize, 1, flags);
             }
-            texture->setName(("RenderedCSFNode::storageImage::" + input.name).c_str());
-
-            if(texture && texture->create())
+            t->setName(
+                ("RenderedCSFNode::storageImage::" + input.name + suffix).c_str());
+            if(!t->create())
             {
-              // If this is the first write-only or read-write image, use it as the output
-              if(!m_outputTexture)
-              {
-                m_outputTexture = texture;
-                m_outputFormat = format;
-              }
-              it->texture = texture;
+              delete t;
+              return nullptr;
             }
-            else
+            return t;
+          };
+
+          if(!it->texture)
+          {
+            it->texture = make_tex("");
+            if(it->texture && !m_outputTexture)
             {
-              delete texture;
+              m_outputTexture = it->texture;
+              m_outputFormat = format;
             }
           }
+          if(it->persistent && !it->read_texture)
+            it->read_texture = make_tex("_prev");
 
+          it->binding = bindingIndex;
           if(it->access == "write_only" && it->texture)
           {
             bindings.append(
@@ -2454,12 +3196,47 @@ void RenderedCSFNode::initComputePass(
           }
           else
           {
+            if(!it->texture)
+              qWarning() << "CSF: missing storage-image texture for"
+                         << QString::fromStdString(input.name);
             bindingIndex++; // keep indices synchronized with shader layout
           }
+
+          // Persistent pair: `<name>_prev` readonly at the adjacent slot.
+          // First frame aliases back to `texture` (no prior frame to read).
+          if(it->persistent)
+          {
+            QRhiTexture* prev_tex
+                = it->pending_initial_copy ? it->texture : it->read_texture;
+            if(!prev_tex)
+              prev_tex = it->texture;
+            it->prev_binding = bindingIndex;
+            if(prev_tex)
+            {
+              bindings.append(
+                  QRhiShaderResourceBinding::imageLoad(
+                      bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
+                      prev_tex, 0));
+            }
+            else
+            {
+              qWarning() << "CSF: missing persistent _prev texture for"
+                         << QString::fromStdString(input.name);
+              bindingIndex++;
+            }
+          }
           output_port_index++;
           output_image_index++;
         }
       }
+      else
+      {
+        qWarning() << "CSF: storage image not found for"
+                   << QString::fromStdString(input.name);
+        bindingIndex++;
+        if(image->persistent)
+          bindingIndex++;
+      }
     }
     // Geometry inputs: bind per-attribute SSBOs
     else if(auto* geo_input = ossia::get_if<isf::geometry_input>(&input.data))
@@ -2505,15 +3282,16 @@ void RenderedCSFNode::initComputePass(
 
           if(!ssbo.buffer)
           {
-            // Create a minimal fallback buffer so we don't crash
-            const int elem_size = glslTypeSizeBytes(req.type);
+            // Create a minimal fallback buffer so we don't skip a binding
+            // index. Same fallback shape for both init and re-emit paths
+            // (the buffer name encodes the call site for debug clarity).
+            const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor);
             ssbo.buffer = rhi.newBuffer(
                 QRhiBuffer::Static,
-                QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, elem_size);
-            qWarning() << "CSF ALLOC [geomInit]" << req.name.c_str() << "size=" << elem_size;
-            ssbo.buffer->setName(QByteArray("CSF_GeomInit_") + req.name.c_str());
+                QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, elem_stride);
+            ssbo.buffer->setName(QByteArray("CSF_GeomFB_") + req.name.c_str());
             ssbo.buffer->create();
-            ssbo.size = elem_size;
+            ssbo.size = elem_stride;
             ssbo.owned = true;
           }
 
@@ -2525,7 +3303,7 @@ void RenderedCSFNode::initComputePass(
           {
             // On the first feedback frame (pending_initial_copy), use the same
             // buffer for both _in and _out so the shader can init + simulate
-            // in the same frame.  After the frame we copy buffer→read_buffer.
+            // in the same frame.  After the frame we copy buffer->read_buffer.
             QRhiBuffer* read_buf = (ssbo.read_buffer && !binding.pending_initial_copy)
                 ? ssbo.read_buffer : ssbo.buffer;
             if(read_buf == ssbo.buffer)
@@ -2551,56 +3329,158 @@ void RenderedCSFNode::initComputePass(
         {
           if(!aux.buffer)
           {
-            // Create a minimal fallback buffer so we don't skip a binding index
+            // Create a minimal fallback buffer so we don't skip a binding
+            // index. Usage flag must match the aux kind — binding a
+            // StorageBuffer-only buffer as a UBO (or vice versa) is
+            // rejected by the Vulkan validation layer.
+            const auto fallback_usage = aux.is_uniform
+                ? QRhiBuffer::UniformBuffer
+                : QRhiBuffer::StorageBuffer;
+            const quint32 fallback_size = aux.is_uniform ? 256u : 16u;
             aux.buffer = rhi.newBuffer(
-                QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 16);
-            qWarning() << "CSF ALLOC [auxInit]" << aux.name.c_str() << "size=16";
-            aux.buffer->setName(QByteArray("CSF_AuxInit_") + aux.name.c_str());
+                QRhiBuffer::Static, fallback_usage, fallback_size);
+            aux.buffer->setName(QByteArray("CSF_AuxFB_") + aux.name.c_str());
             aux.buffer->create();
-            aux.size = 16;
+            aux.size = fallback_size;
             aux.owned = true;
           }
 
-          appendBufBinding(aux.buffer, aux.access);
+          if(aux.is_uniform)
+          {
+            // std140 UBO kind: bind as uniform, not load/store. Access
+            // field is ignored (UBOs are read-only in GLSL).
+            bindings.append(
+                QRhiShaderResourceBinding::uniformBuffer(
+                    bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
+                    aux.buffer));
+          }
+          else
+          {
+            appendBufBinding(aux.buffer, aux.access);
+          }
         }
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-        // Bind indirect draw buffer as read-write SSBO
-        if(binding.uses_indirect_draw && binding.indirectDrawBuffer)
+        // Auxiliary textures for this geometry input — placed right
+        // after aux SSBOs, matching the GLSL emission order in
+        // parse_csf. Sampled entries → sampledTexture binding; storage
+        // entries → imageLoad / imageStore / imageLoadStore per access.
+        for(auto& at : binding.auxiliary_textures)
+        {
+          if(!at.texture)
+            at.texture = at.placeholder;
+
+          QRhiShaderResourceBinding b;
+          if(at.is_storage)
+          {
+            if(at.access == "read_only")
+              b = QRhiShaderResourceBinding::imageLoad(
+                  bindingIndex, QRhiShaderResourceBinding::ComputeStage,
+                  at.texture, 0);
+            else if(at.access == "write_only")
+              b = QRhiShaderResourceBinding::imageStore(
+                  bindingIndex, QRhiShaderResourceBinding::ComputeStage,
+                  at.texture, 0);
+            else
+              b = QRhiShaderResourceBinding::imageLoadStore(
+                  bindingIndex, QRhiShaderResourceBinding::ComputeStage,
+                  at.texture, 0);
+          }
+          else
+          {
+            b = QRhiShaderResourceBinding::sampledTexture(
+                bindingIndex, QRhiShaderResourceBinding::ComputeStage,
+                at.texture, at.sampler);
+          }
+          bindings.append(b);
+          at.binding = bindingIndex;
+          bindingIndex++;
+        }
+
+        if(binding.uses_indirect_draw && binding.indirectBuffer)
         {
           bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
               bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
-              binding.indirectDrawBuffer));
+              binding.indirectBuffer));
         }
-#endif
 
         geo_binding_index++;
       }
-      // Inlet port if any attribute reads from upstream
-      for(const auto& attr : geo_input->attributes)
-        if(attr.access == "read_only" || attr.access == "read_write") { input_port_index++; break; }
+      // Inlet port for upstream geometry. Two cases create one:
+      //   - Empty ATTRIBUTES => pure pass-through: ISFNode unconditionally
+      //     pushes an input port (the visitor at ISFNode.cpp's
+      //     `if(in.attributes.empty())` branch).
+      //   - Non-empty ATTRIBUTES with at least one read_only / read_write
+      //     attribute => an upstream-feeding inlet.
+      // Either way the geometry input owns ONE entry in node.input,
+      // which subsequent storage_input / texture_input / etc. address by
+      // position. Without this increment the very next read_only
+      // storage_input picks up node.input[0] (the geometry port) by
+      // mistake — its edges point to upstream geometry, bufferForInput
+      // returns empty, and the storage_input falls back to its own
+      // zero-initialised dummy buffer. Symptom: storage data from the
+      // upstream cable never reaches the compute shader.
+      bool geo_creates_inlet = geo_input->attributes.empty();
+      if(!geo_creates_inlet)
+      {
+        for(const auto& attr : geo_input->attributes)
+        {
+          if(attr.access == "read_only" || attr.access == "read_write")
+          {
+            geo_creates_inlet = true;
+            break;
+          }
+        }
+      }
+      if(geo_creates_inlet)
+        input_port_index++;
       // Skip $USER ports for this geometry input
       if(geo_input->vertex_count.find("$USER") != std::string::npos) input_port_index++;
       if(geo_input->instance_count.find("$USER") != std::string::npos) input_port_index++;
       for(const auto& aux : geo_input->auxiliary)
         if(aux.size.find("$USER") != std::string::npos) input_port_index++;
+      if(geo_input->indirect && geo_input->indirect->count.find("$USER") != std::string::npos) input_port_index++;
     }
     else
     {
       input_port_index++;
     }
   }
+}
+
+void RenderedCSFNode::initComputeSRBAndPasses(
+    RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  QRhi& rhi = *renderer.state.rhi;
+
+  if(!m_computePipeline)
+  {
+    createComputePipeline(renderer);
+  }
+
+  if(!m_computePipeline)
+    return;
+
+  // Ensure storage buffers are created before setting up bindings
+  updateStorageBuffers(renderer, res);
+
+  // Eagerly populate geometry bindings so we can detect buffer aliasing across
+  // attribute/auxiliary SSBOs (caused by feedback edges sharing the same
+  // physical buffer with conflicting access modes) BEFORE we emit any binding.
+  updateGeometryBindings(renderer, res);
+
+  // Single source of truth for the bindings list (also used by
+  // recreateShaderResourceBindings — see buildComputeSrbBindings).
+  QList<QRhiShaderResourceBinding> bindings;
+  buildComputeSrbBindings(renderer, res, bindings);
 
   // Set the SRB on the pipeline and create it
   {
-    QRhiShaderResourceBindings* passSRB{};
     // Create one ComputePass entry for each CSF pass, each with their own pipeline, ProcessUBO and SRB
     for(std::size_t passIdx = 0; passIdx < n.m_descriptor.csf_passes.size(); passIdx++)
     {
       // Create a separate ProcessUBO for this pass
       QRhiBuffer* passProcessUBO = rhi.newBuffer(
           QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO));
-      qWarning() << "CSF ALLOC [passProcessUBO] pass=" << passIdx << "size=" << sizeof(ProcessUBO);
       passProcessUBO->setName(QStringLiteral("RenderedCSFNode::pass%1::processUBO")
                                   .arg(passIdx)
                                   .toLocal8Bit());
@@ -2612,8 +3492,7 @@ void RenderedCSFNode::initComputePass(
       }
 
       // Create separate SRB for this pass with the specific ProcessUBO
-      passSRB = rhi.newShaderResourceBindings();
-      qWarning() << "CSF ALLOC [passSRB] pass=" << passIdx;
+      QRhiShaderResourceBindings* passSRB = rhi.newShaderResourceBindings();
       passSRB->setName(QString("passSRB.%1").arg(passIdx).toUtf8());
 
       // Replace the ProcessUBO binding (binding 1) with this pass's ProcessUBO
@@ -2629,7 +3508,6 @@ void RenderedCSFNode::initComputePass(
         qWarning() << "Failed to create SRB for CSF pass" << passIdx;
         delete passSRB;
         delete passProcessUBO;
-        passSRB = nullptr;
         continue;
       }
 
@@ -2648,510 +3526,617 @@ void RenderedCSFNode::initComputePass(
       }
 
       m_computePasses.emplace_back(
-          &edge, ComputePass{passPipeline, passSRB, passProcessUBO});
-    }
-
-    if(rt.renderTarget)
-    {
-      // Create the graphics pass for rendering this output to the render target
-      createGraphicsPass(rt, renderer, edge, res);
+          nullptr, ComputePass{passPipeline, passSRB, passProcessUBO});
     }
   }
 }
 
-void RenderedCSFNode::createGraphicsPass(
-    const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+void RenderedCSFNode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
-  // Create a graphics pass to render our compute output texture to the render target
-  static const constexpr auto vertex_shader = R"_(#version 450
-layout(location = 0) in vec2 position;
-layout(location = 1) in vec2 texcoord;
-
-layout(location = 0) out vec2 v_texcoord;
+  QRhi& rhi = *renderer.state.rhi;
 
-layout(std140, binding = 0) uniform renderer_t {
-  mat4 clipSpaceCorrMatrix;
-  vec2 renderSize;
-} renderer;
+  // Reset the "first frame" gate so that generateMips() in update() waits
+  // for the upstream pass to actually write the input textures before being
+  // called -- see the matching comment in update().
+  m_inputsHaveBeenWritten = false;
 
-out gl_PerVertex { vec4 gl_Position; };
+  // Check for compute support
+  if(!rhi.isFeatureSupported(QRhi::Compute))
+  {
+    qWarning() << "Compute shaders not supported on this backend";
+    return;
+  }
 
-void main()
-{
-  v_texcoord = texcoord;
-  gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.);
-#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL)
-  gl_Position.y = - gl_Position.y;
-#endif
-}
-)_";
+  // ProcessUBO will be created per-pass in initComputeSRBAndPasses
 
-  static const constexpr auto fragment_shader_rgba = R"_(#version 450
-layout(std140, binding = 0) uniform renderer_t {
-  mat4 clipSpaceCorrMatrix;
-  vec2 renderSize;
-} renderer;
+  // Initialize GPU buffer scatter for format conversion
+  m_gpuScatterAvailable = m_gpuScatter.init(renderer.state);
 
-layout(binding = 3) uniform sampler2D outputTexture;
+  // Create the material UBO
+  m_materialSize = n.m_materialSize;
+  if(m_materialSize > 0)
+  {
+    m_materialUBO = rhi.newBuffer(
+        QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
+    m_materialUBO->setName("RenderedCSFNode::init::m_materialUBO");
+    if(!m_materialUBO->create())
+    {
+      qWarning() << "Failed to create uniform buffer";
+      delete m_materialUBO;
+      m_materialUBO = nullptr;
+    }
+    else if(n.m_material_data)
+    {
+      res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get());
+    }
+  }
 
-layout(location = 0) in vec2 v_texcoord;
-layout(location = 0) out vec4 fragColor;
+  // Initialize input samplers
+  SCORE_ASSERT(m_computePasses.empty());
+  SCORE_ASSERT(m_inputSamplers.empty());
 
-void main() { fragColor = texture(outputTexture, v_texcoord); }
-)_";
-  static const constexpr auto fragment_shader_r = R"_(#version 450
-layout(std140, binding = 0) uniform renderer_t {
-  mat4 clipSpaceCorrMatrix;
-  vec2 renderSize;
-} renderer;
+  // Create samplers for input textures
+  m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor());
 
-layout(binding = 3) uniform sampler2D outputTexture;
+  // Parse descriptor to create storage buffers and determine output texture requirements.
+  // We also track the input port index to build the geometry-binding-to-port mapping.
+  // The input port index mirrors the order in which ISFNode's visitor calls
+  // self.input.push_back() for each descriptor input.
+  int sb_index = 0;
+  int outlet_index = 0;
+  int input_port_index = 0; // tracks which input port we're at
+  auto& outlets = n.output;
+  for(const auto& input : n.m_descriptor.inputs)
+  {
+    // Handle storage buffers
+    if(auto* storage = ossia::get_if<isf::storage_input>(&input.data))
+    {
+      // Create storage buffer entry - actual buffer will be created/sized in updateStorageBuffers
+      StorageBuffer sb;
+      sb.buffer = nullptr; // Will be created in updateStorageBuffers
+      sb.size = 0;
+      sb.lastKnownSize = 0; // Force initial creation
+      sb.name = QString::fromStdString(input.name);
+      sb.buffer_usage = storage->buffer_usage;
+      sb.access = QString::fromStdString(storage->access);
+      sb.layout = storage->layout; // Store layout for size calculation
+      m_storageBuffers.push_back(sb);
 
-layout(location = 0) in vec2 v_texcoord;
-layout(location = 0) out vec4 fragColor;
+      if(sb.access.contains("write")) {
+        m_outStorageBuffers.push_back({outlets[outlet_index], sb_index});
+        outlet_index++;
+      }
+      // read_only storage creates an input port
+      if(storage->access == "read_only")
+        input_port_index++;
+      sb_index++;
+    }
+    // Handle CSF images
+    else if(auto* image = ossia::get_if<isf::csf_image_input>(&input.data))
+    {
+      QRhiTexture::Format format = getTextureFormat(QString::fromStdString(image->format));
+      StorageImage si;
+      si.name = QString::fromStdString(input.name);
+      si.access = QString::fromStdString(image->access);
+      si.format = format;
+      si.is3D = image->is3D();
+      si.isCube = image->isCube();
+      si.persistent = image->persistent;
+      si.pending_initial_copy = image->persistent;
+      // generateMips is only meaningful on plain 2D images — QRhi doesn't
+      // define a mip chain for 3D, cubemaps would need per-face generation
+      // that QRhi::generateMips doesn't promise across backends, and 2D
+      // arrays similarly have per-layer semantics that aren't guaranteed.
+      // Silently disable the flag outside of plain 2D so downstream samplers
+      // don't hit a no-op they might have expected to work.
+      si.generate_mips = image->generate_mips && !image->is3D()
+                         && !image->isCube() && !image->is_array;
+      m_storageImages.push_back(si);
 
-void main() { fragColor = vec4(texture(outputTexture, v_texcoord).rrr, 1.0); }
-)_";
+      if(m_storageImages.back().access.contains("write")) {
+        int img_index = (int)m_storageImages.size() - 1;
+        m_outStorageImages.push_back({outlets[outlet_index], img_index});
+        outlet_index++;
+      }
+      // read_only CSF image creates an input port
+      if(image->access == "read_only")
+        input_port_index++;
+    }
+    // Handle geometry inputs
+    else if(auto* geo = ossia::get_if<isf::geometry_input>(&input.data))
+    {
+      // Determine if this geometry_input creates an input port (mirrors ISFNode
+      // visitor logic: empty ATTRIBUTES, or any read_only / read_write attribute)
+      bool needs_input = geo->attributes.empty(); // empty = pass-through, always has input
+      if(!needs_input)
+      {
+        for(const auto& attr : geo->attributes)
+          if(attr.access == "read_only" || attr.access == "read_write")
+          { needs_input = true; break; }
+      }
 
-  // Get the mesh for rendering a fullscreen quad
-  const auto& mesh = renderer.defaultTriangle();
+      GeometryBinding binding;
+      binding.input_name = input.name;
+      binding.input_port_index = needs_input ? input_port_index : -1;
+      binding.has_output = geo->attributes.empty(); // Empty attributes = pure pass-through with output
+      binding.has_vertex_count_spec = !geo->vertex_count.empty();
+      binding.has_instance_count_spec = !geo->instance_count.empty();
 
-  // Find the texture for the specific output port this edge is connected to
-  QRhiTexture* textureToRender = textureForOutput(*edge.source);
-  // If we still don't have a texture, we can't create the graphics pass
-  if(!textureToRender)
-  {
-    qWarning() << "No output texture available for graphics pass";
-    return;
-  }
+      for(const auto& attr : geo->attributes)
+      {
+        GeometryBinding::AttributeSSBO ssbo;
+        ssbo.name = attr.name;
+        ssbo.access = attr.access;
+        ssbo.per_instance = (attr.rate == "instance");
+        binding.attribute_ssbos.push_back(std::move(ssbo));
 
-  auto fmt = textureToRender->format();
-  const char* fragment_shader{};
-  switch(fmt)
-  {
-    case QRhiTexture::Format::R8:
-    case QRhiTexture::Format::RED_OR_ALPHA8:
-#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
-    case QRhiTexture::Format::R8UI:
-    case QRhiTexture::Format::R32UI:
-#endif
-    case QRhiTexture::Format::R16:
-    case QRhiTexture::Format::R16F:
-    case QRhiTexture::Format::R32F:
-    case QRhiTexture::Format::D16:
-#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
-    case QRhiTexture::Format::D24:
-    case QRhiTexture::Format::D24S8:
-#endif
-    case QRhiTexture::Format::D32F:
-#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
-    case QRhiTexture::Format::D32FS8:
-#endif
-      fragment_shader = fragment_shader_r;
-      break;
-    default:
-      fragment_shader = fragment_shader_rgba;
-      break;
-  }
+        if(attr.access != "read_only" && attr.access != "none")
+          binding.has_output = true;
+      }
 
-  // Compile shaders
-  auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, vertex_shader, fragment_shader);
+      // If vertex_count is specified, resolve it now; attribute SSBOs are pre-allocated below
+      if(binding.has_vertex_count_spec)
+      {
+        int count = resolveCountExpression(geo->vertex_count, *geo, "vertex_count");
+        if(count > 0)
+          binding.vertex_count = count;
+      }
 
-  // Create a sampler for our output texture
-  QRhiSampler* outputSampler = renderer.state.rhi->newSampler(
-    QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
-    QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
-  outputSampler->setName("RenderedCSFNode::OutputSampler");
-  outputSampler->create();
-    
-  // Initialize mesh buffers
-  MeshBuffers meshBuffers = renderer.initMeshBuffer(mesh, res);
-  
-  // Build the pipeline to render our compute result
-  auto pip = score::gfx::buildPipeline(
-      renderer, mesh, vertexS, fragmentS, rt, nullptr, nullptr, 
-      std::array<Sampler, 1>{Sampler{outputSampler, textureToRender}});
-      
-  if(pip.pipeline)
-  {
-    m_graphicsPasses.emplace_back(&edge, GraphicsPass{pip, outputSampler, meshBuffers});
-  }
-  else
-  {
-    delete outputSampler;
-  }
-}
+      // Resolve instance_count if specified
+      if(binding.has_instance_count_spec)
+      {
+        int ic = resolveCountExpression(geo->instance_count, *geo, "instance_count");
+        if(ic > 0)
+          binding.instance_count = ic;
+      }
 
-QString RenderedCSFNode::updateShaderWithImageFormats(QString current)
-{
-  int sampler_index = 0;
-  for(const auto& input : n.m_descriptor.inputs)
-  {
-    if(auto tex_input = ossia::get_if<isf::texture_input>(&input.data))
-    {
-      sampler_index++;
-    }
-    if(auto image = ossia::get_if<isf::csf_image_input>(&input.data))
-    {
-      if(image->access == "read_only")
+      // Pre-allocate attribute SSBOs using the correct count based on rate
       {
-        SCORE_ASSERT(sampler_index < m_inputSamplers.size());
-        auto tex_n = m_inputSamplers[sampler_index].texture;
-        if(!tex_n)
-          return current;
+        for(int attr_idx = 0; attr_idx < (int)geo->attributes.size(); attr_idx++)
+        {
+          if(attr_idx >= (int)binding.attribute_ssbos.size())
+            break;
+          auto& ssbo = binding.attribute_ssbos[attr_idx];
+          if(ssbo.access == "none")
+            continue;
+          const int count = ssbo.per_instance ? binding.instance_count : binding.vertex_count;
+          if(count <= 0)
+            continue;
+          const int64_t elem_stride = std430ArrayStride(geo->attributes[attr_idx].type, n.m_descriptor);
+          const int64_t needed = elem_stride * count;
+          auto* buf = rhi.newBuffer(
+              QRhiBuffer::Static,
+              QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed);
+          buf->setName(QByteArray("CSF_GeomSpec_") + ssbo.name.c_str());
+          buf->create();
+          QByteArray zero(needed, 0);
+          res.uploadStaticBuffer(buf, 0, needed, zero.constData());
+          ssbo.buffer = buf;
+          ssbo.size = needed;
+          ssbo.owned = true;
+        }
+      }
 
-        const auto fmt = tex_n->format();
-        const auto layout_fmt = rhiTextureFormatToShaderLayoutFormatString(fmt);
+      for(const auto& aux : geo->auxiliary)
+      {
+        // COPY_FROM auxiliaries are forwarded in pushOutputGeometry, no SSBO needed
+        if(aux.forward)
+          continue;
 
-        const auto before = QStringLiteral(", rgba8) readonly uniform image2D %1;").arg(input.name.c_str());
-        const auto after = QStringLiteral(", %1) readonly uniform image2D %2;").arg(layout_fmt).arg(input.name.c_str());
+        GeometryBinding::AuxiliarySSBO ssbo;
+        ssbo.name = aux.name;
+        ssbo.access = aux.access;
+        ssbo.is_uniform = aux.is_uniform;
+        ssbo.layout = aux.layout;
+        ssbo.size_expr = aux.size;
 
-        current.replace(before, after);
-        sampler_index++;
+        // Create the buffer immediately so it's available for the first dispatch.
+        // Usage flag matches the aux kind — UBO path uses UniformBuffer,
+        // SSBO path uses StorageBuffer. Using the wrong usage flag is a
+        // Vulkan validation error at bind time.
+        int arrayCount = 0;
+        if(!aux.size.empty())
+          arrayCount = resolveCountExpression(aux.size, *geo, aux.name);
+
+        const int64_t requiredSize = score::gfx::calculateStorageBufferSize(
+            aux.layout, arrayCount, this->n.descriptor());
+        if(requiredSize > 0)
+        {
+          const auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer
+                                            : QRhiBuffer::StorageBuffer;
+          auto* buf = rhi.newBuffer(QRhiBuffer::Static, usage, requiredSize);
+          buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str());
+          buf->create();
+          QByteArray zero(requiredSize, 0);
+          res.uploadStaticBuffer(buf, 0, requiredSize, zero.constData());
+          ssbo.buffer = buf;
+          ssbo.size = requiredSize;
+          ssbo.owned = true;
+        }
+
+        binding.auxiliary_ssbos.push_back(std::move(ssbo));
+
+        // UBOs are inherently read-only from GLSL, so they never flag
+        // has_output. For SSBOs, any non-read_only access opts in.
+        if(!aux.is_uniform && aux.access != "read_only")
+          binding.has_output = true;
       }
-    }
-  }
-  return current;
 
-}
+      // Auxiliary textures: one entry per geometry_input AUXILIARY
+      // texture declaration. Sampler allocated now (or skipped for
+      // storage-image entries); placeholder texture picked from the
+      // RenderList empties so the SRB is always valid even before an
+      // upstream resolution happens. Per-frame resolution against
+      // ossia::geometry::auxiliary_textures happens in
+      // updateGeometryBindings.
+      //
+      // For write_only / read_write storage-image entries this binding
+      // ALSO allocates the actual texture itself (analog of the
+      // m_storageImages allocation that top-level csf_image_input
+      // entries get). Without this auto-alloc the binding stays glued
+      // to the RGBA8-typed sample-only emptyTexture3D placeholder and
+      // any imageStore / imageAtomicOr against an integer-formatted
+      // shader (uimage3D r32ui) trips Vulkan validation 00339 (no
+      // STORAGE_BIT) + 07753 (UINT vs UNORM) + 02691 (no atomic
+      // format feature).
+      for(const auto& atx : geo->auxiliary_textures)
+      {
+        RenderedCSFNode::GeometryBinding::AuxiliaryTexture at;
+        at.name = atx.name;
+        at.is_storage = atx.is_storage;
+        at.access = atx.access;
 
-void RenderedCSFNode::createComputePipeline(RenderList& renderer)
-{
-  QRhi& rhi = *renderer.state.rhi;
-  
-  if(!rhi.isFeatureSupported(QRhi::Compute))
-  {
-    qWarning() << "Compute shaders not supported on this backend";
-    return;
-  }
-  
-  try
-  {
-    // Prepare the shader template with image format substitution.
-    // LOCAL_SIZE placeholders will be substituted per-pass below.
-    m_computeShaderSource = updateShaderWithImageFormats(n.m_computeS);
+        if(!atx.is_storage)
+        {
+          at.sampler = score::gfx::makeSampler(rhi, atx.sampler);
+          at.sampler->setName(
+              QByteArray("CSF_AuxTex_sampler::") + atx.name.c_str());
+        }
 
-    // Compile one pipeline per unique LOCAL_SIZE, reuse when passes share the same size.
-    m_perPassPipelines.clear();
-    std::map<std::array<int,3>, QRhiComputePipeline*> pipelineCache;
+        if(atx.is_cubemap)
+          at.placeholder = &renderer.emptyTextureCube();
+        else if(atx.dimensions == 3)
+          at.placeholder = &renderer.emptyTexture3D();
+        else if(atx.is_array)
+          at.placeholder = &renderer.emptyTextureArray();
+        else
+          at.placeholder = &renderer.emptyTexture();
+        at.texture = at.placeholder;
 
-    for(std::size_t passIdx = 0; passIdx < n.m_descriptor.csf_passes.size(); passIdx++)
-    {
-      const auto& passDesc = n.m_descriptor.csf_passes[passIdx];
-      const auto key = passDesc.local_size;
+        // Auto-allocate writable storage image. Resolves the size
+        // expressions (WIDTH/HEIGHT/DEPTH/LAYERS) the same way
+        // computeTextureSize does for top-level csf_image_input entries.
+        if(atx.is_storage && atx.access != "read_only")
+        {
+          QRhiTexture::Format format = getTextureFormat(
+              QString::fromStdString(atx.format));
+
+          int w = !atx.width_expression.empty()
+              ? std::max(1, resolveDispatchExpression(atx.width_expression))
+              : renderer.state.renderSize.width();
+          int h = !atx.height_expression.empty()
+              ? std::max(1, resolveDispatchExpression(atx.height_expression))
+              : renderer.state.renderSize.height();
+
+          QRhiTexture* alloc = nullptr;
+          if(atx.is_cubemap)
+          {
+            const int edge = std::max(w, h);
+            alloc = rhi.newTexture(
+                format, QSize(edge, edge), 1,
+                QRhiTexture::CubeMap | QRhiTexture::UsedWithLoadStore);
+          }
+          else if(atx.dimensions == 3)
+          {
+            int d = !atx.depth_expression.empty()
+                ? std::max(1, resolveDispatchExpression(atx.depth_expression))
+                : h;  // no depth expression: fall back to a depth equal to the height
+            alloc = rhi.newTexture(
+                format, w, h, d, 1,
+                QRhiTexture::ThreeDimensional | QRhiTexture::UsedWithLoadStore);
+          }
+          else if(atx.is_array)
+          {
+            int layers = !atx.layers_expression.empty()
+                ? std::max(1, resolveDispatchExpression(atx.layers_expression))
+                : 1;
+            alloc = rhi.newTextureArray(
+                format, layers, QSize(w, h), 1,
+                QRhiTexture::UsedWithLoadStore);
+          }
+          else
+          {
+            alloc = rhi.newTexture(
+                format, QSize(w, h), 1,
+                QRhiTexture::UsedWithLoadStore);
+          }
 
-      auto it = pipelineCache.find(key);
-      if(it != pipelineCache.end())
-      {
-        // Reuse existing pipeline
-        m_perPassPipelines.push_back(it->second);
+          if(alloc)
+          {
+            alloc->setName(
+                ("CSF::auxStorageImage::" + atx.name).c_str());
+            if(alloc->create())
+            {
+              at.texture = alloc;
+              at.owned = true;
+            }
+            else
+            {
+              delete alloc;
+            }
+          }
+        }
+
+        binding.auxiliary_textures.push_back(std::move(at));
       }
-      else
+
+      if(geo->indirect)
       {
-        // Compile new pipeline for this local_size
-        QString src = m_computeShaderSource;
-        src.replace("ISF_LOCAL_SIZE_X", QString::number(key[0]));
-        src.replace("ISF_LOCAL_SIZE_Y", QString::number(key[1]));
-        src.replace("ISF_LOCAL_SIZE_Z", QString::number(key[2]));
+        binding.uses_indirect_draw = true;
+        binding.indirectCountExpr = geo->indirect->count;
 
-        QShader compiled = score::gfx::makeCompute(renderer.state, src);
+        int count = resolveCountExpression(geo->indirect->count, *geo, "__indirect_count__");
+        if(count <= 0) count = 1;
+        binding.indirectCountResult = count;
 
-        auto* pipeline = rhi.newComputePipeline();
-        pipeline->setShaderStage(QRhiShaderStage(QRhiShaderStage::Compute, compiled));
+        const int64_t indirectSize = (int64_t)count * 5 * sizeof(uint32_t);
 
-        pipelineCache[key] = pipeline;
-        m_perPassPipelines.push_back(pipeline);
+        QRhiBuffer::UsageFlags usageFlags = QRhiBuffer::StorageBuffer;
+        usageFlags = usageFlags | QRhiBuffer::StorageBuffer;
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+        usageFlags = usageFlags | QRhiBuffer::IndirectBuffer;
+#endif
+
+        auto* buf = rhi.newBuffer(QRhiBuffer::Static, usageFlags, indirectSize);
+        buf->setName(QByteArray("CSF_Indirect_") + input.name.c_str());
+        buf->create();
+
+        QByteArray zero(indirectSize, 0);
+        res.uploadStaticBuffer(buf, 0, indirectSize, zero.constData());
+
+        binding.indirectBuffer = buf;
+        binding.indirectBufferSize = indirectSize;
       }
-    }
 
-    // Store unique pipelines for cleanup
-    m_ownedPipelines.clear();
-    for(auto& [k, v] : pipelineCache)
-      m_ownedPipelines.push_back(v);
+      const bool geo_has_output = binding.has_output;
+      m_geometryBindings.push_back(std::move(binding));
 
-    // For backward compat
-    m_computePipeline = m_perPassPipelines.empty() ? nullptr : m_perPassPipelines[0];
-    if(!m_perPassPipelines.empty())
-      m_computeShader = m_perPassPipelines[0]->shaderStage().shader();
-  }
-  catch(const std::exception& e)
-  {
-    qWarning() << "Failed to create compute shader:" << e.what();
-    m_computePipeline = nullptr;
+      if(needs_input)
+        input_port_index++;
+      if(geo_has_output)
+        outlet_index++;
+
+      // $USER ports also create input ports (IntSpinBox), track them
+      if(geo->vertex_count.find("$USER") != std::string::npos)
+        input_port_index++;
+      if(geo->instance_count.find("$USER") != std::string::npos)
+        input_port_index++;
+      for(const auto& aux : geo->auxiliary)
+        if(aux.size.find("$USER") != std::string::npos)
+          input_port_index++;
+      if(geo->indirect && geo->indirect->count.find("$USER") != std::string::npos)
+        input_port_index++;
+    }
+    else
+    {
+      // All other input types (float, long, bool, event, color, point2D, point3D,
+      // image, audio, audioFFT, audioHist, cubemap, texture) create one input port each.
+      input_port_index++;
+    }
   }
+
+  m_outputTexture = nullptr;
+
+  // Create the compute passes (edge-independent: SRB, pipelines, processUBOs)
+  initComputeSRBAndPasses(renderer, res);
+
+  m_initialized = true;
 }
 
-void RenderedCSFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+void RenderedCSFNode::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
 {
-  QRhi& rhi = *renderer.state.rhi;
+  if(!m_initialized)
+    return;
 
-  // Reset the "first frame" gate so that generateMips() in update() waits
-  // for the upstream pass to actually write the input textures before being
-  // called — see the matching comment in update().
-  m_inputsHaveBeenWritten = false;
+  const auto& rt = renderer.renderTargetForOutput(edge);
+  if(rt.renderTarget)
+  {
+    createGraphicsPass(rt, renderer, edge, res);
+  }
+}
 
-  // Check for compute support
-  if(!rhi.isFeatureSupported(QRhi::Compute))
+void RenderedCSFNode::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  auto it = ossia::find_if(
+      m_graphicsPasses, [&](const auto& p) { return p.first == &edge; });
+  if(it != m_graphicsPasses.end())
   {
-    qWarning() << "Compute shaders not supported on this backend";
-    return;
+    it->second.pipeline.release();
+    delete it->second.outputSampler;
+    m_graphicsPasses.erase(it);
   }
-  
-  // ProcessUBO will be created per-pass in initComputePass
+}
 
-  // Initialize GPU buffer scatter for format conversion
-  m_gpuScatterAvailable = m_gpuScatter.init(renderer.state);
+bool RenderedCSFNode::hasOutputPassForEdge(Edge& edge) const
+{
+  // True when some (edge, pass) entry in m_graphicsPasses is keyed by &edge.
+  const auto sameEdge = [&edge](const auto& entry) { return entry.first == &edge; };
+  return ossia::find_if(m_graphicsPasses, sameEdge) != m_graphicsPasses.end();
+}
 
-  // Create the material UBO
-  m_materialSize = n.m_materialSize;
-  if(m_materialSize > 0)
+void RenderedCSFNode::releaseState(RenderList& r)
+{
+  if(!m_initialized)
+    return;
+
+  // Clean up remaining graphics passes
+  for(auto& [edge, pass] : m_graphicsPasses)
   {
-    m_materialUBO = rhi.newBuffer(
-        QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
-    qWarning() << "CSF ALLOC [materialUBO] size=" << m_materialSize;
-    m_materialUBO->setName("RenderedCSFNode::init::m_materialUBO");
-    if(!m_materialUBO->create())
+    pass.pipeline.release();
+    delete pass.outputSampler;
+  }
+  m_graphicsPasses.clear();
+
+  // Clean up compute passes
+  for(auto& [edge, pass] : m_computePasses)
+  {
+    delete pass.srb;
+    if(pass.processUBO)
     {
-      qWarning() << "Failed to create uniform buffer";
-      delete m_materialUBO;
-      m_materialUBO = nullptr;
+      pass.processUBO->deleteLater();
     }
   }
+  m_computePasses.clear();
 
-  // Initialize input samplers
-  SCORE_ASSERT(m_computePasses.empty());
-  SCORE_ASSERT(m_inputSamplers.empty());
+  // Clean up pipelines (m_ownedPipelines has unique entries, m_perPassPipelines may have duplicates)
+  for(auto* pip : m_ownedPipelines)
+    delete pip;
+  m_ownedPipelines.clear();
+  m_perPassPipelines.clear();
+  m_computePipeline = nullptr;
 
-  // Create samplers for input textures
-  m_inputSamplers = initInputSamplers(this->n, renderer, n.input);
+  // Clean up storage buffers
+  for(auto& storageBuffer : m_storageBuffers)
+  {
+    if(storageBuffer.owned)
+      r.releaseBuffer(storageBuffer.buffer);
+  }
+  m_storageBuffers.clear();
 
-  // Parse descriptor to create storage buffers and determine output texture requirements.
-  // We also track the input port index to build the geometry-binding-to-port mapping.
-  // The input port index mirrors the order in which ISFNode's visitor calls
-  // self.input.push_back() for each descriptor input.
-  int sb_index = 0;
-  int outlet_index = 0;
-  int input_port_index = 0; // tracks which input port we're at
-  auto& outlets = n.output;
-  for(const auto& input : n.m_descriptor.inputs)
+  // Clean up GPU scatter
+  m_gpuScatter.release();
+  m_gpuScatterAvailable = false;
+
+  // Clean up geometry bindings
+  for(auto& binding : m_geometryBindings)
   {
-    // Handle storage buffers
-    if(auto* storage = ossia::get_if<isf::storage_input>(&input.data))
+    for(auto& ssbo : binding.attribute_ssbos)
     {
-      // Create storage buffer entry - actual buffer will be created/sized in updateStorageBuffers
-      StorageBuffer sb;
-      sb.buffer = nullptr; // Will be created in updateStorageBuffers
-      sb.size = 0;
-      sb.lastKnownSize = 0; // Force initial creation
-      sb.name = QString::fromStdString(input.name);
-      sb.buffer_usage = storage->buffer_usage;
-      sb.access = QString::fromStdString(storage->access);
-      sb.layout = storage->layout; // Store layout for size calculation
-      m_storageBuffers.push_back(sb);
-
-      if(sb.access.contains("write")) {
-        m_outStorageBuffers.push_back({outlets[outlet_index], sb_index});
-        outlet_index++;
+      if(ssbo.read_buffer)
+      {
+        r.releaseBuffer(ssbo.read_buffer);
+        ssbo.read_buffer = nullptr;
       }
-      // read_only storage creates an input port
-      if(storage->access == "read_only")
-        input_port_index++;
-      sb_index++;
-    }
-    // Handle CSF images
-    else if(auto* image = ossia::get_if<isf::csf_image_input>(&input.data))
-    {
-      QRhiTexture::Format format = getTextureFormat(QString::fromStdString(image->format));
-      m_storageImages.push_back(
-          StorageImage{
-              nullptr, QString::fromStdString(input.name),
-              QString::fromStdString(image->access), format});
-
-      if(m_storageImages.back().access.contains("write")) {
-        int img_index = (int)m_storageImages.size() - 1;
-        m_outStorageImages.push_back({outlets[outlet_index], img_index});
-        outlet_index++;
+      if(ssbo.owned && ssbo.buffer)
+      {
+        r.releaseBuffer(ssbo.buffer);
       }
-      // read_only CSF image creates an input port
-      if(image->access == "read_only")
-        input_port_index++;
+      ssbo.buffer = nullptr;
+      delete ssbo.scatterStaging;
+      ssbo.scatterStaging = nullptr;
+      delete ssbo.scatterOp.srb;
+      ssbo.scatterOp.srb = nullptr;
+      delete ssbo.scatterOp.paramsUBO;
+      ssbo.scatterOp.paramsUBO = nullptr;
     }
-    // Handle geometry inputs
-    else if(auto* geo = ossia::get_if<isf::geometry_input>(&input.data))
+    for(auto& aux : binding.auxiliary_ssbos)
     {
-      // Determine if this geometry_input creates an input port
-      // (mirrors ISFNode visitor logic: input port if any attribute is read_only or read_write)
-      bool needs_input = geo->attributes.empty(); // empty = pass-through, always has input
-      if(!needs_input)
+      if(aux.owned && aux.buffer)
       {
-        for(const auto& attr : geo->attributes)
-          if(attr.access == "read_only" || attr.access == "read_write")
-          { needs_input = true; break; }
+        r.releaseBuffer(aux.buffer);
       }
+      aux.buffer = nullptr;
+    }
+    for(auto& at : binding.auxiliary_textures)
+    {
+      if(at.sampler)
+        at.sampler->deleteLater();
+      at.sampler = nullptr;
+      // For owned textures (auto-allocated writable storage images),
+      // we created the QRhiTexture and must release it here. Sampled
+      // entries point to either a RenderList-owned placeholder or an
+      // upstream-geometry-owned handle — those we don't free.
+      if(at.owned && at.texture)
+        at.texture->deleteLater();
+      at.texture = nullptr;
+      at.owned = false;
+    }
+    binding.auxiliary_textures.clear();
+    for(auto* buf : binding.copyFromBuffers)
+      r.releaseBuffer(buf);
+    binding.copyFromBuffers.clear();
+    if(binding.indirectBuffer)
+    {
+      r.releaseBuffer(binding.indirectBuffer);
+      binding.indirectBuffer = nullptr;
+    }
+  }
+  m_geometryBindings.clear();
 
-      GeometryBinding binding;
-      binding.input_port_index = needs_input ? input_port_index : -1;
-      binding.has_output = geo->attributes.empty(); // Empty attributes = pure pass-through with output
-      binding.has_vertex_count_spec = !geo->vertex_count.empty();
-      binding.has_instance_count_spec = !geo->instance_count.empty();
+  // Clean up storage images (including persistent ping-pong pair)
+  for(auto& storageImage : m_storageImages)
+  {
+    if(storageImage.texture)
+      storageImage.texture->deleteLater();
+    if(storageImage.read_texture)
+      storageImage.read_texture->deleteLater();
+  }
+  m_storageImages.clear();
 
-      for(const auto& attr : geo->attributes)
-      {
-        GeometryBinding::AttributeSSBO ssbo;
-        ssbo.name = attr.name;
-        ssbo.access = attr.access;
-        ssbo.per_instance = (attr.rate == "instance");
-        binding.attribute_ssbos.push_back(std::move(ssbo));
+  m_outStorageImages.clear();
+  m_outStorageBuffers.clear();
+  m_outputTexture = nullptr;
 
-        if(attr.access != "read_only" && attr.access != "none")
-          binding.has_output = true;
-      }
+  // Clean up buffers and textures
+  delete m_materialUBO;
+  m_materialUBO = nullptr;
 
-      // If vertex_count is specified, resolve and pre-allocate attribute SSBOs
-      if(binding.has_vertex_count_spec)
-      {
-        int count = resolveCountExpression(geo->vertex_count, *geo, "vertex_count");
-        if(count > 0)
-          binding.vertex_count = count;
-      }
+  // Clean up samplers
+  for(auto sampler : m_inputSamplers)
+  {
+    delete sampler.sampler;
+    // texture is deleted elsewhere
+  }
+  m_inputSamplers.clear();
 
-      // Resolve instance_count if specified
-      if(binding.has_instance_count_spec)
-      {
-        int ic = resolveCountExpression(geo->instance_count, *geo, "instance_count");
-        if(ic > 0)
-          binding.instance_count = ic;
-      }
+  m_initialized = false;
+}
 
-      // Pre-allocate attribute SSBOs using the correct count based on rate
+void RenderedCSFNode::addInputEdge(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  if(edge.sink->type == Types::Image)
+  {
+    // Find upstream texture
+    if(auto it = edge.source->node->renderedNodes.find(&renderer);
+       it != edge.source->node->renderedNodes.end())
+    {
+      if(auto* tex = it->second->textureForOutput(*edge.source))
       {
-        for(int attr_idx = 0; attr_idx < (int)geo->attributes.size(); attr_idx++)
-        {
-          if(attr_idx >= (int)binding.attribute_ssbos.size())
-            break;
-          auto& ssbo = binding.attribute_ssbos[attr_idx];
-          if(ssbo.access == "none")
-            continue;
-          const int count = ssbo.per_instance ? binding.instance_count : binding.vertex_count;
-          if(count <= 0)
-            continue;
-          const int elem_size = glslTypeSizeBytes(geo->attributes[attr_idx].type);
-          const int64_t needed = (int64_t)elem_size * count;
-          auto* buf = rhi.newBuffer(
-              QRhiBuffer::Static,
-              QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed);
-          qWarning() << "CSF ALLOC [geomSpecInit]" << ssbo.name.c_str() << "size=" << needed;
-          buf->setName(QByteArray("CSF_GeomSpec_") + ssbo.name.c_str());
-          buf->create();
-          QByteArray zero(needed, 0);
-          res.uploadStaticBuffer(buf, 0, needed, zero.constData());
-          ssbo.buffer = buf;
-          ssbo.size = needed;
-          ssbo.owned = true;
-        }
+        auto rt = renderer.renderTargetForInputPort(*edge.sink);
+        updateInputTexture(*edge.sink, tex, rt.depthTexture);
       }
+    }
+  }
+  // Geometry input edges will be picked up by updateGeometryBindings in update()
+}
 
-      for(const auto& aux : geo->auxiliary)
-      {
-        // COPY_FROM auxiliaries are forwarded in pushOutputGeometry, no SSBO needed
-        if(aux.forward)
-          continue;
-
-        GeometryBinding::AuxiliarySSBO ssbo;
-        ssbo.name = aux.name;
-        ssbo.access = aux.access;
-        ssbo.layout = aux.layout;
-        ssbo.size_expr = aux.size;
-
-        // Create the buffer immediately so it's available for the first dispatch
-        int arrayCount = 0;
-        if(!aux.size.empty())
-          arrayCount = resolveCountExpression(aux.size, *geo, aux.name);
-
-        const int64_t requiredSize = score::gfx::calculateStorageBufferSize(
-            aux.layout, arrayCount, this->n.descriptor());
-        if(requiredSize > 0)
-        {
-          auto* buf = rhi.newBuffer(
-              QRhiBuffer::Static,
-              QRhiBuffer::StorageBuffer, requiredSize);
-          qWarning() << "CSF ALLOC [geoAuxInit]" << aux.name.c_str() << "size=" << requiredSize;
-          buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str());
-          buf->create();
-          QByteArray zero(requiredSize, 0);
-          res.uploadStaticBuffer(buf, 0, requiredSize, zero.constData());
-          ssbo.buffer = buf;
-          ssbo.size = requiredSize;
-          ssbo.owned = true;
-        }
-
-        binding.auxiliary_ssbos.push_back(std::move(ssbo));
-
-        if(aux.access != "read_only")
-          binding.has_output = true;
-      }
-
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-      // Allocate indirect draw buffer if requested
-      if(geo->indirect_draw && renderer.state.caps.drawIndirect)
-      {
-        binding.uses_indirect_draw = true;
-        binding.indirect_draw_indexed = (geo->indirect_draw_type == "draw_indexed");
-
-        const int64_t indirectSize = binding.indirect_draw_indexed
-            ? (int64_t)sizeof(QRhiIndexedIndirectDrawCommand)
-            : (int64_t)sizeof(QRhiIndirectDrawCommand);
-
-        auto* buf = rhi.newBuffer(
-            QRhiBuffer::Static,
-            QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer,
-            indirectSize);
-        qWarning() << "CSF ALLOC [indirectDraw]" << input.name.c_str() << "size=" << indirectSize;
-        buf->setName(QByteArray("CSF_IndirectDraw_") + input.name.c_str());
-        buf->create();
-
-        // Initialize with zeros (vertexCount=0, instanceCount=0)
-        QByteArray zero(indirectSize, 0);
-        res.uploadStaticBuffer(buf, 0, indirectSize, zero.constData());
-
-        binding.indirectDrawBuffer = buf;
-      }
-#endif
-
-      const bool geo_has_output = binding.has_output;
-      m_geometryBindings.push_back(std::move(binding));
-
-      if(needs_input)
-        input_port_index++;
-      if(geo_has_output)
-        outlet_index++;
-
-      // $USER ports also create input ports (IntSpinBox), track them
-      if(geo->vertex_count.find("$USER") != std::string::npos)
-        input_port_index++;
-      if(geo->instance_count.find("$USER") != std::string::npos)
-        input_port_index++;
-      for(const auto& aux : geo->auxiliary)
-        if(aux.size.find("$USER") != std::string::npos)
-          input_port_index++;
-    }
-    else
-    {
-      // All other input types (float, long, bool, event, color, point2D, point3D,
-      // image, audio, audioFFT, audioHist, cubemap, texture) create one input port each.
-      input_port_index++;
-    }
+void RenderedCSFNode::removeInputEdge(RenderList& renderer, Edge& edge)
+{
+  if(edge.sink->type == Types::Image)
+  {
+    // Mirrors SimpleRenderedISFNode::removeInputEdge: the same dangling
+    // depth-sampler issue applies when a DEPTH: true input is disconnected,
+    // so ports flagged SamplableDepth get the empty texture as depth fallback.
+    QRhiTexture* depthFallback = nullptr;
+    if((edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+      depthFallback = &renderer.emptyTexture();
+    updateInputTexture(*edge.sink, &renderer.emptyTexture(), depthFallback);
   }
+  // Geometry edges need no work here: updateGeometryBindings picks them up in update().
+}
 
-  m_outputTexture = nullptr;
+void RenderedCSFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
 
-  // Create the compute passes for each output edge (across all output ports)
+  // Create graphics passes for each output edge
   for(auto* output_port : n.output)
   {
     for(Edge* edge : output_port->edges)
     {
-      const auto& rt = renderer.renderTargetForOutput(*edge);
-      initComputePass(rt, renderer, *edge, res);
+      addOutputPass(renderer, *edge, res);
     }
   }
 }
@@ -3203,9 +4188,13 @@ void RenderedCSFNode::update(
   if(m_materialUBO && n.m_material_data)
   {
     res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get());
+    // CSF uploads the material UBO every frame (no materialChanged gate),
+    // so resetting event ports here is enough — the zero value will
+    // propagate to the GPU on the next frame's update().
+    (void)n.resetEventPortsAfterFrame();
   }
 
-  for(auto& [sampler, texture] : this->m_inputSamplers)
+  for(auto& [sampler, texture, fb_] : this->m_inputSamplers)
   {
     // Skip generateMips on textures that have not yet been written to.
     // Their Vulkan layout is still VK_IMAGE_LAYOUT_PREINITIALIZED, and Qt RHI's
@@ -3230,332 +4219,62 @@ void RenderedCSFNode::update(
   // TODO: Check if texture size inputs have changed and recreate texture if needed
 }
 
-void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiResourceUpdateBatch& res)
+// Hash the bindings list to detect frame-to-frame drift. Two binding
+// lists hash to the same value when every entry's descriptor identity
+// matches (modulo hash collisions) — recreateShaderResourceBindings then skips the
+// destroy+setBindings+create dance when the per-pass binding list
+// hasn't actually changed since the previous frame (steady state for
+// a static scene; every frame would otherwise thrash the SRB pool slot).
+// Use Qt's own qHash(QRhiShaderResourceBinding) so the equivalence
+// matches QRhi's internal canonical representation — no need to pack
+// the private Data union by hand and risk drift on a Qt minor update.
+// Per-binding hashes are seeded by the binding's index so two
+// otherwise-equal bindings at different slots hash differently;
+// combined via ossia::hash_bytes over the per-binding hash vector.
+namespace
 {
-  QRhi& rhi = *renderer.state.rhi;
-
-  // Pre-pass: collect physical buffers used with conflicting access modes
-  // (read on one binding, write on another) so we can promote them to
-  // bufferLoadStore. The Qt RHI / Vulkan validation layer rejects bindings
-  // that reference the same buffer with different access flags within a pass.
-  // (geometry bindings are assumed up-to-date here — recreateShaderResourceBindings
-  // is called after the geometry update path)
-  std::unordered_set<QRhiBuffer*> aliased_buffers;
-  {
-    std::unordered_map<QRhiBuffer*, int> access_flags; // 1=read, 2=write, 3=both
-    int gb_idx = 0;
-    for(const auto& inp : n.m_descriptor.inputs)
-    {
-      auto* g = ossia::get_if<isf::geometry_input>(&inp.data);
-      if(!g)
-        continue;
-      if(gb_idx >= (int)m_geometryBindings.size())
-        break;
-      const auto& gb = m_geometryBindings[gb_idx++];
-
-      for(int ai = 0; ai < (int)g->attributes.size() && ai < (int)gb.attribute_ssbos.size(); ai++)
-      {
-        const auto& req = g->attributes[ai];
-        const auto& ssbo = gb.attribute_ssbos[ai];
-        if(req.access == "none" || !ssbo.buffer)
-          continue;
-        int f = (req.access == "read_only") ? 1 : (req.access == "write_only") ? 2 : 3;
-        access_flags[ssbo.buffer] |= f;
-        if(req.access == "read_write" && ssbo.read_buffer && ssbo.read_buffer != ssbo.buffer)
-          access_flags[ssbo.read_buffer] |= 1;
-      }
-      for(const auto& aux : gb.auxiliary_ssbos)
-      {
-        if(!aux.buffer)
-          continue;
-        int f = (aux.access == "read_only") ? 1 : (aux.access == "write_only") ? 2 : 3;
-        access_flags[aux.buffer] |= f;
-        if(aux.read_buffer && aux.read_buffer != aux.buffer)
-          access_flags[aux.read_buffer] |= 1;
-      }
-    }
-    for(const auto& [buf, flags] : access_flags)
-      if(flags == 3)
-        aliased_buffers.insert(buf);
-  }
-
-  // Build the bindings list (same as in initComputePass)
-  QList<QRhiShaderResourceBinding> bindings;
-
-  // Binding 0: Renderer UBO
-  bindings.append(QRhiShaderResourceBinding::uniformBuffer(
-      0, QRhiShaderResourceBinding::ComputeStage, &renderer.outputUBO()));
-
-  // Binding 1: Process UBO (will be set per-pass)
-  bindings.append(
-      QRhiShaderResourceBinding::uniformBuffer(
-          1, QRhiShaderResourceBinding::ComputeStage, nullptr));
-
-  // Binding 2: Material UBO (custom inputs)
-  int bindingIndex = 2;
-  if(m_materialUBO)
-  {
-    bindings.append(QRhiShaderResourceBinding::uniformBuffer(
-        bindingIndex++, QRhiShaderResourceBinding::ComputeStage, m_materialUBO));
-  }
-
-  int input_port_index = 0;
-  int input_image_index = 0;
-  int output_port_index = 0;
-  int output_image_index = 0;
-  int geo_binding_index = 0;
-
-  // Process all resources in the order they appear in the descriptor
-  for(const auto& input : n.m_descriptor.inputs)
-  {
-    // Storage buffers
-    if(ossia::get_if<isf::storage_input>(&input.data))
-    {
-      // Find the corresponding storage buffer
-      auto it = std::find_if(m_storageBuffers.begin(), m_storageBuffers.end(),
-          [&input](const StorageBuffer& sb) {
-            return sb.name == QString::fromStdString(input.name);
-          });
-
-      if(it != m_storageBuffers.end() && it->buffer)
-      {
-        if(it->access == "read_only")
-        {
-          QRhiBuffer* buf = it->buffer; // Default dummy buffer
-          auto port = this->node.input[input_port_index];
-          if(!port->edges.empty())
-          {
-            auto input_buf = renderer.bufferForInput(*port->edges.front());
-            if(input_buf)
-            {
-              buf = input_buf.handle;
-            }
-          }
-          bindings.append(
-              QRhiShaderResourceBinding::bufferLoad(
-                  bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf));
-          input_port_index++;
-        }
-        else if(it->access == "write_only")
-        {
-          bindings.append(QRhiShaderResourceBinding::bufferStore(
-              bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
-              it->buffer));
-          output_port_index++;
-        }
-        else // read_write
-        {
-          bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-              bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
-              it->buffer));
-          output_port_index++;
-        }
-      }
-      else
-      {
-        bindingIndex++; // keep indices synchronized with shader layout
-      }
-    }
-    // Regular textures (sampled)
-    else if(ossia::get_if<isf::texture_input>(&input.data))
-    {
-      // Regular sampled textures from m_inputSamplers
-      if(input_image_index < m_inputSamplers.size())
-      {
-        auto [sampler, tex] = m_inputSamplers[input_image_index];
-        if(sampler && tex)
-        {
-          bindings.append(
-              QRhiShaderResourceBinding::sampledTexture(
-                  bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, sampler));
-        }
-      }
-      input_port_index++;
-      input_image_index++;
-    }
-    // CSF storage images
-    else if(auto image = ossia::get_if<isf::csf_image_input>(&input.data))
-    {
-      // Find the corresponding storage image
-      auto it = std::find_if(m_storageImages.begin(), m_storageImages.end(),
-          [&input](const StorageImage& si) { 
-            return si.name == QString::fromStdString(input.name); 
-          });
-      
-      if(it != m_storageImages.end())
-      {
-        if(it->access == "read_only")
-        {
-          if(input_image_index < m_inputSamplers.size())
-          {
-            auto [sampler, tex] = m_inputSamplers[input_image_index];
-            if(sampler && tex)
-            {
-              bindings.append(
-                  QRhiShaderResourceBinding::imageLoad(
-                      bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, 0));
-            }
-          }
-          input_port_index++;
-          input_image_index++;
-        }
-        else if(it->texture)
-        {
-          if(it->access == "write_only")
-          {
-            bindings.append(
-                QRhiShaderResourceBinding::imageStore(
-                    bindingIndex++, QRhiShaderResourceBinding::ComputeStage, it->texture,
-                    0));
-          }
-          else if(it->access == "read_write")
-          {
-            bindings.append(
-                QRhiShaderResourceBinding::imageLoadStore(
-                    bindingIndex++, QRhiShaderResourceBinding::ComputeStage, it->texture,
-                    0));
-          }
-          output_port_index++;
-          output_image_index++;
-        }
-        else
-        {
-          bindingIndex++; // keep indices synchronized with shader layout
-          output_port_index++;
-          output_image_index++;
-        }
-      }
-    }
-    // Geometry inputs: rebind per-attribute SSBOs
-    else if(auto* geo_input = ossia::get_if<isf::geometry_input>(&input.data))
-    {
-      if(geo_binding_index < (int)m_geometryBindings.size())
-      {
-        auto& binding = m_geometryBindings[geo_binding_index];
-
-        // Helper: emit a binding for buf with the given access mode, promoting
-        // to bufferLoadStore when the buffer is aliased across multiple bindings
-        // with conflicting accesses (avoids Vulkan validation warnings).
-        auto appendBufBinding = [&](QRhiBuffer* buf, const std::string& access)
-        {
-          const bool aliased = aliased_buffers.count(buf) > 0;
-          if(access == "read_write" || aliased)
-          {
-            bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-                bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf));
-          }
-          else if(access == "read_only")
-          {
-            bindings.append(QRhiShaderResourceBinding::bufferLoad(
-                bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf));
-          }
-          else // write_only
-          {
-            bindings.append(QRhiShaderResourceBinding::bufferStore(
-                bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf));
-          }
-        };
-
-        for(int attr_idx = 0; attr_idx < (int)geo_input->attributes.size(); attr_idx++)
-        {
-          if(attr_idx >= (int)binding.attribute_ssbos.size())
-            break;
-
-          const auto& req = geo_input->attributes[attr_idx];
-          auto& ssbo = binding.attribute_ssbos[attr_idx];
-
-          // "none" access: forwarded via COPY_FROM, no binding needed
-          if(req.access == "none")
-            continue;
-
-          if(!ssbo.buffer)
-          {
-            // Create a minimal fallback buffer so we don't skip a binding index
-            const int elem_size = glslTypeSizeBytes(req.type);
-            ssbo.buffer = rhi.newBuffer(
-                QRhiBuffer::Static,
-                QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, elem_size);
-            qWarning() << "CSF ALLOC [geomFBFallback]" << req.name.c_str() << "size=" << elem_size;
-            ssbo.buffer->setName(QByteArray("CSF_GeomFB_") + req.name.c_str());
-            ssbo.buffer->create();
-            ssbo.size = elem_size;
-            ssbo.owned = true;
-          }
-
-          if(req.access == "read_only" || req.access == "write_only")
-          {
-            appendBufBinding(ssbo.buffer, req.access);
-          }
-          else // read_write -> 2 bindings: _in (readonly) + _out (read-write)
-          {
-            QRhiBuffer* read_buf = (ssbo.read_buffer && !binding.pending_initial_copy)
-                ? ssbo.read_buffer : ssbo.buffer;
-            if(read_buf == ssbo.buffer)
-            {
-              // Same physical buffer for both _in and _out (non-feedback in-place).
-              bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-                  bindingIndex++, QRhiShaderResourceBinding::ComputeStage, ssbo.buffer));
-              bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-                  bindingIndex++, QRhiShaderResourceBinding::ComputeStage, ssbo.buffer));
-            }
-            else
-            {
-              // Distinct buffers (feedback receiver): _in readonly, _out read-write
-              appendBufBinding(read_buf, "read_only");
-              bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-                  bindingIndex++, QRhiShaderResourceBinding::ComputeStage, ssbo.buffer));
-            }
-          }
-        }
-
-        // Auxiliary SSBOs for this geometry input
-        for(auto& aux : binding.auxiliary_ssbos)
-        {
-          if(!aux.buffer)
-          {
-            // Create a minimal fallback buffer so we don't skip a binding index
-            aux.buffer = rhi.newBuffer(
-                QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 16);
-            qWarning() << "CSF ALLOC [auxFBFallback]" << aux.name.c_str() << "size=16";
-            aux.buffer->setName(QByteArray("CSF_AuxFB_") + aux.name.c_str());
-            aux.buffer->create();
-            aux.size = 16;
-            aux.owned = true;
-          }
-
-          appendBufBinding(aux.buffer, aux.access);
-        }
+uint64_t hashBindings(const QList<QRhiShaderResourceBinding>& bindings) noexcept
+{
+  // Seed each binding's qHash with its slot index, then hash the packed results.
+  std::vector<size_t> slotHashes;
+  slotHashes.reserve(bindings.size());
+  for(auto it = bindings.cbegin(); it != bindings.cend(); ++it)
+    slotHashes.push_back(qHash(*it, /*seed=*/slotHashes.size()));
+  return ossia::hash_bytes(slotHashes.data(), slotHashes.size() * sizeof(size_t));
+}
+} // namespace
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-        // Rebind indirect draw buffer
-        if(binding.uses_indirect_draw && binding.indirectDrawBuffer)
-        {
-          bindings.append(QRhiShaderResourceBinding::bufferLoadStore(
-              bindingIndex++, QRhiShaderResourceBinding::ComputeStage,
-              binding.indirectDrawBuffer));
-        }
-#endif
+void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  QRhi& rhi = *renderer.state.rhi;
 
-        geo_binding_index++;
-      }
-      // Inlet port if any attribute reads from upstream
-      for(const auto& attr : geo_input->attributes)
-        if(attr.access == "read_only" || attr.access == "read_write") { input_port_index++; break; }
-      // Skip $USER ports for this geometry input
-      if(geo_input->vertex_count.find("$USER") != std::string::npos) input_port_index++;
-      if(geo_input->instance_count.find("$USER") != std::string::npos) input_port_index++;
-      for(const auto& aux : geo_input->auxiliary)
-        if(aux.size.find("$USER") != std::string::npos) input_port_index++;
-    }
-    else
+  // Single source of truth for the bindings list (also used by
+  // initComputeSRBAndPasses — see buildComputeSrbBindings). Geometry bindings
+  // are assumed up-to-date here: the caller (update()) runs
+  // updateGeometryBindings before calling this function.
+  QList<QRhiShaderResourceBinding> bindings;
+  buildComputeSrbBindings(renderer, res, bindings);
+
+  // Recreate SRBs for each compute pass — but only when the per-pass
+  // binding list actually changed. Hash the bindings (post per-pass
+  // ProcessUBO patch) and compare to the cached hash from the previous
+  // frame: identical → skip the destroy+setBindings+create cycle, which
+  // would otherwise thrash the QRhi SRB pool slot every frame on a
+  // static scene.
+  for(auto& [edge, pass] : m_computePasses)
+  {
+    // Set the ProcessUBO binding for this pass — must happen BEFORE
+    // hashing so a change in pass.processUBO triggers a rebuild.
+    if(pass.processUBO)
     {
-      input_port_index++;
+      bindings[1] = QRhiShaderResourceBinding::uniformBuffer(
+          1, QRhiShaderResourceBinding::ComputeStage, pass.processUBO);
     }
-  }
 
-  // Recreate SRBs for each compute pass
-  for(auto& [edge, pass] : m_computePasses)
-  {
+    const uint64_t newHash = hashBindings(bindings);
+    if(pass.srb && pass.srbBindingsHash == newHash && newHash != 0)
+      continue; // bindings unchanged from last frame
+
     if(pass.srb)
     {
       // Delete old SRB
@@ -3565,25 +4284,20 @@ void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiR
     {
       // Create new SRB
       pass.srb = rhi.newShaderResourceBindings();
-      qWarning() << "CSF ALLOC [recreateSRB] new SRB for pass";
     }
 
-    // Set the ProcessUBO binding for this pass
-    if(pass.processUBO)
-    {
-      bindings[1] = QRhiShaderResourceBinding::uniformBuffer(
-          1, QRhiShaderResourceBinding::ComputeStage, pass.processUBO);
-    }
-    
     pass.srb->setBindings(bindings.cbegin(), bindings.cend());
     if(!pass.srb->create())
     {
       qWarning() << "Failed to recreate SRB for compute pass";
       delete pass.srb;
       pass.srb = nullptr;
+      pass.srbBindingsHash = 0;
+      continue;
     }
+    pass.srbBindingsHash = newHash;
   }
-  
+
   // Update the pipeline with one of the SRBs (they're all compatible)
   if(!m_computePasses.empty() && m_computePasses[0].second.srb)
   {
@@ -3593,111 +4307,7 @@ void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiR
 
 void RenderedCSFNode::release(RenderList& r)
 {
-  // Clean up compute passes
-  for(auto& [edge, pass] : m_computePasses)
-  {
-    delete pass.srb;
-    if(pass.processUBO)
-    {
-      pass.processUBO->deleteLater();
-    }
-  }
-  m_computePasses.clear();
-  
-  // Clean up graphics passes
-  for(auto& [edge, pass] : m_graphicsPasses)
-  {
-    pass.pipeline.release();
-    delete pass.outputSampler;
-  }
-  m_graphicsPasses.clear();
-  
-  // Clean up pipelines (m_ownedPipelines has unique entries, m_perPassPipelines may have duplicates)
-  for(auto* pip : m_ownedPipelines)
-    delete pip;
-  m_ownedPipelines.clear();
-  m_perPassPipelines.clear();
-  m_computePipeline = nullptr;
-  
-  // Clean up storage buffers
-  for(auto& storageBuffer : m_storageBuffers)
-  {
-    if(storageBuffer.owned)
-      r.releaseBuffer(storageBuffer.buffer);
-  }
-  m_storageBuffers.clear();
-
-  // Clean up GPU scatter
-  m_gpuScatter.release();
-  m_gpuScatterAvailable = false;
-
-  // Clean up geometry bindings
-  for(auto& binding : m_geometryBindings)
-  {
-    for(auto& ssbo : binding.attribute_ssbos)
-    {
-      if(ssbo.read_buffer)
-      {
-        r.releaseBuffer(ssbo.read_buffer);
-        ssbo.read_buffer = nullptr;
-      }
-      if(ssbo.owned && ssbo.buffer)
-      {
-        r.releaseBuffer(ssbo.buffer);
-      }
-      ssbo.buffer = nullptr;
-      delete ssbo.scatterStaging;
-      ssbo.scatterStaging = nullptr;
-      delete ssbo.scatterOp.srb;
-      ssbo.scatterOp.srb = nullptr;
-      delete ssbo.scatterOp.paramsUBO;
-      ssbo.scatterOp.paramsUBO = nullptr;
-    }
-    for(auto& aux : binding.auxiliary_ssbos)
-    {
-      if(aux.owned && aux.buffer)
-      {
-        r.releaseBuffer(aux.buffer);
-      }
-      aux.buffer = nullptr;
-    }
-    for(auto* buf : binding.copyFromBuffers)
-      r.releaseBuffer(buf);
-    binding.copyFromBuffers.clear();
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-    if(binding.indirectDrawBuffer)
-    {
-      r.releaseBuffer(binding.indirectDrawBuffer);
-      binding.indirectDrawBuffer = nullptr;
-    }
-#endif
-  }
-  m_geometryBindings.clear();
-  
-  // Clean up storage images
-  for(auto& storageImage : m_storageImages)
-  {
-    if(storageImage.texture)
-    {
-      storageImage.texture->deleteLater();
-    }
-  }
-  m_storageImages.clear();
-  m_outStorageImages.clear();
-  m_outStorageBuffers.clear();
-  m_outputTexture = nullptr;
-
-  // Clean up buffers and textures
-  delete m_materialUBO;
-  m_materialUBO = nullptr;
-
-  // Clean up samplers
-  for(auto sampler : m_inputSamplers)
-  {
-    delete sampler.sampler;
-    // texture isdeleted elsewhere
-  }
-  m_inputSamplers.clear();
+  releaseState(r);
 }
 
 void RenderedCSFNode::runRenderPass(
@@ -3730,6 +4340,14 @@ void RenderedCSFNode::runInitialPasses(
     RenderList& renderer, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
     Edge& edge)
 {
+  // Plan 09 S6: debug marker for capture-tool readability.
+  commands.debugMarkBegin(QByteArrayLiteral("CSF"));
+  struct MarkEnd
+  {
+    QRhiCommandBuffer* c;
+    ~MarkEnd() { c->debugMarkEnd(); }
+  } _me{&commands};
+
   // Dispatch pending GPU scatter operations (format conversion) before user passes.
   // These convert raw CPU data (e.g. float3) uploaded to staging SSBOs into the
   // format expected by the CSF shader (e.g. vec4), entirely on the GPU.
@@ -3789,24 +4407,11 @@ void RenderedCSFNode::runInitialPasses(
     
     const auto& pass = m_computePasses[passIndex].second;
 
-    // Begin compute pass with ExternalContent flag so we can insert
-    // native memory barriers between dispatches via beginExternal/endExternal.
-    commands.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent);
-    res = nullptr;
-
-    // Set compute pipeline
-    commands.setComputePipeline(pass.pipeline);
-    
-    // Set shader resources
-    commands.setShaderResources(pass.srb);
-    
-    // Calculate dispatch size based on pass configuration
-    
     // Use pass-specific local sizes
     int localX = passDesc.local_size[0];
     int localY = passDesc.local_size[1];
     int localZ = passDesc.local_size[2];
-    
+
     int dispatchX{}, dispatchY{}, dispatchZ{};
 
     // Resolve per-axis stride expressions
@@ -3814,22 +4419,62 @@ void RenderedCSFNode::runInitialPasses(
     const int strideY = resolveDispatchExpression(passDesc.stride[1]);
     const int strideZ = resolveDispatchExpression(passDesc.stride[2]);
 
+    // Resolve the texture that drives 2D_IMAGE / 3D_IMAGE dispatch sizing.
+    // Priority: pass's explicit TARGET (matches by name against both storage
+    // images and input samplers) → m_outputTexture fallback.
+    auto resolveDispatchTexture
+        = [&]() -> QRhiTexture* {
+      const auto& target = passDesc.target_resource;
+      if(!target.empty())
+      {
+        const QString qtarget = QString::fromStdString(target);
+        for(const auto& si : m_storageImages)
+          if(si.name == qtarget && si.texture)
+            return si.texture;
+
+        // INPUTS entry: walk descriptor.inputs looking for a named image/texture
+        // input and map it to the corresponding sampled texture.
+        const auto& desc = n.descriptor();
+        int input_image_index = 0;
+        for(const auto& inp : desc.inputs)
+        {
+          const bool is_texture = ossia::get_if<isf::texture_input>(&inp.data);
+          const auto* ci = ossia::get_if<isf::csf_image_input>(&inp.data);
+          const bool is_img_sampled = ci && ci->access == "read_only";
+          if(is_texture || is_img_sampled)
+          {
+            if(inp.name == target
+               && input_image_index < (int)m_inputSamplers.size()
+               && m_inputSamplers[input_image_index].texture)
+              return m_inputSamplers[input_image_index].texture;
+            input_image_index++;
+          }
+          else if(ossia::get_if<isf::image_input>(&inp.data))
+          {
+            // ISF image_input is also bound as a sampler
+            input_image_index++;
+          }
+        }
+      }
+      return m_outputTexture;
+    };
+
     // Calculate dispatch size based on execution model
     if(passDesc.execution_type == "2D_IMAGE")
     {
-      // For 2D image execution, dispatch based on image size, workgroup size and stride
-      QSize textureSize = m_outputTexture ? m_outputTexture->pixelSize() : QSize(1280, 720);
+      QRhiTexture* tex = resolveDispatchTexture();
+      QSize textureSize = tex ? tex->pixelSize() : QSize(1280, 720);
       dispatchX = (textureSize.width() + localX * strideX - 1) / (localX * strideX);
       dispatchY = (textureSize.height() + localY * strideY - 1) / (localY * strideY);
       dispatchZ = 1;
     }
     else if(passDesc.execution_type == "3D_IMAGE")
     {
-      // For 3D image execution, dispatch based on volume dimensions and strides
-      if(m_outputTexture)
+      QRhiTexture* tex = resolveDispatchTexture();
+      if(tex)
       {
-        QSize sz = m_outputTexture->pixelSize();
-        int depth = m_outputTexture->depth();
+        QSize sz = tex->pixelSize();
+        int depth = std::max(1, tex->depth());
         dispatchX = (sz.width() + localX * strideX - 1) / (localX * strideX);
         dispatchY = (sz.height() + localY * strideY - 1) / (localY * strideY);
         dispatchZ = (depth + localZ * strideZ - 1) / (localZ * strideZ);
@@ -3873,48 +4518,143 @@ void RenderedCSFNode::runInitialPasses(
     {
       int n = 1;
 
-      if(passDesc.execution_type == "PER_VERTEX")
+      if(passDesc.execution_type == "PER_VERTEX"
+         || passDesc.execution_type == "PER_INSTANCE")
       {
-        // Dispatch one thread per vertex in the target geometry
-        for(const auto& geo_bind : m_geometryBindings)
+        const bool per_instance = (passDesc.execution_type == "PER_INSTANCE");
+        const std::string& tgt = passDesc.target_resource;
+        auto count_of = [per_instance](const auto& b) {
+          return per_instance ? b.instance_count : b.vertex_count;
+        };
+
+        // Recommended: TARGET names the geometry resource explicitly.
+        // Order-independent and self-documenting; should be set on every
+        // bundled preset (presets without it fall through to the legacy
+        // first-binding-with-positive-count form below).
+        bool resolved = false;
+        if(!tgt.empty())
         {
-          if(geo_bind.vertex_count > 0)
+          for(const auto& geo_bind : m_geometryBindings)
           {
-            n = geo_bind.vertex_count;
-            break;
+            if(geo_bind.input_name == tgt)
+            {
+              const int c = count_of(geo_bind);
+              if(c > 0)
+              {
+                n = c;
+                resolved = true;
+              }
+              break;
+            }
+          }
+          if(!resolved)
+          {
+            qWarning() << "CSF" << passDesc.execution_type.c_str()
+                       << "TARGET" << tgt.c_str()
+                       << "not found among geometry bindings, or has zero"
+                       << (per_instance ? "instance_count" : "vertex_count");
           }
         }
-      }
-      else if(passDesc.execution_type == "PER_INSTANCE")
-      {
-        // Dispatch one thread per instance in the target geometry
-        for(const auto& geo_bind : m_geometryBindings)
+
+        // Legacy / TARGET-less fallback: first binding with count > 0.
+        if(!resolved)
         {
-          if(geo_bind.instance_count > 0)
+          for(const auto& geo_bind : m_geometryBindings)
           {
-            n = geo_bind.instance_count;
-            break;
+            const int c = count_of(geo_bind);
+            if(c > 0)
+            {
+              n = c;
+              break;
+            }
           }
         }
       }
       else
       {
-        // 1D_BUFFER: try storage buffer size first, then geometry element count
-        for(auto& [port, index] : this->m_outStorageBuffers) {
-          if(port == edge.source) {
-            n = this->m_storageBuffers[index].size;
-            break;
+        // 1D_BUFFER resolution has three forms, chosen by what the shader
+        // author wrote as TARGET:
+        //
+        //   TARGET = "$expression" or "literal * literal" or "literal":
+        //     Treat as an expression. Evaluate through the common resolver
+        //     (same variables as SIZE / WIDTH / HEIGHT / STRIDE_*, including
+        //     the new $COUNT_<buffer> / $BYTESIZE_<buffer> surface). The
+        //     result is the total thread count `n`, which the spreading
+        //     logic below distributes across x/y/z workgroups — behaves
+        //     like MANUAL but without making the user pick an axis split.
+        //
+        //   TARGET = "bufferName" (a bare identifier, legacy form):
+        //     Dispatch over the buffer's element count. Equivalent to
+        //     "$COUNT_bufferName" but kept as shorthand and for backward
+        //     compatibility with any existing score that wrote a plain
+        //     buffer name.
+        //
+        //   TARGET empty (no TARGET key in JSON, or empty string):
+        //     Fall back to the legacy behaviour — size by the output
+        //     storage buffer matching the current edge (in BYTES, which
+        //     is a long-standing quirk: dispatches over raw bytes rather
+        //     than elements), then by the first geometry's vertex_count.
+        //     Left unchanged so existing scores without explicit TARGET
+        //     still dispatch the same as before.
+        const std::string& target = passDesc.target_resource;
+
+        auto looks_like_expression = [&]() -> bool {
+          if(target.empty())
+            return false;
+          for(char c : target)
+          {
+            if(c == '$' || c == '+' || c == '-' || c == '*' || c == '/'
+               || c == '%' || c == '(' || c == ')')
+              return true;
           }
-        }
+          // Pure integer literal counts as an expression (evaluator's
+          // fast-path handles it). Anything else that's a valid identifier
+          // character stream is treated as a bare buffer name.
+          bool all_numeric = !target.empty();
+          for(char c : target)
+          {
+            if(!std::isdigit((unsigned char)c)
+               && !std::isspace((unsigned char)c))
+            {
+              all_numeric = false;
+              break;
+            }
+          }
+          return all_numeric;
+        };
 
-        if(n <= 1)
+        if(looks_like_expression())
         {
-          for(const auto& geo_bind : m_geometryBindings)
+          n = resolveDispatchExpression(target);
+        }
+        else if(!target.empty())
+        {
+          // Bare buffer name → resolve as "$COUNT_<name>". The common
+          // resolver will look it up in m_storageBuffers / auxiliary_ssbos
+          // and return the element count. Falls back to 1 on miss.
+          const std::string count_expr = "$COUNT_" + target;
+          n = resolveDispatchExpression(count_expr);
+        }
+        else
+        {
+          // Legacy empty-TARGET fallback — preserved verbatim for
+          // compatibility with existing scores.
+          for(auto& [port, index] : this->m_outStorageBuffers) {
+            if(port == edge.source) {
+              n = this->m_storageBuffers[index].size;
+              break;
+            }
+          }
+
+          if(n <= 1)
           {
-            if(geo_bind.vertex_count > 0)
+            for(const auto& geo_bind : m_geometryBindings)
             {
-              n = geo_bind.vertex_count;
-              break;
+              if(geo_bind.vertex_count > 0)
+              {
+                n = geo_bind.vertex_count;
+                break;
+              }
             }
           }
         }
@@ -3928,8 +4668,14 @@ void RenderedCSFNode::runInitialPasses(
 
       if(totalWorkgroups > maxWorkgroups * maxWorkgroups * maxWorkgroups)
       {
-        commands.endComputePass();
-        return;
+        // Workgroup count overflow: skip THIS pass only. We haven't yet
+        // opened a compute pass at this point (the begin/end for this
+        // dispatch is now hoisted *after* the size calculation), so
+        // there is nothing to close — continue to the next pass. Using
+        // `return` here aborted every remaining pass and desynced the
+        // ping-pong buffer swaps; mirror the dispatch(0,0,0) guard below
+        // which already uses `continue`.
+        continue;
       }
       if(totalWorkgroups > maxWorkgroups * maxWorkgroups)
       {
@@ -3960,24 +4706,46 @@ void RenderedCSFNode::runInitialPasses(
       dispatchZ = 1;
     }
 
-    // Guard against dispatch(0,0,0) which is invalid per Vulkan spec
+    // Guard against dispatch(0,0,0) which is invalid per Vulkan spec.
+    // Pass not yet opened, so we just skip without closing anything.
     if(dispatchX <= 0 || dispatchY <= 0 || dispatchZ <= 0)
-    {
-      commands.endComputePass();
       continue;
-    }
 
-    // Dispatch compute shader
-    commands.dispatch(dispatchX, dispatchY, dispatchZ);
+    // Publish the workgroup count to the per-pass ProcessUBO so the
+    // shader can read gl_NumWorkGroups via the libisf-injected
+    // uniform alias. SPIRV-Cross's HLSL backend cannot emit code for
+    // the GLSL NumWorkgroups built-in directly (D3D11/D3D12 bake fails
+    // outright), so this routing is what makes compute shaders that
+    // reference gl_NumWorkGroups portable across all backends.
+    //
+    // Must happen before beginComputePass — updateDynamicBuffer is
+    // applied as part of the resource update batch that beginComputePass
+    // consumes; mid-pass updates are not allowed.
+    if(pass.processUBO)
+    {
+      if(!res)
+        res = renderer.state.rhi->nextResourceUpdateBatch();
+      n.standardUBO.passIndex = static_cast<int32_t>(passIndex);
+      n.standardUBO.numWorkgroups[0] = static_cast<uint32_t>(dispatchX);
+      n.standardUBO.numWorkgroups[1] = static_cast<uint32_t>(dispatchY);
+      n.standardUBO.numWorkgroups[2] = static_cast<uint32_t>(dispatchZ);
+      res->updateDynamicBuffer(
+          pass.processUBO, 0, sizeof(ProcessUBO), &n.standardUBO);
+    }
 
-    // End compute pass
+    // Each CSF pass issues exactly ONE dispatch in its own begin/endComputePass.
+    // QRhi automatically inserts the compute→compute memory barrier between
+    // consecutive passes that touch the same SSBO/image, so the previous
+    // per-pass ExternalContent flag + native barrier was redundant here — and
+    // ExternalContent needlessly forced Vulkan secondary command buffers. The
+    // native-barrier path stays for the genuinely multi-dispatch scatter loop
+    // (above), which issues several dispatches inside a single pass.
+    commands.beginComputePass(res);
+    res = nullptr;
 
-    // Insert a compute→compute memory barrier so that SSBO writes from
-    // this dispatch are visible to the next dispatch. QRhi does not
-    // insert these automatically between consecutive compute passes.
-    commands.beginExternal();
-    insertComputeBarrier(*renderer.state.rhi, commands);
-    commands.endExternal();
+    commands.setComputePipeline(pass.pipeline);
+    commands.setShaderResources(pass.srb);
+    commands.dispatch(dispatchX, dispatchY, dispatchZ);
 
     commands.endComputePass();
   }
@@ -4017,9 +4785,136 @@ void RenderedCSFNode::runInitialPasses(
           if(geo_input->attributes[ai].access == "read_write" && ssbo.read_buffer)
             std::swap(ssbo.buffer, ssbo.read_buffer);
         }
+        for(auto& aux : gb.auxiliary_ssbos)
+        {
+          if(aux.access == "read_write" && aux.read_buffer)
+            std::swap(aux.buffer, aux.read_buffer);
+        }
       }
       gb_idx++;
     }
   }
+
+  // Ping-pong swap for persistent storage images: the primary binding
+  // holds the current-frame target, the `_prev` binding reads the
+  // previous frame's data. After the frame renders, swap pointers so the
+  // next frame reads what we just wrote, and patch every compute SRB
+  // that holds these bindings via the indices recorded at build time.
+  {
+    bool any_swap = false;
+    for(auto& si : m_storageImages)
+    {
+      if(!si.persistent || !si.texture || !si.read_texture)
+        continue;
+      std::swap(si.texture, si.read_texture);
+      si.pending_initial_copy = false;
+      any_swap = true;
+    }
+    if(any_swap)
+    {
+      for(auto& [e, cp] : m_computePasses)
+      {
+        if(!cp.srb)
+          continue;
+        for(const auto& si : m_storageImages)
+        {
+          if(!si.persistent)
+            continue;
+          if(si.binding >= 0 && si.texture)
+            score::gfx::replaceTexture(*cp.srb, si.binding, si.texture);
+          if(si.prev_binding >= 0 && si.read_texture)
+            score::gfx::replaceTexture(*cp.srb, si.prev_binding, si.read_texture);
+        }
+        // No trailing create() — replaceTexture's updateResources() fast
+        // path already refreshes the backend descriptor state.
+      }
+
+      // Diagnostic 014: graphics passes that visualize the persistent
+      // image bake the pre-swap `si.texture` pointer at construction time
+      // (createGraphicsPass calls textureForOutput for the edge's source
+      // port). After ping-pong, that bound handle now identifies the
+      // stale-frame slot. Patch every graphics SRB so it samples the
+      // post-swap writable target — i.e. what the next compute dispatch
+      // will write into and what we want to display.
+      for(auto& [e, gp] : m_graphicsPasses)
+      {
+        if(!gp.pipeline.srb || !gp.outputSampler)
+          continue;
+        // Resolve which storage image this graphics pass shows. Mirrors
+        // textureForOutput(): first the per-port mapping in
+        // m_outStorageImages, otherwise the m_outputTexture fallback.
+        QRhiTexture* newTex = nullptr;
+        for(const auto& [port, index] : m_outStorageImages)
+        {
+          if(port == e->source && index < (int)m_storageImages.size())
+          {
+            const auto& si = m_storageImages[index];
+            if(si.persistent)
+              newTex = si.texture;
+            break;
+          }
+        }
+        if(!newTex)
+        {
+          // Fallback path — graphics pass uses m_outputTexture. Find the
+          // persistent entry whose post-swap read_texture equals the
+          // pre-swap m_outputTexture (= what the SRB currently binds).
+          for(const auto& si : m_storageImages)
+          {
+            if(si.persistent && si.read_texture == m_outputTexture)
+            {
+              newTex = si.texture;
+              break;
+            }
+          }
+        }
+        if(newTex)
+          score::gfx::replaceTexture(*gp.pipeline.srb, gp.outputSampler, newTex);
+      }
+
+      // Diagnostic 014: m_outputTexture is the fallback returned by
+      // textureForOutput()/resolveDispatchTexture() for default-port
+      // queries. It was captured from the first persistent storage
+      // image's primary `texture` at build time; after the swap that
+      // pointer is the stale-frame slot. Identify the entry whose
+      // post-swap read_texture (= pre-swap texture) matches the cached
+      // m_outputTexture and refresh it to the new writable target.
+      if(m_outputTexture)
+      {
+        for(const auto& si : m_storageImages)
+        {
+          if(si.persistent && si.read_texture == m_outputTexture && si.texture)
+          {
+            m_outputTexture = si.texture;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // GENERATE_MIPS: regenerate the mip chain so downstream samplers with a
+  // mipmap filter see valid data in levels > 0. `res` is null once a compute
+  // pass above has consumed it, so a fresh batch is acquired on demand.
+  // Gated on FRAMEINDEX > 0: the textures are created with layout
+  // PREINITIALIZED and Qt RHI's GenMips path transitions FROM a transfer
+  // layout BACK to whatever the texture was stored as. Calling generateMips
+  // before the compute pass has actually written the image at least once
+  // leaves it in PREINITIALIZED, which trips VUID-VkImageMemoryBarrier-
+  // newLayout-01198. After one frame the compute dispatch has transitioned
+  // the image to GENERAL and generateMips is safe.
+  if(n.standardUBO.frameIndex > 0u)
+  {
+    for(const auto& si : m_storageImages)
+    {
+      if(!si.generate_mips || !si.texture)
+        continue;
+      if(!(si.texture->flags() & QRhiTexture::MipMapped))
+        continue;
+      if(!res)
+        res = renderer.state.rhi->nextResourceUpdateBatch();
+      res->generateMips(si.texture);
+    }
+  }
 }
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp
index b89c4c873b..d7953a0c0e 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp
@@ -17,13 +17,23 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
 
   virtual ~RenderedCSFNode();
 
-  void updateInputTexture(const Port& input, QRhiTexture* tex) override;
+  void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override;
   QRhiTexture* textureForOutput(const Port& output) override;
 
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+  void
+  addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeInputEdge(RenderList& renderer, Edge& edge) override;
+
   void runInitialPasses(
       RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
       Edge& edge) override;
@@ -31,7 +41,7 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
   void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override;
 
 private:
-  void initComputePass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res);
+  void initComputeSRBAndPasses(RenderList& renderer, QRhiResourceUpdateBatch& res);
   void createComputePipeline(RenderList& renderer);
   void createGraphicsPass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res);
   void updateDescriptorSet(RenderList& renderer, Edge& edge);
@@ -51,11 +61,24 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
       RenderList& renderer, const QString& name, const QString& access, int size);
   void updateStorageBuffers(RenderList& renderer, QRhiResourceUpdateBatch& res);
   void recreateShaderResourceBindings(RenderList& renderer, QRhiResourceUpdateBatch& res);
+
+  // Single source of truth for the CSF compute SRB binding list. Walks the
+  // descriptor's INPUTS / RESOURCES / AUXILIARIES in order and emits one
+  // QRhiShaderResourceBinding per shader binding slot. Both
+  // initComputeSRBAndPasses (init path) and recreateShaderResourceBindings
+  // (re-emit path) call this so the two paths can never drift in their
+  // emission order, indices, or fallback-on-missing-resource policy.
+  // Binding 1 (ProcessUBO) is left as a nullptr placeholder; each caller
+  // patches it per-pass. Output: appended to `bindings`.
+  void buildComputeSrbBindings(
+      RenderList& renderer, QRhiResourceUpdateBatch& res,
+      QList<QRhiShaderResourceBinding>& bindings);
   int getArraySizeFromUI(const QString& bufferName) const;
   QString updateShaderWithImageFormats(QString current);
 
   // Geometry buffer management
   void updateGeometryBindings(RenderList& renderer, QRhiResourceUpdateBatch& res);
+
   void pushOutputGeometry(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge);
   int resolveCountExpression(
       const std::string& expr, const isf::geometry_input& geo,
@@ -69,6 +92,12 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
     QRhiComputePipeline* pipeline{};
     QRhiShaderResourceBindings* srb{};
     QRhiBuffer* processUBO{};
+    // Hash of the last bindings vector applied to `srb`. Compared in
+    // recreateShaderResourceBindings to skip a destroy+setBindings+
+    // create cycle when the bindings haven't actually changed since the
+    // previous frame. 0 = "never built / unknown" — first call always
+    // rebuilds. 64-bit to match the uint64_t hash it is compared against.
+    uint64_t srbBindingsHash{0};
   };
 
   struct GraphicsPass
@@ -106,9 +135,21 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
   struct StorageImage
   {
     QRhiTexture* texture{};
+    QRhiTexture* read_texture{}; //!< Previous-frame slot, only when persistent
     QString name;
     QString access; // "read_only", "write_only", "read_write"
     QRhiTexture::Format format{QRhiTexture::RGBA8};
+    bool is3D{false};
+    bool isCube{false};           //!< Writable cubemap (imageCube)
+    bool persistent{false};       //!< Ping-pong this image across frames
+    bool pending_initial_copy{false}; //!< First frame: _prev reads from `texture` too
+    bool generate_mips{false};    //!< Run QRhi::generateMips after compute passes
+
+    // Recorded binding slots in the compute SRB so that end-of-frame
+    // swapping can call replaceTexture() without having to re-walk the
+    // descriptor layout.
+    int binding{-1};
+    int prev_binding{-1};
   };
   std::vector<StorageImage> m_storageImages;
 
@@ -138,22 +179,55 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
       bool scatterPending{false};        // true = needs dispatch this frame
     };
 
-    // Structured SSBOs that travel with the geometry (matched by name
-    // against ossia::geometry::auxiliary_buffer entries).
+    // Structured SSBOs (or UBOs) that travel with the geometry (matched
+    // by name against ossia::geometry::auxiliary_buffer entries). The
+    // `is_uniform` flag mirrors the AUXILIARY request's kind: when true,
+    // the buffer is bound as a std140 uniform block via
+    // QRhiShaderResourceBinding::uniformBuffer; when false, as an std430
+    // SSBO via bufferLoad / bufferStore / bufferLoadStore.
     struct AuxiliarySSBO
     {
-      QRhiBuffer* buffer{};       // GPU SSBO (write target / primary)
+      QRhiBuffer* buffer{};       // GPU SSBO/UBO (write target / primary)
       QRhiBuffer* read_buffer{};  // Separate read buffer for ping-pong (nullptr = use buffer for both)
       int64_t size{};
       bool owned{true};
+      bool is_uniform{false};     // true = std140 UBO, false = std430 SSBO
       std::string name;
       std::string access;
       std::vector<isf::storage_input::layout_field> layout;
       std::string size_expr; // expression for flexible array count, may contain $USER
     };
 
+    // Auxiliary textures that travel with the geometry (resolved from
+    // ossia::geometry::auxiliary_textures by name). Either sampled
+    // (sampler*) or storage-image (image*). Shape-matched placeholder
+    // used as fallback when no match exists on the incoming geometry.
+    struct AuxiliaryTexture
+    {
+      QRhiSampler* sampler{};   // null for storage-image entries
+      QRhiTexture* texture{};   // current bound handle (placeholder or upstream)
+      QRhiTexture* placeholder{}; // shape-matched empty from RenderList
+      std::string name;
+      int binding{-1};          // assigned at SRB build
+      bool is_storage{false};
+      std::string access;       // "read_only" / "write_only" / "read_write"
+
+      // True when this binding allocated `texture` itself (write_only /
+      // read_write storage image declared as a nested aux on a geometry
+      // input — same lifecycle role as m_storageImages plays for top-
+      // level csf_image_input outputs). Owned textures:
+      //   - skip the per-frame upstream-resolution overwrite (we own
+      //     the data, no upstream contributes);
+      //   - get pushed into out_geo.auxiliary_textures by name so
+      //     downstream consumers can resolve the live handle;
+      //   - get deleted on release().
+      bool owned{false};
+    };
+
     std::vector<AttributeSSBO> attribute_ssbos;
     std::vector<AuxiliarySSBO> auxiliary_ssbos;
+    std::vector<AuxiliaryTexture> auxiliary_textures;
+    std::string input_name;    // RESOURCES[].NAME (e.g. "geoIn", "geoOut") — used by PER_VERTEX/PER_INSTANCE TARGET filtering
     int vertex_count{0};       // Number of elements (vertices) in the geometry
     int instance_count{1};      // Number of instances
     int input_port_index{-1};   // Input port index for this binding (-1 = no input port, e.g. write_only generator)
@@ -175,11 +249,11 @@ struct RenderedCSFNode : score::gfx::NodeRenderer
     int prev_attribute_count{-1};
     int prev_upstream_attr_count{-1};
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-    QRhiBuffer* indirectDrawBuffer{};   // StorageBuffer | IndirectBuffer for GPU-driven draw args
-    bool uses_indirect_draw{false};     // true when geometry_input has INDIRECT_DRAW: true
-    bool indirect_draw_indexed{false};  // true for drawIndexedIndirect, false for drawIndirect
-#endif
+    QRhiBuffer* indirectBuffer{};       // StorageBuffer (+ IndirectBuffer on Qt 6.12+)
+    int64_t indirectBufferSize{};
+    int indirectCountResult{0};         // Resolved command count
+    std::string indirectCountExpr;      // Expression string for dynamic re-resolve
+    bool uses_indirect_draw{false};
   };
   std::vector<GeometryBinding> m_geometryBindings;
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp
index 327f4a9ff0..fb1b064968 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp
@@ -1,3 +1,5 @@
+#include <Gfx/Graph/PipelineStateHelpers.hpp>
+#include <Gfx/Graph/RenderList.hpp>
 #include <Gfx/Graph/RenderedISFNode.hpp>
 #include <Gfx/Graph/RenderedISFSamplerUtils.hpp>
 #include <Gfx/Graph/ShaderCache.hpp>
@@ -14,22 +16,67 @@ PassOutput RenderedISFNode::initPassSampler(
     QRhiResourceUpdateBatch& res)
 {
   QRhi& rhi = *renderer.state.rhi;
+
+  // Volumetric fragment passes: a pass targeting a 3D output (OUTPUTS entry
+  // with DEPTH > 1) or carrying a Z expression requires per-slice color
+  // attachments / 3D image storage that this node does not wire end-to-end.
+  // The ISF parser rejects such shaders up-front (see isf.cpp parse_isf:
+  // "fragment-mode ISF with PASSES targeting Z / 3D OUTPUTS"); reaching this
+  // point with such a pass means the rejection drifted out of sync.
+  if(!pass.z_expression.empty() || [&]{
+       for(const auto& out : n.descriptor().outputs)
+         if(out.name == pass.target && out.depth > 1) return true;
+       return false;
+     }())
+  {
+    qFatal(
+        "RenderedISFNode: fragment PASSES with Z / 3D OUTPUTS reached the "
+        "renderer; parse-time rejection in isf::parser::parse_isf() should "
+        "have prevented this. Target: %s",
+        pass.target.c_str());
+  }
+
+  // Per-pass FORMAT override takes precedence over the legacy FLOAT flag.
+  // Covers the handful of formats useful as intermediate render targets:
+  // rgba8 (default), rgba16f (common precision bump), rgba32f, r8, r16f, r32f.
+  auto pass_format = [&]() -> QRhiTexture::Format {
+    if(pass.format.empty())
+      return pass.float_storage ? QRhiTexture::RGBA32F : QRhiTexture::RGBA8;
+    std::string f = pass.format;
+    for(auto& c : f)
+      c = (char)std::tolower((unsigned char)c);
+    if(f == "rgba8")    return QRhiTexture::RGBA8;
+    if(f == "rgba16f")  return QRhiTexture::RGBA16F;
+    if(f == "rgba32f")  return QRhiTexture::RGBA32F;
+    if(f == "r8")       return QRhiTexture::R8;
+    if(f == "r16f")     return QRhiTexture::R16F;
+    if(f == "r32f")     return QRhiTexture::R32F;
+    qWarning() << "ISF pass FORMAT" << pass.format.c_str()
+               << "not recognised — falling back to RGBA8";
+    return QRhiTexture::RGBA8;
+  };
   // In all the other cases we create a custom render target
-  const auto fmt = (pass.float_storage) ? QRhiTexture::RGBA32F : QRhiTexture::RGBA8;
+  const auto fmt = pass_format();
   const auto filter = (pass.nearest_filter) ? QRhiSampler::Nearest : QRhiSampler::Linear;
   auto sampler = rhi.newSampler(
       filter, filter, QRhiSampler::None, QRhiSampler::Mirror, QRhiSampler::Mirror);
-  sampler->setName("ISFNode::initPassSamplers::sampler");
+  sampler->setName("RenderedISFNode::initPassSamplers::sampler");
   sampler->create();
 
   const QSize texSize = (pass.width_expression.empty() && pass.height_expression.empty())
                             ? mainTexSize
                             : n.computeTextureSize(pass, mainTexSize);
 
-  QImage clear_texture(texSize, pass.float_storage ? QImage::Format_RGBA32FPx4 : QImage::Format_ARGB32);
+  // Upload a zero clear matching the texture format. Qt can convert, so we
+  // pick a plausible source: float32 for floating-point formats, uint8 otherwise.
+  const bool is_float_fmt
+      = fmt == QRhiTexture::RGBA16F || fmt == QRhiTexture::RGBA32F
+        || fmt == QRhiTexture::R16F || fmt == QRhiTexture::R32F;
+  QImage clear_texture(
+      texSize, is_float_fmt ? QImage::Format_RGBA32FPx4 : QImage::Format_ARGB32);
   clear_texture.fill(0);
   auto tex = rhi.newTexture(fmt, texSize, 1, QRhiTexture::RenderTarget);
-  tex->setName("ISFNode::initPassSamplers::tex");
+  tex->setName("RenderedISFNode::initPassSamplers::tex");
   SCORE_ASSERT(tex->create());
   res.uploadTexture(tex, clear_texture);
 
@@ -39,7 +86,7 @@ PassOutput RenderedISFNode::initPassSampler(
   if(pass.persistent)
   {
     auto tex2 = rhi.newTexture(fmt, texSize, 1, QRhiTexture::RenderTarget);
-    tex2->setName("ISFNode::initPassSamplers::tex2");
+    tex2->setName("RenderedISFNode::initPassSamplers::tex2");
     SCORE_ASSERT(tex2->create());
     res.uploadTexture(tex2, clear_texture);
 
@@ -83,7 +130,7 @@ RenderedISFNode::RenderedISFNode(const ISFNode& node) noexcept
 {
 }
 
-void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex)
+void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
   int sampler_idx = 0;
   for(auto* p : node.input)
@@ -91,7 +138,11 @@ void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex)
     if(p == &input)
       break;
     if(p->type == Types::Image)
+    {
       sampler_idx++;
+      if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+        sampler_idx++;
+    }
   }
 
   if(sampler_idx < (int)m_inputSamplers.size())
@@ -110,6 +161,65 @@ void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex)
             score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex);
       }
     }
+
+    if(depthTex
+       && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth
+       && sampler_idx + 1 < (int)m_inputSamplers.size())
+    {
+      auto& depthSampl = m_inputSamplers[sampler_idx + 1];
+      if(depthSampl.texture != depthTex)
+      {
+        depthSampl.texture = depthTex;
+        for(auto& [e, passes] : m_passes)
+        {
+          for(auto& pass : passes.passes)
+            if(pass.p.srb)
+              score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex);
+          for(auto& pass : passes.altPasses)
+            if(pass.p.srb)
+              score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex);
+        }
+      }
+    }
+  }
+}
+
+void RenderedISFNode::updateInputSamplerFilter(
+    const Port& input, const RenderTargetSpecs& spec)
+{
+  // A DEPTH-enabled image port owns two consecutive m_inputSamplers slots
+  // (color + depth companion); count both, as updateInputTexture() does.
+  int sampler_idx = 0;
+  for(auto* p : node.input)
+  {
+    if(p == &input)
+      break;
+    if(p->type == Types::Image)
+      sampler_idx += ((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) ? 2 : 1;
+  }
+
+  if(sampler_idx < (int)m_inputSamplers.size())
+  {
+    auto* sampler = m_inputSamplers[sampler_idx].sampler;
+    if(sampler->magFilter() == spec.mag_filter
+       && sampler->minFilter() == spec.min_filter
+       && sampler->mipmapMode() == spec.mipmap_mode
+       && sampler->addressU() == spec.address_u
+       && sampler->addressV() == spec.address_v
+       && sampler->addressW() == spec.address_w)
+    {
+      // Nothing to update: the rt_changed path calls this on every
+      // renderTargetSpecsChanged, often for a size/format-only change.
+      // Skip create(), which would needlessly re-allocate the QRhiSampler.
+      return;
+    }
+    sampler->setMagFilter(spec.mag_filter);
+    sampler->setMinFilter(spec.min_filter);
+    sampler->setMipmapMode(spec.mipmap_mode);
+    sampler->setAddressU(spec.address_u);
+    sampler->setAddressV(spec.address_v);
+    sampler->setAddressW(spec.address_w);
+    sampler->create();
+  }
+}
 
@@ -194,7 +304,8 @@ void main ()
 
 std::pair<Pass, Pass> RenderedISFNode::createPass(
     RenderList& renderer, ossia::small_vector<PassOutput, 1>& passSamplers,
-    PassOutput target, bool previousPassIsPersistent)
+    PassOutput target, const isf::pass& modelPass,
+    bool previousPassIsPersistent)
 {
   std::pair<Pass, Pass> ret;
   QRhi& rhi = *renderer.state.rhi;
@@ -205,6 +316,32 @@ std::pair<Pass, Pass> RenderedISFNode::createPass(
   pubo->setName("RenderedISFNode::createPass::pubo");
   pubo->create();
 
+  // Compute effective pipeline state: global default + per-pass override.
+  const auto eff_state
+      = mergeState(n.descriptor().default_state, modelPass.override_state);
+
+  // Build the extra-binding list (storage + optional multiview UBO).
+  auto extraRhiBindings = buildExtraBindings(m_storage);
+  if(m_multiViewUBO)
+  {
+    // Multiview UBO binds right after storage resources.
+    int mvBinding = m_firstStorageBinding;
+    for(const auto& e : m_storage.ssbos)
+    {
+      if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1);
+      if(e.prev_binding >= 0) mvBinding = std::max(mvBinding, e.prev_binding + 1);
+    }
+    for(const auto& e : m_storage.images)
+      if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1);
+
+    extraRhiBindings.append(QRhiShaderResourceBinding::uniformBuffer(
+        mvBinding,
+        QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage,
+        m_multiViewUBO));
+  }
+  const std::span<QRhiShaderResourceBinding> extras{
+      extraRhiBindings.data(), (std::size_t)extraRhiBindings.size()};
+
   // Create the main pass
   {
     // Render target for the pass
@@ -230,9 +367,13 @@ std::pair<Pass, Pass> RenderedISFNode::createPass(
     try
     {
       auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS);
-      auto pip = score::gfx::buildPipeline(
+      const auto mainSamplers = allSamplers(passSamplers, 1);
+      auto pip = score::gfx::buildPipelineWithState(
           renderer, renderer.defaultTriangle(), v, s, renderTarget, pubo, m_materialUBO,
-          allSamplers(passSamplers, 1));
+          mainSamplers,
+          extras,
+          eff_state,
+          n.descriptor().multiview_count);
 
       ret.first = Pass{renderTarget, pip, pubo};
     }
@@ -262,7 +403,7 @@ std::pair<Pass, Pass> RenderedISFNode::createPass(
         // Then we have to use the textures the "main" passes are rendering to
         ret.second.p.srb = score::gfx::createDefaultBindings(
             renderer, ret.second.renderTarget, pubo, m_materialUBO,
-            allSamplers(passSamplers, 0));
+            allSamplers(passSamplers, 0), extras);
       }
     }
     else if(auto psampler = ossia::get_if<PersistSampler>(&target))
@@ -284,7 +425,7 @@ std::pair<Pass, Pass> RenderedISFNode::createPass(
         // We necessarily use the main pass rendered-to samplers
         ret.second.p.srb = score::gfx::createDefaultBindings(
             renderer, ret.second.renderTarget, pubo, m_materialUBO,
-            allSamplers(passSamplers, 0));
+            allSamplers(passSamplers, 0), extras);
       }
       else
       {
@@ -294,7 +435,7 @@ std::pair<Pass, Pass> RenderedISFNode::createPass(
           // Then we have to use the textures the "main" passes are rendering to
           ret.second.p.srb = score::gfx::createDefaultBindings(
               renderer, ret.second.renderTarget, pubo, m_materialUBO,
-              allSamplers(passSamplers, 0));
+              allSamplers(passSamplers, 0), extras);
         }
       }
     }
@@ -327,12 +468,53 @@ void RenderedISFNode::initPasses(
     }
   }
 
+  // Lazily compute the storage-binding offset now that pass-samplers are
+  // known. Each PersistSampler entry in passes.samplers consumes one sampler
+  // binding in the shader reflection (input_samplers + audio_samplers +
+  // pass_samplers). Only do this once per node lifetime — m_firstStorageBinding
+  // stays >= 0 on subsequent edges, but ensureStorageResources is idempotent
+  // and must run so that any resize reallocates the buffers.
+  if(m_firstStorageBinding < 0)
+  {
+    int passSamplerCount = 0;
+    for(auto& s : passes.samplers)
+      if(ossia::get_if<PersistSampler>(&s))
+        passSamplerCount++;
+
+    const int firstStorageBinding
+        = 3 + (int)m_inputSamplers.size() + (int)m_audioSamplers.size()
+          + passSamplerCount;
+    m_firstStorageBinding = firstStorageBinding;
+    collectGraphicsStorageResources(n.descriptor(), firstStorageBinding, m_storage);
+
+    // Allocate the multiview UBO when MULTIVIEW >= 2 is declared.
+    if(n.descriptor().multiview_count >= 2)
+    {
+      QRhi& rhi = *renderer.state.rhi;
+      const int mvCount = n.descriptor().multiview_count;
+      m_multiViewUBO = rhi.newBuffer(
+          QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer,
+          sizeof(float[16]) * mvCount);
+      m_multiViewUBO->setName("RenderedISFNode::multiview_ubo");
+      SCORE_ASSERT(m_multiViewUBO->create());
+    }
+  }
+
+  // Ensure storage buffers/images exist. Safe to call per edge: it's idempotent
+  // and resizes to match renderSize. Then borrow any upstream-provided UBOs /
+  // read-only SSBOs (no SRB patch here — SRBs don't exist yet).
+  ensureStorageResources(
+      *renderer.state.rhi, res, renderer, n.descriptor(), m_storage,
+      renderer.state.renderSize);
+  bindUpstreamBuffers(renderer, n.input, m_storage);
+
   bool previousPassIsPersistent = false;
   for(std::size_t i = 0; i < passes.samplers.size(); i++)
   {
     auto& pass = passes.samplers[i];
     const auto [p1, p2]
-        = createPass(renderer, passes.samplers, pass, previousPassIsPersistent);
+        = createPass(renderer, passes.samplers, pass, model_passes[i],
+                     previousPassIsPersistent);
     if(p1.p.pipeline)
     {
       passes.passes.push_back(p1);
@@ -386,6 +568,14 @@ void RenderedISFNode::initPasses(
 }
 
 void RenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
+
+  for(Edge* edge : n.output[0]->edges)
+    addOutputPass(renderer, *edge, res);
+}
+
+void RenderedISFNode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   QRhi& rhi = *renderer.state.rhi;
 
@@ -407,6 +597,8 @@ void RenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
         = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
     m_materialUBO->setName("RenderedISFNode::init::m_materialUBO");
     SCORE_ASSERT(m_materialUBO->create());
+    if(n.m_material_data)
+      res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get());
   }
 
   // Create the samplers
@@ -414,40 +606,116 @@ void RenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
   SCORE_ASSERT(m_inputSamplers.empty());
   SCORE_ASSERT(m_audioSamplers.empty());
 
-  m_inputSamplers = initInputSamplers(this->n, renderer, n.input);
+  m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor());
 
   m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures);
 
-  // Create the passes
+  m_initialized = true;
+}
 
-  for(Edge* edge : n.output[0]->edges)
+void RenderedISFNode::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  // An edge without a render target gets no passes.
+  auto target = renderer.renderTargetForOutput(edge);
+  if(!target.renderTarget)
+    return;
+  initPasses(target, renderer, edge, renderer.renderSize(&edge), res);
+}
+
+void RenderedISFNode::addInputEdge(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  // Only image inputs have a texture to bind.
+  if(edge.sink->type != Types::Image)
+    return;
+
+  // Ask the upstream node's renderer (for this RenderList) for its output
+  // texture, and bind it together with the input port's depth texture.
+  auto& upstreamRenderers = edge.source->node->renderedNodes;
+  const auto found = upstreamRenderers.find(&renderer);
+  if(found == upstreamRenderers.end())
+    return;
+  if(auto* tex = found->second->textureForOutput(*edge.source))
+    updateInputTexture(
+        *edge.sink, tex, renderer.renderTargetForInputPort(*edge.sink).depthTexture);
+}
+
+void RenderedISFNode::removeInputEdge(RenderList& renderer, Edge& edge)
+{
+  if(edge.sink && edge.sink->type == Types::Image)
   {
-    auto rt = renderer.renderTargetForOutput(*edge);
-    if(rt.renderTarget)
+    // Swap image-sampler bindings to empty-texture placeholders so the SRB
+    // never holds pointers to the just-released upstream renderer's
+    // textures. Mirrors SimpleRenderedISFNode::removeInputEdge — same
+    // dangling VkImageView / end-of-frame barrier crash applies to the
+    // multi-pass ISF renderer whenever a cable is cut at runtime. Include
+    // the depth companion when the port declared DEPTH: true.
+    const bool hasDepthCompanion
+        = (edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
+    QRhiTexture* depthFallback
+        = hasDepthCompanion ? &renderer.emptyTexture() : nullptr;
+    updateInputTexture(*edge.sink, &renderer.emptyTexture(), depthFallback);
+  }
+}
+
+void RenderedISFNode::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  auto it = ossia::find_if(m_passes, [&](auto& p) { return p.first == &edge; });
+  if(it != m_passes.end())
+  {
+    auto& [passes, altPasses, passSamplers] = it->second;
+
+    std::size_t num = passes.size();
+    for(std::size_t i = 0; i < num; i++)
     {
-      initPasses(rt, renderer, *edge, renderer.renderSize(edge), res);
+      auto& pass = passes[i];
+      auto& altpass = altPasses[i];
+      auto& sampler = passSamplers[i];
+
+      if(pass.p.srb != altpass.p.srb)
+      {
+        altpass.p.srb->deleteLater();
+      }
+
+      pass.p.release();
+
+      if(pass.processUBO)
+        pass.processUBO->deleteLater();
+
+      if(auto p = ossia::get_if<PersistSampler>(&sampler))
+      {
+        delete p->sampler;
+      }
     }
+
+    m_passes.erase(it);
   }
 }
 
+bool RenderedISFNode::hasOutputPassForEdge(Edge& edge) const
+{
+  const auto matches = [&](const auto& entry) { return entry.first == &edge; };
+  return ossia::find_if(m_passes, matches) != m_passes.end();
+}
+
 void RenderedISFNode::update(
     RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
 {
   SCORE_ASSERT(m_passes.size() > 0);
 
-  // PASSINDEX must be set to the last index
-  // FIXME
-
-  // FIXME should be -2 if last pass is persistent
-  if(n.m_descriptor.passes.back().persistent)
-    n.standardUBO.passIndex = m_passes.size() - 2;
-  else
-    n.standardUBO.passIndex = m_passes.size() - 1;
+  // Persistent-storage ping-pong happens once per frame. Reset the guard
+  // here so whichever edge's runRenderPass fires first does the swap.
+  m_storageSwappedThisFrame = false;
 
+  // passIndex gets set per-pass in the processUBO update loop below; no
+  // need to seed a value here (previous code used m_passes.size() — which
+  // is the edge count, not the pass count — and was then overwritten).
   n.standardUBO.frameIndex++;
 
   // Update audio textures
   bool audioChanged = false;
+  std::size_t audio_idx = 0;
   for(auto& audio : n.m_audio_textures)
   {
     if(std::optional<Sampler> sampl
@@ -456,7 +724,14 @@ void RenderedISFNode::update(
       // Audio texture changed, this means the material needs update
       audioChanged = true;
 
-      auto& [rhiSampler, tex] = *sampl;
+      auto& [rhiSampler, tex, fb_] = *sampl;
+      // Keep m_audioSamplers[i].texture in sync with the live GPU texture so
+      // any later pipeline rebuild (rt_changed path in RenderList::render
+      // calling removeOutputPass + addOutputPass) uses the live binding
+      // instead of the placeholder empty texture.
+      if(audio_idx < m_audioSamplers.size())
+        m_audioSamplers[audio_idx].texture = tex;
+
       for(auto& [e, p] : m_passes)
       {
         for(auto& pass : p.passes)
@@ -467,6 +742,7 @@ void RenderedISFNode::update(
               *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture());
       }
     }
+    ++audio_idx;
   }
 
   // Update material
@@ -475,6 +751,28 @@ void RenderedISFNode::update(
     char* data = n.m_material_data.get();
     res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
   }
+  materialChanged = false;
+
+  // Reset event ports now that the UBO has captured their pulse value.
+  // If anything fired, force next frame's upload so the reset-to-zero
+  // propagates out through the normally-gated upload path.
+  if(n.resetEventPortsAfterFrame())
+    materialChanged = true;
+
+  // Re-bind upstream UBOs / read-only SSBOs on every pass's SRB. Cables can
+  // be added or replaced after init, so this runs every frame. Both the main
+  // and alt chains hold independent descriptor sets referencing the same
+  // storage resources; both must be patched. bindUpstreamBuffers is
+  // idempotent when the pointer already matches.
+  for(auto& [e, p] : m_passes)
+  {
+    for(auto& pass : p.passes)
+      if(pass.p.srb)
+        bindUpstreamBuffers(renderer, n.input, m_storage, pass.p.srb);
+    for(auto& pass : p.altPasses)
+      if(pass.p.srb)
+        bindUpstreamBuffers(renderer, n.input, m_storage, pass.p.srb);
+  }
 
   // Update all the process UBOs
 
@@ -518,7 +816,15 @@ void RenderedISFNode::update(
 
 void RenderedISFNode::release(RenderList& r)
 {
-  // customRelease
+  releaseState(r);
+}
+
+void RenderedISFNode::releaseState(RenderList& r)
+{
+  if(!m_initialized)
+    return;
+
+  // Release all remaining passes
   {
     for(auto& texture : n.m_audio_textures)
     {
@@ -530,7 +836,6 @@ void RenderedISFNode::release(RenderList& r)
           if(tex != &r.emptyTexture())
             tex->deleteLater();
         }
-        // FIXME remove it from n.m_audio_textures?
       }
     }
 
@@ -538,8 +843,8 @@ void RenderedISFNode::release(RenderList& r)
     {
       auto& [passes, altPasses, passSamplers] = allPasses;
 
-      std::size_t n = passes.size();
-      for(std::size_t i = 0; i < n; i++)
+      std::size_t num = passes.size();
+      for(std::size_t i = 0; i < num; i++)
       {
         auto& pass = passes[i];
         auto& altpass = altPasses[i];
@@ -558,12 +863,6 @@ void RenderedISFNode::release(RenderList& r)
         if(auto p = ossia::get_if<PersistSampler>(&sampler))
         {
           delete p->sampler;
-          // TODO check texture deletion ???
-          // texture isdeleted elsewxheree
-        }
-        else
-        {
-          // It's the render target of another node, do not touch it
         }
       }
     }
@@ -578,13 +877,11 @@ void RenderedISFNode::release(RenderList& r)
   for(auto sampler : m_inputSamplers)
   {
     delete sampler.sampler;
-    // texture isdeleted elsewxheree
   }
   m_inputSamplers.clear();
   for(auto sampler : m_audioSamplers)
   {
     delete sampler.sampler;
-    // texture isdeleted elsewxheree
   }
   m_audioSamplers.clear();
 
@@ -592,6 +889,19 @@ void RenderedISFNode::release(RenderList& r)
   m_materialUBO = nullptr;
 
   m_meshBuffer = {};
+
+  // Release storage resources (owned SSBOs + storage images).
+  m_storage.release();
+  m_firstStorageBinding = -1;
+  m_storageSwappedThisFrame = false;
+
+  if(m_multiViewUBO)
+  {
+    m_multiViewUBO->deleteLater();
+    m_multiViewUBO = nullptr;
+  }
+
+  m_initialized = false;
 }
 
 void RenderedISFNode::runInitialPasses(
@@ -630,8 +940,10 @@ void RenderedISFNode::runInitialPasses(
     auto srb = pass.p.srb;
     auto texture = pass.renderTarget.texture;
 
-    // TODO need to free stuff
-    cb.beginPass(rt, Qt::black, {1.0f, 0}, updateBatch);
+    // Note: updateBatch ownership transfers to QRhi on beginPass; per-pass
+    // state (pipeline/srb/processUBO/renderTarget) is owned by m_passes and
+    // released in releaseState() / removeOutputPass(). Nothing to free here.
+    cb.beginPass(rt, Qt::black, {0.0f, 0}, updateBatch);
     updateBatch = nullptr;
     {
       cb.setGraphicsPipeline(pipeline);
@@ -681,7 +993,10 @@ void RenderedISFNode::runRenderPass(
     auto srb = pass.p.srb;
     auto texture = pass.renderTarget.texture;
 
-    // TODO need to free stuff
+    // No allocations in this scope: this function records draw calls into a
+    // command buffer already opened by RenderList::render(). updateBatch is
+    // managed by the caller; per-pass state lives in m_passes and is released
+    // in releaseState() / removeOutputPass().
     {
       cb.setGraphicsPipeline(pipeline);
       cb.setShaderResources(srb);
@@ -703,6 +1018,32 @@ void RenderedISFNode::runRenderPass(
 
   using namespace std;
   swap(passes, altPasses);
+
+  // Persistent-storage ping-pong. Mutate the shared state exactly once per
+  // frame, then re-apply bindings to every SRB across every edge/chain so
+  // each draw next frame sees the swapped pointers. Patching only one SRB
+  // would leave others referencing stale buffers and read wrong data.
+  if(!m_storageSwappedThisFrame)
+  {
+    m_storageSwappedThisFrame = true;
+    swapPersistentSSBOsState(m_storage);
+    for(auto& [e, p] : m_passes)
+    {
+      const std::size_t num = p.passes.size();
+      for(std::size_t i = 0; i < num; i++)
+      {
+        auto* mainSrb = p.passes[i].p.srb;
+        if(mainSrb)
+          reapplyStorageBindings(m_storage, *mainSrb);
+        // altPass's SRB aliases the main one for non-persistent passes; skip
+        // the second reapply in that case — replaceBuffer is idempotent but
+        // srb->create() is not free.
+        auto* altSrb = p.altPasses[i].p.srb;
+        if(altSrb && altSrb != mainSrb)
+          reapplyStorageBindings(m_storage, *altSrb);
+      }
+    }
+  }
 }
 
 AudioTextureUpload::AudioTextureUpload()
@@ -737,9 +1078,14 @@ void AudioTextureUpload::processTemporal(
     m_scratchpad[i] = 0.5f + audio.data[i] / 2.f;
   }
 
-  // Copy it
+  // Copy it. Texture layout is samples × channels (width × height).
   QRhiTextureSubresourceUploadDescription subdesc(
       m_scratchpad.data(), audio.data.size() * sizeof(float));
+  if(audio.channels > 0)
+  {
+    const int samples_per_channel = int(audio.data.size()) / audio.channels;
+    subdesc.setSourceSize(QSize(samples_per_channel, audio.channels));
+  }
   QRhiTextureUploadEntry entry{0, 0, subdesc};
   QRhiTextureUploadDescription desc{entry};
   res.uploadTexture(rhiTexture, desc);
@@ -751,7 +1097,8 @@ void AudioTextureUpload::processHistogram(
   // Size of the audio input buffer
   std::size_t audioInputBufferSize = audio.data.size() / audio.channels;
 
-  // Effective size of the FFT data we want to use (e.g. without DC offset and nyquist coefficient at the end)
+  // Effective size of the FFT data we want to use (skips DC and nyquist bins;
+  // this also matches the texture width picked in updateAudioTexture).
   if(audioInputBufferSize < 4)
     return;
   std::size_t fftSize = audioInputBufferSize / 2 - 2;
@@ -769,48 +1116,60 @@ void AudioTextureUpload::processHistogram(
     const float byte_norm = 255.f / (dbmax - dbmin);
     const float norm = 2.f / (fftSize);
 
-    for(int i = 0; i < 1; i++)
+    // Histogram treats channel 0 as the source — it's a scrolling
+    // spectrogram display and summing / interleaving channels would blur
+    // the visualisation. Explicitly use i=0 rather than the old
+    // `for(int i = 0; i < 1; i++)` single-iteration loop.
+    const int i = 0;
     {
       float* inputData = audio.data.data() + i * audioInputBufferSize;
       double current_window_value = 0.;
 
-      // Basic window function on the audio buffer
+      // Basic triangular window function on the audio buffer
       double window_increment = 1. / (audioInputBufferSize / 2);
-      for(int s = 0; s < audioInputBufferSize / 2; s++)
+      for(int s = 0; s < (int)(audioInputBufferSize / 2); s++)
       {
         inputData[s] *= current_window_value;
         current_window_value += window_increment;
       }
-      for(int s = audioInputBufferSize / 2; s < audioInputBufferSize; s++)
+      for(int s = (int)(audioInputBufferSize / 2); s < (int)audioInputBufferSize; s++)
       {
         current_window_value -= window_increment;
         inputData[s] *= current_window_value;
       }
 
-      // Compute fft. Spectrum is in CCs format.
+      // Compute fft. Spectrum is in CCs format — index 0 is DC, the last
+      // coefficient is nyquist. Skip both.
       auto spectrum = m_fft.execute(inputData, audioInputBufferSize);
 
       float* outputSpectrum = m_scratchpad.data();
 
-      // Compute the actual data to show
-      for(std::size_t k = 1; k < fftSize - 1; k++)
+      // Fill all fftSize slots of the new row. Previously the loop bounds
+      // (k=1..fftSize-1) left the last two pixels of each row untouched,
+      // leaking stale data from a 240-frame-old row into every output.
+      for(std::size_t k = 0; k < fftSize; k++)
       {
+        const std::size_t bin = k + 1; // bins 1..fftSize (skip DC at 0)
         const float float_magnitude
             = std::sqrt(
-                  spectrum[k][0] * spectrum[k][0] + spectrum[k][1] * spectrum[k][1])
+                  spectrum[bin][0] * spectrum[bin][0]
+                  + spectrum[bin][1] * spectrum[bin][1])
               * norm;
-        const float float_db = 20.f * std::log10(std ::max(float_magnitude, 1e-10f));
+        const float float_db = 20.f * std::log10(std::max(float_magnitude, 1e-10f));
 
         const float magnitude_byte = (float_db - dbmin) * byte_norm;
 
-        // We are going to put the data in a R32F texture thus we scale to [0; 1]
-        outputSpectrum[k - 1] = std::clamp(magnitude_byte, 0.f, 255.f) / 255.f;
+        // R32F texture with values scaled to [0; 1]
+        outputSpectrum[k] = std::clamp(magnitude_byte, 0.f, 255.f) / 255.f;
       }
     }
   }
-  // Copy it
+  // Copy it. setSourceSize makes the upload strides explicit so Qt RHI
+  // never second-guesses the row pitch — processSpectral sets it, keeping
+  // the histogram path aligned avoids a subtle inconsistency in validation.
   QRhiTextureSubresourceUploadDescription subdesc(
       m_scratchpad.data(), m_scratchpad.size() * sizeof(float));
+  subdesc.setSourceSize(QSize((int)fftSize, 240));
   QRhiTextureUploadEntry entry{0, 0, subdesc};
   QRhiTextureUploadDescription desc{entry};
   res.uploadTexture(rhiTexture, desc);
@@ -865,46 +1224,62 @@ std::optional<Sampler> AudioTextureUpload::updateAudioTexture(
     return {};
   }
 
-  auto& [rhiSampler, rhiTexture] = it->second;
-  const auto curSz = (rhiTexture) ? rhiTexture->pixelSize() : QSize{};
-  int numSamples = curSz.width() * curSz.height();
-  if(numSamples != std::max(1, int(audio.data.size())) || !rhiTexture)
+  auto& [rhiSampler, rhiTexture, fb_] = it->second;
+
+  // The texture the shader wants for the current (mode, samples, channels)
+  // triple. Previously the detection compared `curSz.w * curSz.h` against
+  // `audio.data.size()` — correct for Waveform (a W=samples × H=channels
+  // layout has pixel_count == raw_sample_count), but completely wrong for
+  // FFT (half the pixels) and Histogram (H is hard-coded 240 so pixel count
+  // bears no relation to the raw audio buffer). The mismatch meant every
+  // frame saw "size changed → destroy+recreate the texture", which also
+  // forced a full SRB rebuild via replaceTexture in the caller and
+  // thrashed the FFT planner's reset() cache.
+  const bool has_data = audio.channels > 0 && !audio.data.empty();
+  int samples = 0;
+  QSize desired{1, 1};
+  if(has_data)
   {
-    if(audio.channels > 0)
+    samples = int(audio.data.size()) / audio.channels;
+    if(samples % 2 != 0)
+      samples++;
+    switch(audio.mode)
     {
-      int samples = audio.data.size() / audio.channels;
-      if(samples % 2 != 0)
-        samples++;
-      int pixelWidth = 0;
-      int pixelHeight = 0;
-      switch(audio.mode)
-      {
-        case AudioTexture::Mode::Waveform:
-          pixelWidth = samples;
-          pixelHeight = audio.channels;
-          break;
-        case AudioTexture::Mode::FFT:
-          pixelWidth = samples / 2;
-          pixelHeight = audio.channels;
-          break;
-        case AudioTexture::Mode::Histogram:
-          pixelWidth = samples / 2 - 2;
-          pixelHeight = 240;
-          break;
-      }
+      case AudioTexture::Mode::Waveform:
+        desired = {samples, audio.channels};
+        break;
+      case AudioTexture::Mode::FFT:
+        desired = {std::max(1, samples / 2), audio.channels};
+        break;
+      case AudioTexture::Mode::Histogram:
+        // Histogram is a scrolling spectrogram: rows = frames of FFT history.
+        desired = {std::max(1, samples / 2 - 2), 240};
+        break;
+    }
+  }
 
+  const QSize curSz = rhiTexture ? rhiTexture->pixelSize() : QSize{};
+  if(curSz != desired || !rhiTexture)
+  {
+    if(has_data)
+    {
       m_fft.reset(samples);
 
       if(rhiTexture)
       {
+        // destroy()+create() on the same QRhiTexture wrapper swaps the
+        // native handle (VkImage / ID3D12Resource / MTLTexture). Flag
+        // the change so the caller re-runs replaceTexture to refresh
+        // the SRB's descriptor set binding.
         rhiTexture->destroy();
-        rhiTexture->setPixelSize({pixelWidth, pixelHeight});
+        rhiTexture->setPixelSize(desired);
         rhiTexture->create();
+        textureChanged = true;
       }
       else
       {
         rhiTexture = rhi.newTexture(
-            QRhiTexture::R32F, {pixelWidth, pixelHeight}, 1, QRhiTexture::Flag{});
+            QRhiTexture::R32F, desired, 1, QRhiTexture::Flag{});
         rhiTexture->setName("AudioTextureUpload::rhiTexture");
         auto created = rhiTexture->create();
         SCORE_ASSERT(created);
@@ -915,34 +1290,33 @@ std::optional<Sampler> AudioTextureUpload::updateAudioTexture(
     {
       if(rhiTexture)
       {
+        // Audio went quiet: drop our texture and fall back to the
+        // RenderList's shared emptyTexture via the caller. Never resize
+        // the stored rhiTexture in-place — when that pointer aliased
+        // `&renderer.emptyTexture()` (old no-data init path) a resize
+        // would have destroyed the shared empty texture used by every
+        // unbound sampler in every node on this RenderList.
         rhiTexture->destroy();
-        rhiTexture->setPixelSize({1, 1});
-        rhiTexture->create();
-      }
-      else
-      {
-        rhiTexture = &renderer.emptyTexture();
+        rhiTexture->deleteLater();
+        rhiTexture = nullptr;
         textureChanged = true;
       }
+      // else: stays nullptr; caller already bound emptyTexture on a
+      // previous pass. No need to re-fire replaceTexture.
     }
   }
 
   if(rhiTexture)
   {
-    // Process the audio data
     auto sz = rhiTexture->pixelSize();
     if(sz.width() * sz.height() > 1)
       this->process(audio, res, rhiTexture);
   }
 
   if(textureChanged)
-  {
     return it->second;
-  }
   else
-  {
     return {};
-  }
 }
 
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp
index 341bb6a2d6..07adaa0e75 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp
@@ -1,5 +1,6 @@
 #pragma once
 #include <Gfx/Graph/ISFNode.hpp>
+#include <Gfx/Graph/IsfBindingsBuilder.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderedISFUtils.hpp>
 
@@ -11,12 +12,22 @@ struct RenderedISFNode : score::gfx::NodeRenderer
 
   virtual ~RenderedISFNode();
 
-  void updateInputTexture(const Port& input, QRhiTexture* tex) override;
+  void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override;
+  void updateInputSamplerFilter(const Port& input, const RenderTargetSpecs& spec) override;
+  void addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeInputEdge(RenderList& renderer, Edge& edge) override;
 
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* e) override;
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+
   void runInitialPasses(
       RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
       Edge& edge) override;
@@ -26,7 +37,8 @@ struct RenderedISFNode : score::gfx::NodeRenderer
 private:
   std::pair<Pass, Pass> createPass(
       RenderList& renderer, ossia::small_vector<PassOutput, 1>& m_passSamplers,
-      PassOutput target, bool previousPassIsPersistent);
+      PassOutput target, const isf::pass& modelPass,
+      bool previousPassIsPersistent);
 
   std::pair<Pass, Pass> createFinalPass(
       RenderList& renderer, ossia::small_vector<PassOutput, 1>& m_passSamplers,
@@ -65,6 +77,22 @@ struct RenderedISFNode : score::gfx::NodeRenderer
   int m_materialSize{};
 
   AudioTextureUpload m_audioTex;
+
+  // Graphics-visible storage buffers / images declared by the shader
+  // (storage_input / csf_image_input / uniform_input). See IsfBindingsBuilder.
+  GraphicsStorageResources m_storage;
+
+  // Multiview UBO: N × mat4 view-projection matrices, when MULTIVIEW >= 2.
+  QRhiBuffer* m_multiViewUBO{};
+
+  // First binding slot reserved for storage resources; determined lazily in
+  // initPasses once the pass-sampler count is known (Rendered differs from
+  // Simple by having one extra sampler per inner pass).
+  int m_firstStorageBinding{-1};
+
+  // Guard so the persistent-SSBO state swap runs exactly once per frame even
+  // when the node has multiple output edges (each triggers runRenderPass).
+  bool m_storageSwappedThisFrame{false};
 };
 
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp
index 9219f2d95a..4694677869 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp
@@ -7,6 +7,28 @@
 namespace score::gfx
 {
 
+namespace detail
+{
+// Filter name -> QRhiSampler filter (case-insensitive); all but "nearest" is Linear.
+inline QRhiSampler::Filter parseAudioFilter(const std::string& s)
+{
+  std::string v = s;
+  // tolower() is undefined for negative char values: go through unsigned char.
+  for(auto& c : v) c = (char)tolower((unsigned char)c);
+  if(v == "nearest") return QRhiSampler::Nearest;
+  return QRhiSampler::Linear;
+}
+// Wrap name -> address mode (case-insensitive, '-' == '_'); default ClampToEdge.
+inline QRhiSampler::AddressMode parseAudioWrap(const std::string& s)
+{
+  std::string v = s;
+  for(auto& c : v) c = (c == '-') ? '_' : (char)tolower((unsigned char)c);
+  if(v == "repeat")                           return QRhiSampler::Repeat;
+  if(v == "mirror" || v == "mirrored_repeat") return QRhiSampler::Mirror;
+  return QRhiSampler::ClampToEdge;
+}
+}
+
 inline std::vector<Sampler>
 initAudioTextures(RenderList& renderer, std::list<AudioTexture>& textures)
 {
@@ -14,13 +36,14 @@ initAudioTextures(RenderList& renderer, std::list<AudioTexture>& textures)
   QRhi& rhi = *renderer.state.rhi;
   for(auto& texture : textures)
   {
+    const auto filter = detail::parseAudioFilter(texture.filter);
+    const auto wrap = detail::parseAudioWrap(texture.wrap);
     auto sampler = rhi.newSampler(
-        QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
-        QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
+        filter, filter, QRhiSampler::None, wrap, wrap);
     sampler->setName("ISFNode::initAudioTextures::sampler");
     sampler->create();
 
-    samplers.push_back({sampler, &renderer.emptyTexture()});
+    samplers.push_back({sampler, nullptr});
     texture.samplers[&renderer] = {sampler, nullptr};
   }
   return samplers;
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp
index 9b9d3b0862..7cfa08b677 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/Utils.hpp>
 
 #include <ossia/audio/fft.hpp>
@@ -9,13 +10,6 @@
 
 namespace score::gfx
 {
-struct Pass
-{
-  TextureRenderTarget renderTarget;
-  Pipeline p;
-  QRhiBuffer* processUBO{};
-};
-
 struct PersistSampler
 {
   QRhiSampler* sampler{};
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp
index 5d8893466e..4dd58d4aed 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp
@@ -1,14 +1,61 @@
+#include <Gfx/Graph/CustomMesh.hpp>
+#include <Gfx/Graph/ISFVisitors.hpp>
+#include <Gfx/Graph/PipelineStateHelpers.hpp>
 #include <Gfx/Graph/RenderedISFSamplerUtils.hpp>
 #include <Gfx/Graph/RenderedRawRasterPipelineNode.hpp>
+#include <Gfx/Graph/SSBO.hpp>
 #include <Gfx/Graph/Utils.hpp>
 
 #include <score/tools/Debug.hpp>
 
 #include <ossia/detail/algorithms.hpp>
+#include <ossia/detail/hash_map.hpp>
+#include <ossia/detail/small_vector.hpp>
+#include <ossia/math/math_expression.hpp>
+
+#include <boost/algorithm/string/replace.hpp>
+
+#include <cctype>
+#include <chrono>
 
 namespace score::gfx
 {
 
+static const constexpr auto rrp_blit_vs = R"_(#version 450
+layout(location = 0) in vec2 position;
+layout(location = 1) in vec2 texcoord;
+layout(location = 0) out vec2 v_texcoord;
+
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
+
+out gl_PerVertex { vec4 gl_Position; };
+
+void main()
+{
+  v_texcoord = texcoord;
+  gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.);
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  gl_Position.y = - gl_Position.y;
+#endif
+}
+)_";
+
+static const constexpr auto rrp_blit_fs = R"_(#version 450
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
+
+layout(binding = 3) uniform sampler2D blitTexture;
+layout(location = 0) in vec2 v_texcoord;
+layout(location = 0) out vec4 fragColor;
+
+void main() { fragColor = texture(blitTexture, v_texcoord); }
+)_";
+
 RenderedRawRasterPipelineNode::RenderedRawRasterPipelineNode(
     const ISFNode& node) noexcept
     : score::gfx::NodeRenderer{node}
@@ -16,7 +63,7 @@ RenderedRawRasterPipelineNode::RenderedRawRasterPipelineNode(
 {
 }
 
-void RenderedRawRasterPipelineNode::updateInputTexture(const Port& input, QRhiTexture* tex)
+void RenderedRawRasterPipelineNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
   // Find which image-type sampler index this port corresponds to
   int sampler_idx = 0;
@@ -25,20 +72,97 @@ void RenderedRawRasterPipelineNode::updateInputTexture(const Port& input, QRhiTe
     if(p == &input)
       break;
     if(p->type == Types::Image)
+    {
       sampler_idx++;
+      if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+        sampler_idx++;
+    }
   }
 
+  // Match key for replaceTexture MUST be the sampler that's actually
+  // in the SRB binding. allSamplers() (line ~155-170) substitutes
+  // m_inputSamplerOverrides[i] for m_inputSamplers[i] when an
+  // override is present (per-bucket sampler from ScenePreprocessor).
+  // Same fix as commit 7d1afd27b applied to FIX-C — see the long
+  // comment there. Without this updateInputTexture silently no-ops on
+  // every override-bound entry.
+  auto srbKey = [&](int i) -> QRhiSampler* {
+    if(i >= 0 && i < (int)m_inputSamplerOverrides.size()
+       && m_inputSamplerOverrides[i])
+      return m_inputSamplerOverrides[i];
+    return m_inputSamplers[i].sampler;
+  };
+
   if(sampler_idx < (int)m_inputSamplers.size())
   {
     auto& sampl = m_inputSamplers[sampler_idx];
     if(sampl.texture != tex)
     {
       sampl.texture = tex;
+      auto* key = srbKey(sampler_idx);
       for(auto& [e, pass] : m_passes)
         if(pass.p.srb)
-          score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex);
+          score::gfx::replaceTexture(*pass.p.srb, key, tex);
+    }
+
+    if(depthTex
+       && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth
+       && sampler_idx + 1 < (int)m_inputSamplers.size())
+    {
+      auto& depthSampl = m_inputSamplers[sampler_idx + 1];
+      if(depthSampl.texture != depthTex)
+      {
+        depthSampl.texture = depthTex;
+        auto* depthKey = srbKey(sampler_idx + 1);
+        for(auto& [e, pass] : m_passes)
+          if(pass.p.srb)
+            score::gfx::replaceTexture(*pass.p.srb, depthKey, depthTex);
+      }
+    }
+  }
+}
+
+QRhiTexture* RenderedRawRasterPipelineNode::textureForOutput(const Port& output)
+{
+  // Without the MRT path there is no per-output texture to hand out.
+  if(!m_hasMRT)
+    return nullptr;
+
+  const auto& outputDescs = n.descriptor().outputs;
+  auto& rt = m_mrtRenderTarget;
+  for(int port = 0; port < (int)n.output.size() && port < (int)outputDescs.size(); port++)
+  {
+    // Skip ports until we reach the one being queried.
+    if(n.output[port] != &output)
+      continue;
+
+    // Depth outputs expose the render target's depth attachment directly.
+    if(outputDescs[port].type == "depth")
+      return rt.depthTexture;
+
+    // Color slot = number of non-depth outputs declared before this one
+    // (slot 0 is the primary texture, 1+ are the additional color textures).
+    int slot = 0;
+    for(int prev = 0; prev < port; prev++)
+    {
+      if(outputDescs[prev].type != "depth")
+        slot++;
+    }
+
+    // CUBEMAP + MULTIVIEW shim: the public handle is the cube map, not the
+    // texture array that is actually rendered into.
+    if(m_cubeCopyCube && slot == m_cubeCopyOutputIdx)
+      return m_cubeCopyCube;
+
+    if(slot == 0)
+      return rt.texture;
+
+    const int extra = slot - 1;
+    if(extra < (int)rt.additionalColorTextures.size())
+      return rt.additionalColorTextures[extra];
+    // Slot out of range: keep scanning, as for any non-matching port.
+  }
+  return nullptr;
+}
 
 std::vector<Sampler> RenderedRawRasterPipelineNode::allSamplers() const noexcept
@@ -46,6 +170,21 @@ std::vector<Sampler> RenderedRawRasterPipelineNode::allSamplers() const noexcept
   // Input ports
   std::vector<Sampler> samplers = m_inputSamplers;
 
+  // Apply non-owning per-port sampler overrides published by upstream
+  // geometry's auxiliary_texture::sampler_handle (e.g., the per-bucket
+  // QRhiSampler from ScenePreprocessor's per-glTF-texture sampler
+  // config). The override is applied only on the SRB-build copy here;
+  // m_inputSamplers itself keeps its original (owning) sampler so
+  // release() can `delete sampler.sampler` without freeing a registry-
+  // owned sampler.
+  const std::size_t n_overrides
+      = std::min(samplers.size(), m_inputSamplerOverrides.size());
+  for(std::size_t i = 0; i < n_overrides; ++i)
+  {
+    if(m_inputSamplerOverrides[i])
+      samplers[i].sampler = m_inputSamplerOverrides[i];
+  }
+
   // Audio textures
   samplers.insert(samplers.end(), m_audioSamplers.begin(), m_audioSamplers.end());
 
@@ -53,7 +192,8 @@ std::vector<Sampler> RenderedRawRasterPipelineNode::allSamplers() const noexcept
 }
 
 void RenderedRawRasterPipelineNode::initPass(
-    const TextureRenderTarget& renderTarget, RenderList& renderer, Edge& edge)
+    const TextureRenderTarget& renderTarget, RenderList& renderer,
+    QRhiResourceUpdateBatch& res, Edge& edge)
 {
   auto& model_passes = n.descriptor().passes;
   SCORE_ASSERT(model_passes.size() == 1);
@@ -63,7 +203,6 @@ void RenderedRawRasterPipelineNode::initPass(
   QRhiBuffer* pubo{};
   pubo = rhi.newBuffer(
       QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO));
-  qWarning() << "RRP ALLOC [processUBO] size=" << sizeof(ProcessUBO);
   pubo->setName("RenderedRawRasterPipelineNode::initPass::pubo");
   pubo->create();
 
@@ -86,22 +225,57 @@ void RenderedRawRasterPipelineNode::initPass(
 
     ossia::small_vector<QRhiShaderResourceBinding, 4> additionalBindings;
 
+    // INPUTS storage trio (storage_input SSBO / csf_image_input image2D /
+    // uniform_input UBO) — order MUST match isf_emit_graphics_storage's
+    // GLSL emission (declaration order, sequential bindings starting at
+    // max_binding == 3 + samplers count).
+    {
+      auto extras = buildExtraBindings(m_storage);
+      for(const auto& b : extras)
+      {
+        additionalBindings.push_back(b);
+        max_binding++;
+      }
+    }
+
     for(auto& aux : m_auxiliarySSBOs)
     {
-      // If no buffer yet, create a small dummy so the descriptor set is valid
+      // If no buffer yet, create a small dummy so the descriptor set is valid.
+      // Dummy usage flag matches the aux kind so the created buffer can be
+      // bound as the intended descriptor type.
       if(!aux.buffer)
       {
-        auto* dummy = rhi.newBuffer(
-            QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, 16);
-        dummy->setName("RRP_aux_dummy");
+        auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer
+                                    : QRhiBuffer::StorageBuffer;
+        const int64_t dummySize = aux.is_uniform ? 256 : 16;
+        auto* dummy = rhi.newBuffer(QRhiBuffer::Immutable, usage, dummySize);
+        dummy->setName(aux.is_uniform ? "RRP_ubo_dummy" : "RRP_aux_dummy");
         dummy->create();
         aux.buffer = dummy;
-        aux.size = 16;
+        aux.size = dummySize;
         aux.owned = true;
       }
 
+      // Persistent ping-pong pair: emit the read-only <name>_prev binding
+      // FIRST (binding N), then the writable <name> binding (binding N+1).
+      // GLSL emission uses the same ordering.
+      if(aux.persistent && aux.prev_buffer)
+      {
+        additionalBindings.push_back(
+            QRhiShaderResourceBinding::bufferLoad(
+                max_binding, bindingStages, aux.prev_buffer));
+        aux.prev_binding = max_binding;
+        max_binding++;
+      }
+
       QRhiShaderResourceBinding binding;
-      if(aux.access == "read_only")
+      if(aux.is_uniform)
+      {
+        // uniform_input → std140 UBO binding
+        binding = QRhiShaderResourceBinding::uniformBuffer(
+            max_binding, bindingStages, aux.buffer);
+      }
+      else if(aux.access == "read_only")
         binding = QRhiShaderResourceBinding::bufferLoad(
             max_binding, bindingStages, aux.buffer);
       else if(aux.access == "write_only")
@@ -112,6 +286,36 @@ void RenderedRawRasterPipelineNode::initPass(
             max_binding, bindingStages, aux.buffer);
 
       additionalBindings.push_back(binding);
+      aux.binding = max_binding;  // remember slot for per-sub-mesh patching
+      max_binding++;
+    }
+
+    // Auxiliary texture / storage-image bindings: placed right after
+    // aux SSBOs, matching GLSL emission order. Dispatch on is_storage
+    // so TYPE:"image" gets sampledTexture and TYPE:"storage_image"
+    // gets imageLoad / imageStore / imageLoadStore per `access`.
+    for(auto& ats : m_auxTextureSamplers)
+    {
+      QRhiShaderResourceBinding b;
+      if(ats.is_storage)
+      {
+        if(ats.access == "read_only")
+          b = QRhiShaderResourceBinding::imageLoad(
+              max_binding, bindingStages, ats.texture, 0);
+        else if(ats.access == "write_only")
+          b = QRhiShaderResourceBinding::imageStore(
+              max_binding, bindingStages, ats.texture, 0);
+        else
+          b = QRhiShaderResourceBinding::imageLoadStore(
+              max_binding, bindingStages, ats.texture, 0);
+      }
+      else
+      {
+        b = QRhiShaderResourceBinding::sampledTexture(
+            max_binding, bindingStages, ats.texture, ats.sampler);
+      }
+      additionalBindings.push_back(b);
+      ats.binding = max_binding;
       max_binding++;
     }
 
@@ -142,19 +346,73 @@ void RenderedRawRasterPipelineNode::initPass(
     }
     ps->setSampleCount(pipelineSamples);
 
-    m_mesh->preparePipeline(*ps);
-
-    // Override topology and blend after preparePipeline,
-    // since the mesh may set its own defaults (e.g. CSF geometry outputs as points)
-    QRhiGraphicsPipeline::TargetBlend premulAlphaBlend;
-    premulAlphaBlend.enable = mat.enable_blend;
-    premulAlphaBlend.srcColor = mat.src_color;
-    premulAlphaBlend.dstColor = mat.dst_color;
-    premulAlphaBlend.opColor = mat.op_color;
-    premulAlphaBlend.srcAlpha = mat.src_alpha;
-    premulAlphaBlend.dstAlpha = mat.dst_alpha;
-    premulAlphaBlend.opAlpha = mat.op_alpha;
-    ps->setTargetBlends({premulAlphaBlend});
+    // Procedural draws (VERTEX_INPUTS: [] + VERTEX_COUNT) don't need
+    // a mesh — skip preparePipeline (no vertex-input layout bindings
+    // to set).
+    if(m_mesh)
+      m_mesh->preparePipeline(*ps);
+
+    // Compute effective pipeline state: the descriptor's PIPELINE_STATE (if
+    // any) wins over the legacy material-UBO-driven blend. When no state is
+    // declared (empty pipeline_state) we keep the legacy behaviour: blending
+    // driven by the material's runtime-editable blend UI + hardcoded depth
+    // test/write. This preserves bit-exact output for existing shaders.
+    const auto& desc = n.m_descriptor;
+    const bool hasDescriptorState = stateAffectsPipeline(desc.default_state);
+
+    if(hasDescriptorState)
+    {
+      // New path: pipeline_state drives blend/depth/cull/stencil. Seed the
+      // legacy material-UBO-driven blend on every attachment first so that
+      // a partial PIPELINE_STATE declaration (e.g. just CULL_MODE) doesn't
+      // silently lose the runtime blend UI's effect; applyPipelineState only
+      // overrides blend when BLEND was explicitly declared.
+      QRhiGraphicsPipeline::TargetBlend seededBlend;
+      seededBlend.enable = mat.enable_blend;
+      seededBlend.srcColor = mat.src_color;
+      seededBlend.dstColor = mat.dst_color;
+      seededBlend.opColor = mat.op_color;
+      seededBlend.srcAlpha = mat.src_alpha;
+      seededBlend.dstAlpha = mat.dst_alpha;
+      seededBlend.opAlpha = mat.op_alpha;
+      QList<QRhiGraphicsPipeline::TargetBlend> seedBlends;
+      for(int i = 0; i < std::max(1, renderTarget.colorAttachmentCount()); i++)
+        seedBlends.append(seededBlend);
+      ps->setTargetBlends(seedBlends.begin(), seedBlends.end());
+      ps->setDepthTest(true);
+      ps->setDepthWrite(true);
+      // Reverse-Z project rule (applyPipelineState overrides only if the
+      // shader explicitly declares depth_compare).
+      ps->setDepthOp(QRhiGraphicsPipeline::Greater);
+
+      const bool depthAvailable
+          = (renderTarget.depthTexture != nullptr)
+            || (renderTarget.depthRenderBuffer != nullptr)
+            || (renderTarget.msDepthTexture != nullptr);
+      applyPipelineState(
+          *ps, desc.default_state, renderTarget.colorAttachmentCount(),
+          depthAvailable, /*wantsDepthByDefault=*/true);
+    }
+    else
+    {
+      // Legacy path: blend from material UBO, depth hardcoded on.
+      QRhiGraphicsPipeline::TargetBlend premulAlphaBlend;
+      premulAlphaBlend.enable = mat.enable_blend;
+      premulAlphaBlend.srcColor = mat.src_color;
+      premulAlphaBlend.dstColor = mat.dst_color;
+      premulAlphaBlend.opColor = mat.op_color;
+      premulAlphaBlend.srcAlpha = mat.src_alpha;
+      premulAlphaBlend.dstAlpha = mat.dst_alpha;
+      premulAlphaBlend.opAlpha = mat.op_alpha;
+      ps->setTargetBlends({premulAlphaBlend});
+
+      ps->setDepthTest(true);
+      ps->setDepthWrite(true);
+      // Reverse-Z project rule.
+      ps->setDepthOp(QRhiGraphicsPipeline::Greater);
+    }
+
+    // Topology is always runtime-controllable via the material UBO.
     switch(mat.mode)
     {
       default:
@@ -170,25 +428,29 @@ void RenderedRawRasterPipelineNode::initPass(
     }
 
     // Remap vertex inputs by semantic: match shader input variable names
-    // to geometry attribute semantics.
-    if(auto* geom = m_mesh->semanticGeometry())
+    // to geometry attribute semantics. Honour explicit SEMANTIC overrides
+    // declared on VERTEX_INPUTS in the descriptor (CSF-style). Skip for
+    // procedural draws (no mesh, no attributes to remap).
+    //
+    // The fallback-aware overload resolves "REQUIRED: false" inputs
+    // missing from upstream geometry to a shared PerInstance identity
+    // buffer from the RenderList's pool. When no inputs opted in, the
+    // plan is empty and the draw path short-circuits with zero cost.
+    FallbackBindingPlan fallbackPlan;
+    if(m_mesh)
     {
-      if(!remapPipelineVertexInputs(*ps, v, *geom))
+      if(auto* geom = m_mesh->semanticGeometry())
       {
-        qDebug() << "RawRaster::initPass: remapPipelineVertexInputs FAILED";
-        delete ps;
-        delete pubo;
-        return;
+        if(!remapPipelineVertexInputs(
+               *ps, v, *geom, n.descriptor(),
+               rhi, renderer.vertexFallbackPool(), res, fallbackPlan))
+        {
+          delete ps;
+          delete pubo;
+          return;
+        }
       }
-      qDebug() << "RawRaster::initPass: remapPipelineVertexInputs OK";
     }
-    else
-    {
-      qDebug() << "RawRaster::initPass: no semanticGeometry";
-    }
-
-    ps->setDepthTest(true);
-    ps->setDepthWrite(true);
 
     ps->setShaderStages({{QRhiShaderStage::Vertex, v}, {QRhiShaderStage::Fragment, s}});
 
@@ -207,7 +469,9 @@ void RenderedRawRasterPipelineNode::initPass(
     Pipeline pip = {ps, bindings};
     if(pip.pipeline)
     {
-      m_passes.emplace_back(&edge, Pass{renderTarget, pip, pubo});
+      Pass pass{renderTarget, pip, pubo};
+      pass.fallback_bindings = std::move(fallbackPlan);
+      m_passes.emplace_back(&edge, std::move(pass));
     }
     else
     {
@@ -220,201 +484,1966 @@ void RenderedRawRasterPipelineNode::initPass(
   }
 }
 
-void RenderedRawRasterPipelineNode::init(
+void RenderedRawRasterPipelineNode::initMRTPass(
     RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   QRhi& rhi = *renderer.state.rhi;
+  const auto& outputs = n.descriptor().outputs;
+
+  // Tear down any state left from a previous init pass. `update` calls
+  // `m_mrtRenderTarget.release()` before hitting us again, but it's not
+  // responsible for our private per-mip / per-face RT pool or the
+  // CUBEMAP+MULTIVIEW shim's separate cube handle. Without these drops
+  // the pool would grow unboundedly across re-inits and, worse,
+  // m_mipRTs entries would point at a shadow array that's already been
+  // freed — the next beginPass on one of those stale RTs triggers a
+  // driver-level crash in CmdBeginRenderPass (NVIDIA specifically).
+  for(auto& e : m_mipRTs)
+  {
+    if(e.renderTarget)
+      e.renderTarget->deleteLater();
+    if(e.renderPass)
+      e.renderPass->deleteLater();
+    if(e.depth)
+      e.depth->deleteLater();
+  }
+  m_mipRTs.clear();
+  m_mipCount = 0;
+
+  // PerLayer depth-path resources. The color path's per-layer RTs are
+  // owned by m_mipRTs (cleared above); the shared scratch depth + RT
+  // used by the depth path live outside m_mipRTs and must be dropped
+  // explicitly here. m_perLayerOutputDepthArray aliases depthTex (owned
+  // by m_mrtRenderTarget) so it just gets nulled out.
+  if(m_perLayerSharedRT)
+  {
+    m_perLayerSharedRT->deleteLater();
+    m_perLayerSharedRT = nullptr;
+  }
+  if(m_perLayerSharedRP)
+  {
+    m_perLayerSharedRP->deleteLater();
+    m_perLayerSharedRP = nullptr;
+  }
+  if(m_perLayerScratchDepth)
+  {
+    m_perLayerScratchDepth->deleteLater();
+    m_perLayerScratchDepth = nullptr;
+  }
+  if(m_perLayerDummyColor)
+  {
+    m_perLayerDummyColor->deleteLater();
+    m_perLayerDummyColor = nullptr;
+  }
+  m_perLayerOutputDepthArray = nullptr;
+  m_perLayerOutputIndex = -1;
+  m_perLayerIsDepth = false;
 
-  // Create the mesh
+  if(m_cubeCopyCube)
   {
-    if(geometry.meshes)
+    m_cubeCopyCube->deleteLater();
+    m_cubeCopyCube = nullptr;
+  }
+  // m_cubeCopyShadowArray is a pointer into m_mrtRenderTarget's
+  // attachments; it's freed by m_mrtRenderTarget.release() in update().
+  m_cubeCopyShadowArray = nullptr;
+  m_cubeCopyOutputIdx = -1;
+
+  // Per-invocation UBO+SRB pool — rebuilt below against the fresh
+  // main SRB once the pipeline is re-created. Leaking these across
+  // re-inits would point old SRBs at freed buffers (same failure
+  // mode as the stale mip RTs above).
+  for(auto* ubo : m_perInvocationUBOs)
+    if(ubo) ubo->deleteLater();
+  m_perInvocationUBOs.clear();
+  for(auto* srb : m_perInvocationSRBs)
+    if(srb) srb->deleteLater();
+  m_perInvocationSRBs.clear();
+
+  // Target size resolution: honour OUTPUTS.WIDTH / HEIGHT (integer
+  // literal or string expression) when declared; otherwise fall back
+  // to the renderer's render-size. A RAW_RASTER_PIPELINE shader has
+  // one shared render pass, so all attachments end up at the same
+  // size — pick the first OUTPUT with an explicit size as the RT
+  // size. Mixing sized and unsized outputs is fine (unsized ones
+  // just inherit); mixing differing explicit sizes is a shader-
+  // author error we don't diagnose here.
+  QSize sz = renderer.state.renderSize;
+  // First non-zero explicit WIDTH/HEIGHT wins. Depth outputs participate
+  // too: shadow_cascades.frag (depth-only, no colour outputs at all)
+  // declares the shadow-map resolution on its depth output, and we want
+  // that to drive the RT size rather than falling through to renderSize.
+  for(const auto& out : outputs)
+  {
+    int w = out.width_expression.empty()
+                ? out.width
+                : resolveIntExpression(out.width_expression, 0);
+    int h = out.height_expression.empty()
+                ? out.height
+                : resolveIntExpression(out.height_expression, 0);
+    if(w > 0 && h > 0)
     {
-      std::tie(m_mesh, m_meshbufs)
-          = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs);
+      sz = QSize(w, h);
+      break;
     }
+  }
+
+  // EXECUTION_MODEL resolution. Matters before allocation because
+  // PER_MIP forces a MipMapped flag on the target output's texture,
+  // PER_CUBE_FACE forces a CubeMap flag. Manual / Single have no
+  // effect on allocation — they only influence the render loop in
+  // runInitialPasses().
+  {
+    const auto& em = n.descriptor().execution_model;
+    std::string et = em.type;
+    for(auto& c : et)
+      c = (char)std::toupper((unsigned char)c);
+    if(et == "PER_MIP")
+      m_executionMode = ExecutionMode::PerMip;
+    else if(et == "PER_CUBE_FACE")
+      m_executionMode = ExecutionMode::PerCubeFace;
+    else if(et == "PER_LAYER")
+      m_executionMode = ExecutionMode::PerLayer;
+    else if(et == "MANUAL")
+      m_executionMode = ExecutionMode::Manual;
     else
+      m_executionMode = ExecutionMode::Single;
+
+    m_perMipOutputIndex = -1;
+    m_perCubeFaceOutputIndex = -1;
+    m_perLayerOutputIndex = -1;
+    m_perLayerIsDepth = false;
+    const bool needsTarget = m_executionMode == ExecutionMode::PerMip
+                             || m_executionMode == ExecutionMode::PerCubeFace
+                             || m_executionMode == ExecutionMode::PerLayer;
+    if(needsTarget && !em.target.empty())
     {
-      if(m_mesh)
+      // PER_MIP / PER_CUBE_FACE only make sense on colour outputs (depth
+      // attachments don't have mip chains in our pipeline, and cube
+      // depth would need a separate code path). PER_LAYER allows either:
+      // colour TextureArray (setLayer attachment) or depth TextureArray
+      // (scratch + copy strategy). Walk the raw outputs[] for PER_LAYER
+      // so depth entries are included; keep the colour-only walk for the
+      // other two modes.
+      if(m_executionMode == ExecutionMode::PerLayer)
       {
-        if(m_meshbufs.buffers.empty())
+        for(int i = 0; i < (int)outputs.size(); ++i)
         {
-          m_meshbufs = renderer.initMeshBuffer(*m_mesh, res);
+          if(outputs[i].name == em.target)
+          {
+            m_perLayerOutputIndex = i;
+            m_perLayerIsDepth = (outputs[i].type == "depth");
+            break;
+          }
+        }
+      }
+      else
+      {
+        int colorIdx = 0;
+        for(const auto& out : outputs)
+        {
+          if(out.type == "depth")
+            continue;
+          if(out.name == em.target)
+          {
+            if(m_executionMode == ExecutionMode::PerMip)
+              m_perMipOutputIndex = colorIdx;
+            else
+              m_perCubeFaceOutputIndex = colorIdx;
+            break;
+          }
+          ++colorIdx;
         }
       }
+      const bool resolved
+          = (m_executionMode == ExecutionMode::PerMip
+             && m_perMipOutputIndex >= 0)
+            || (m_executionMode == ExecutionMode::PerCubeFace
+                && m_perCubeFaceOutputIndex >= 0)
+            || (m_executionMode == ExecutionMode::PerLayer
+                && m_perLayerOutputIndex >= 0);
+      if(!resolved)
+      {
+        qWarning() << "RawRaster EXECUTION_MODEL=" << et.c_str()
+                   << ": TARGET" << QString::fromStdString(em.target)
+                   << "not found among outputs — falling back to SINGLE";
+        m_executionMode = ExecutionMode::Single;
+      }
     }
-  }
 
-  // Create the material UBO
-  m_materialSize = n.m_materialSize;
-  if(m_materialSize > 0)
-  {
-    m_materialUBO
-        = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
-    qWarning() << "RRP ALLOC [materialUBO] size=" << m_materialSize;
-    m_materialUBO->setName("RenderedRawRasterPipelineNode::init::m_materialUBO");
-    SCORE_ASSERT(m_materialUBO->create());
+    // PER_CUBE_FACE + MULTIVIEW on the same shader is redundant:
+    // multiview already amplifies one draw into 6 face writes, so
+    // iterating per face would collapse back to the same 6 writes.
+    // Warn and disable the per-face loop — the cube-copy shim
+    // (CUBEMAP + MULTIVIEW) handles everything downstream.
+    if(m_executionMode == ExecutionMode::PerCubeFace
+       && n.descriptor().multiview_count >= 2)
+    {
+      qWarning()
+          << "RawRaster EXECUTION_MODEL=PER_CUBE_FACE + MULTIVIEW:"
+          << n.descriptor().multiview_count
+          << "is redundant. Multiview already amplifies one draw to"
+             " N faces; PER_CUBE_FACE is for the explicit 6-pass path"
+             " without multiview. Disabling PER_CUBE_FACE.";
+      m_executionMode = ExecutionMode::Single;
+      m_perCubeFaceOutputIndex = -1;
+    }
   }
 
-  m_modelUBO
-      = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(float[16]));
-  qWarning() << "RRP ALLOC [modelUBO] size=" << sizeof(float[16]);
-  m_modelUBO->setName("RenderedRawRasterPipelineNode::init::m_modelUBO");
-  SCORE_ASSERT(m_modelUBO->create());
-
-  // Create the samplers
-  SCORE_ASSERT(m_passes.empty());
-  SCORE_ASSERT(m_inputSamplers.empty());
-  SCORE_ASSERT(m_audioSamplers.empty());
-
-  m_inputSamplers = initInputSamplers(this->n, renderer, n.input);
-
-  m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures);
-
-  // Initialize auxiliary SSBOs from descriptor
+  // Layered / multiview detection — same shape as SimpleRenderedISFNode.
+  // `LAYERS: N` on any OUTPUT → N-layer texture array; `MULTIVIEW: N` on
+  // the descriptor → single-draw-writes-N-views (requires caps.multiview).
+  // Consumer shaders like `prefilter_ggx.frag` / `irradiance_convolve.frag`
+  // / `shadow_cascades.frag` all rely on this plumbing to land their
+  // outputs on the right cubemap face / cascade slice.
+  int maxLayers = 1;
+  for(const auto& out : outputs)
+    if(out.layers > maxLayers)
+      maxLayers = out.layers;
+  const int mvCount = n.descriptor().multiview_count;
+  const bool wantMultiview
+      = mvCount >= 2 && renderer.state.caps.multiview;
+  if(wantMultiview && mvCount > maxLayers)
+    maxLayers = mvCount;
+
+  // MSAA uniform across colour attachments — pick the max SAMPLES declared
+  // by any OUTPUT and apply it to the render pass. Allocated textures stay
+  // single-sample and serve as MSAA resolve targets (see SimpleRenderedISF
+  // initMRTPass for the full rationale).
+  int mrtSamples = std::max(renderer.samples(), 1);
+  for(const auto& out : outputs)
+    mrtSamples = std::max(mrtSamples, out.samples);
+
+  // Allocate colour + depth textures per declared OUTPUT. Unknown / empty
+  // FORMAT falls back to RGBA8 (colour) or D32F (depth). `type: "depth"`
+  // skips the standard depth-renderbuffer path and uses this texture as
+  // the depth attachment — required for shadow-map passes that want to
+  // sample the depth array downstream.
+  std::vector<QRhiTexture*> colorTextures;
+  QRhiTexture* depthTex = nullptr;
+
+  // Resolve the colour-attachment index of the PER_MIP / PER_CUBE_FACE
+  // target up-front (walk order matches the colorTextures[] we're
+  // about to build) so the allocation pass can OR in the matching
+  // flag only for that texture.
+  const int perMipColorIdx
+      = (m_executionMode == ExecutionMode::PerMip) ? m_perMipOutputIndex
+                                                   : -1;
+  const int perCubeFaceColorIdx
+      = (m_executionMode == ExecutionMode::PerCubeFace)
+            ? m_perCubeFaceOutputIndex
+            : -1;
+  int colorAllocIdx = 0;
+  // Reset the cube-copy shim state; (re)assigned below when an output
+  // with CUBEMAP:true + MULTIVIEW:N is encountered.
+  m_cubeCopyOutputIdx = -1;
+  m_cubeCopyShadowArray = nullptr;
+  m_cubeCopyCube = nullptr;
+
+  for(const auto& out : outputs)
   {
-    const auto& desc = n.descriptor();
-    m_auxiliarySSBOs.clear();
-    m_auxiliarySSBOs.reserve(desc.auxiliary.size());
-    for(const auto& aux : desc.auxiliary)
+    if(out.type == "depth")
     {
-      AuxiliarySSBO ssbo;
-      ssbo.name = aux.name;
-      ssbo.access = aux.access;
-
-      // Try to find a matching auxiliary buffer from upstream geometry
-      if(geometry.meshes && !geometry.meshes->meshes.empty())
+      auto depthFmt = score::gfx::parseOutputFormat(out.format, QRhiTexture::D32F);
+      QRhiTexture::Flags dflags = QRhiTexture::RenderTarget;
+      if(maxLayers > 1)
       {
-        const auto& mesh = geometry.meshes->meshes[0];
-        if(auto* geo_aux = mesh.find_auxiliary(ssbo.name))
-        {
-          if(geo_aux->buffer >= 0 && geo_aux->buffer < (int)mesh.buffers.size())
-          {
-            const auto& geo_buf = mesh.buffers[geo_aux->buffer];
-            if(auto* gpu = ossia::get_if<ossia::geometry::gpu_buffer>(&geo_buf.data))
-            {
-              if(gpu->handle)
-              {
-                ssbo.buffer = static_cast<QRhiBuffer*>(gpu->handle);
-                ssbo.size = geo_aux->byte_size > 0 ? geo_aux->byte_size : gpu->byte_size;
-                ssbo.owned = false;
-              }
-            }
-            else if(auto* cpu = ossia::get_if<ossia::geometry::cpu_buffer>(&geo_buf.data))
-            {
-              if(cpu->raw_data && cpu->byte_size > 0)
-              {
-                int64_t sz = geo_aux->byte_size > 0 ? geo_aux->byte_size : cpu->byte_size;
-                auto* buf = rhi.newBuffer(
-                    QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, sz);
-                buf->setName(QByteArray("RRP_aux_") + ssbo.name.c_str());
-                buf->create();
-                res.uploadStaticBuffer(buf, 0, sz, cpu->raw_data.get());
-
-                ssbo.buffer = buf;
-                ssbo.size = sz;
-                ssbo.owned = true;
-              }
-            }
-          }
-        }
+        dflags |= QRhiTexture::TextureArray;
+        depthTex = rhi.newTextureArray(depthFmt, maxLayers, sz, 1, dflags);
+      }
+      else
+      {
+        depthTex = rhi.newTexture(depthFmt, sz, 1, dflags);
+      }
+      depthTex->setName(
+          ("RenderedRawRasterPipelineNode::MRT::depth::" + out.name).c_str());
+      SCORE_ASSERT(depthTex->create());
+    }
+    else
+    {
+      auto fmt = score::gfx::parseOutputFormat(out.format, QRhiTexture::RGBA8);
+      QRhiTexture::Flags flags
+          = QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore;
+      const int layers
+          = std::max({1, out.layers, (wantMultiview ? mvCount : 1),
+                       (out.is_cubemap ? 6 : 1)});
+      // PER_MIP: flag the target output so QRhi allocates the full mip
+      // chain. Downstream consumers that care about the mips (prefilter
+      // sampling keyed on roughness) need them, and the per-mip render
+      // targets built below attach individual levels.
+      if(colorAllocIdx == perMipColorIdx)
+        flags |= QRhiTexture::MipMapped;
+
+      // GENERATE_MIPS: MipMapped allocation + UsedWithGenerateMips flag
+      // so QRhi's generateMips() can filter the base level into the
+      // sub-mips at end-of-frame. Orthogonal to PER_MIP (which provides
+      // shader-authored per-mip content) — we just need the storage
+      // shape + the capability bit.
+      if(out.generate_mips)
+        flags |= QRhiTexture::MipMapped | QRhiTexture::UsedWithGenerateMips;
+      QRhiTexture* tex = nullptr;
+
+      // Transparent CUBEMAP + MULTIVIEW path. QRhi forbids multiview on
+      // a cube texture (qrhi.cpp:2561-2565), so we render into a
+      // `UsedAsTransferSource`-tagged 2D TextureArray (what multiview
+      // accepts) and stamp a separate CubeMap alongside for downstream
+      // sampling. After the render pass ends we copyTexture each array
+      // layer into the matching cube face — downstream sees a real
+      // samplerCube without the shader having to know about it.
+      // Only one output gets the cube-copy treatment in this first cut
+      // (multiview already amortises 6× render amplification for free).
+      const bool wantCubeCopy
+          = out.is_cubemap && wantMultiview && m_cubeCopyOutputIdx < 0;
+
+      // PER_CUBE_FACE target: allocate as a real CubeMap (6 implicit
+      // layers). setLayer(face) per per-face render target drives each
+      // loop iteration. Mutually exclusive with the multiview-cube-copy
+      // shim above: PER_CUBE_FACE assumes you want the 6-pass behaviour
+      // explicitly; multiview would collapse the 6 passes back into 1.
+      const bool useCubeDirect
+          = (colorAllocIdx == perCubeFaceColorIdx)
+            || (out.is_cubemap && !wantMultiview);
+
+      if(wantCubeCopy)
+      {
+        // Cubemaps must have square faces in QRhi / Vulkan (CUBE_COMPATIBLE
+        // images require extent.width == extent.height). When the render
+        // target size is non-square (typical window aspect), the cube we
+        // hand downstream would otherwise be non-cubemap-compatible and
+        // produce stripe-like artefacts from the copy/sample stride
+        // mismatch. Force the cube face to min(w, h); the shadow array is
+        // sized to match so the multiview draw writes the full face.
+        const int face_edge = std::min(sz.width(), sz.height());
+        const QSize cubeSz(face_edge, face_edge);
+
+        // The rendered-to shadow array. Multiview-compatible shape, square
+        // (matches the cube). UsedAsTransferSource so it can be a
+        // copyTexture source.
+        QRhiTexture::Flags arrayFlags = flags | QRhiTexture::TextureArray
+                                        | QRhiTexture::UsedAsTransferSource;
+        tex = rhi.newTextureArray(fmt, 6, cubeSz, 1, arrayFlags);
+        tex->setName(
+            ("RRPNode::MRT::cubeCopyArray::" + out.name).c_str());
+        SCORE_ASSERT(tex->create());
+        m_cubeCopyShadowArray = tex;
+
+        // The downstream-visible cube. Same format, no RenderTarget
+        // flag (we never render into it directly, only copy). Default
+        // access is sampled/transfer-dst — enough for the classic
+        // consumer path (samplerCube). MipMapped is forwarded so a
+        // future prefilter chain can be generated downstream if the
+        // user also requested it on this output. UsedWithGenerateMips
+        // lets the end-of-frame generateMips() hit the public cube
+        // (the shadow array isn't sampled downstream so it doesn't
+        // need the flag itself).
+        QRhiTexture::Flags cubeFlags = QRhiTexture::CubeMap;
+        if(flags & QRhiTexture::MipMapped)
+          cubeFlags |= QRhiTexture::MipMapped;
+        if(out.generate_mips)
+          cubeFlags |= QRhiTexture::UsedWithGenerateMips;
+        QRhiTexture* cube = rhi.newTexture(fmt, cubeSz, 1, cubeFlags);
+        cube->setName(
+            ("RRPNode::MRT::cubeCopyCube::" + out.name).c_str());
+        SCORE_ASSERT(cube->create());
+        m_cubeCopyCube = cube;
+        m_cubeCopyOutputIdx = colorAllocIdx;
+      }
+      else if(useCubeDirect)
+      {
+        flags |= QRhiTexture::CubeMap;
+        // QRhi: a cubemap is allocated via newTexture (not newTextureArray)
+        // — its 6 faces are implicit when the CubeMap flag is set. A cube
+        // array (multiple cubes) would need newTextureArray + CubeMap, but
+        // we only cover single-cube here.
+        tex = rhi.newTexture(fmt, sz, 1, flags);
+      }
+      else if(layers > 1)
+      {
+        flags |= QRhiTexture::TextureArray;
+        tex = rhi.newTextureArray(fmt, layers, sz, 1, flags);
+      }
+      else
+      {
+        tex = rhi.newTexture(fmt, sz, 1, flags);
       }
 
-      m_auxiliarySSBOs.push_back(std::move(ssbo));
+      if(!wantCubeCopy)
+      {
+        tex->setName(
+            ("RRPNode::MRT::color::" + out.name).c_str());
+        SCORE_ASSERT(tex->create());
+      }
+      colorTextures.push_back(tex);
+      ++colorAllocIdx;
     }
   }
 
-  if(!m_mesh)
-    return;
-
-  // Create the passes
-  for(Edge* edge : n.output[0]->edges)
+  // Render-target variant picked from the shape of the declared outputs.
+  // Raw Raster always ships with depth test/write (3D geometry invariant),
+  // so on the common colour-only path we still synthesise a depth target
+  // if the shader didn't declare one explicitly.
+  if(colorTextures.empty() && depthTex)
   {
-    auto rt = renderer.renderTargetForOutput(*edge);
-    if(rt.renderTarget)
+    // Depth-only shader (e.g. shadow_cascades.frag). Build the RT AROUND the
+    // node-owned depth texture (possibly a TextureArray) instead of letting
+    // the helper allocate one and then deleting it while the render pass
+    // still references it (use-after-free + never-rendered output texture).
+    m_mrtRenderTarget = createDepthOnlyRenderTarget(
+        renderer.state, depthTex, mrtSamples, /*samplableDepth=*/true);
+  }
+  else if(wantMultiview && !colorTextures.empty())
+  {
+    // Allocate depth for the multiview RT if the shader didn't declare
+    // one — createMultiViewRenderTarget expects a matching layered depth
+    // or nullptr. Layered depth is cheaper and Vulkan-correct for MV.
+    if(!depthTex)
     {
-      initPass(rt, renderer, *edge);
+      depthTex = rhi.newTextureArray(
+          QRhiTexture::D32F, mvCount, sz, 1,
+          QRhiTexture::RenderTarget | QRhiTexture::TextureArray);
+      depthTex->setName(
+          "RenderedRawRasterPipelineNode::MRT::depthTextureArray (D32F)");
+      SCORE_ASSERT(depthTex->create());
     }
+    // Attach ALL color textures so attachments == pipeline blend targets.
+    m_mrtRenderTarget = createMultiViewRenderTarget(
+        renderer.state,
+        std::span<QRhiTexture* const>{colorTextures.data(), colorTextures.size()},
+        mvCount, depthTex, mrtSamples);
   }
-}
-
-bool RenderedRawRasterPipelineNode::updateMaterials(
-    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
-{
-  bool mustRecreatePasses = false;
-  // Update audio textures
-  if(!n.m_audio_textures.empty() && !m_audioTex)
+  else if(maxLayers > 1 && !colorTextures.empty())
   {
-    m_audioTex.emplace();
+    // Layered but not multiview — render to layer 0 by default; downstream
+    // per-pass LAYER selection (once PASSES loop lands) will pick others.
+    // Attach ALL color textures so attachments == pipeline blend targets.
+    m_mrtRenderTarget = createLayeredRenderTarget(
+        renderer.state,
+        std::span<QRhiTexture* const>{colorTextures.data(), colorTextures.size()},
+        0, depthTex, mrtSamples);
+  }
+  else if(!colorTextures.empty())
+  {
+    // Plain MRT path (no layers, no multiview) — colour textures plus a D32F
+    // depth texture, synthesised here if the shader declared no depth OUTPUT.
+    if(depthTex)
+    {
+      m_mrtRenderTarget = createRenderTarget(
+          renderer.state,
+          std::span<QRhiTexture* const>{
+              colorTextures.data(), colorTextures.size()},
+          depthTex, mrtSamples);
+    }
+    else
+    {
+      m_mrtRenderTarget.texture = colorTextures[0];
+      for(std::size_t i = 1; i < colorTextures.size(); i++)
+        m_mrtRenderTarget.additionalColorTextures.push_back(colorTextures[i]);
+
+      QList<QRhiColorAttachment> attachments;
+      for(auto* tex : colorTextures)
+        attachments.append(QRhiColorAttachment(tex));
+
+      QRhiTextureRenderTargetDescription desc;
+      desc.setColorAttachments(attachments.begin(), attachments.end());
+
+      // Reverse-Z project rule: D32F float depth. D24 + reverse-Z is strictly
+      // worse than standard-Z. Stencil dropped (unused elsewhere).
+      m_mrtRenderTarget.depthTexture = rhi.newTexture(
+          QRhiTexture::D32F, sz, renderer.samples(),
+          QRhiTexture::RenderTarget);
+      m_mrtRenderTarget.depthTexture->setName(
+          "RenderedRawRasterPipelineNode::MRT::depthTexture (D32F)");
+      SCORE_ASSERT(m_mrtRenderTarget.depthTexture->create());
+      desc.setDepthTexture(m_mrtRenderTarget.depthTexture);
+
+      auto* renderTarget = rhi.newTextureRenderTarget(desc);
+      renderTarget->setName("RenderedRawRasterPipelineNode::MRT::renderTarget");
+      SCORE_ASSERT(renderTarget);
+
+      auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+      renderPass->setName("RenderedRawRasterPipelineNode::MRT::renderPass");
+      SCORE_ASSERT(renderPass);
+
+      renderTarget->setRenderPassDescriptor(renderPass);
+      SCORE_ASSERT(renderTarget->create());
+
+      m_mrtRenderTarget.renderTarget = renderTarget;
+      m_mrtRenderTarget.renderPass = renderPass;
+    }
+  }
+  else
+  {
+    return;
   }
 
-  bool audioChanged = false;
-  for(auto& audio : n.m_audio_textures)
+  // PER_CUBE_FACE: build one render target per cube face, each
+  // attaching the same cube texture via setLayer(i). Mirrors the
+  // PER_MIP path structurally (iteration over a fixed axis with a
+  // distinct per-iteration RT) but with a CubeMap target instead of
+  // a MipMapped one. m_mipRTs reused as storage (semantics: index =
+  // face in this mode, mip level in PER_MIP mode). MUTUALLY EXCLUSIVE
+  // with PER_MIP — PER_CUBE_FACE_MIP would require a 2D iteration
+  // and isn't supported here; compose via external looping if needed.
+  if(m_executionMode == ExecutionMode::PerCubeFace
+     && m_perCubeFaceOutputIndex >= 0 && !colorTextures.empty())
   {
-    if(std::optional<Sampler> sampl
-       = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res))
+    QRhiTexture* targetTex
+        = (m_perCubeFaceOutputIndex == 0)
+              ? m_mrtRenderTarget.texture
+              : (m_perCubeFaceOutputIndex - 1
+                         < (int)m_mrtRenderTarget.additionalColorTextures.size()
+                     ? m_mrtRenderTarget.additionalColorTextures
+                           [m_perCubeFaceOutputIndex - 1]
+                     : nullptr);
+
+    if(targetTex)
     {
-      // Texture changed -> material changed
-      audioChanged = true;
+      m_mipCount = 6;  // m_mipCount stores invocation count for the loop
+      m_mipRTs.reserve(6);
+      const QSize faceSize = targetTex->pixelSize();
 
-      auto& [rhiSampler, tex] = *sampl;
-      for(auto& [e, pass] : m_passes)
+      for(int face = 0; face < 6; ++face)
       {
-        score::gfx::replaceTexture(
-            *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture());
+        QRhiColorAttachment color(targetTex);
+        color.setLayer(face);
+        // No multiview here: PER_CUBE_FACE opts into per-pass cube
+        // rendering explicitly. Multiview + cubemap is forbidden by
+        // QRhi anyway.
+
+        QRhiTexture* faceDepth = rhi.newTexture(
+            QRhiTexture::D32F, faceSize, 1, QRhiTexture::RenderTarget);
+        faceDepth->setName(
+            ("RRPNode::MRT::perCubeFaceDepth::"
+             + std::to_string(face))
+                .c_str());
+        SCORE_ASSERT(faceDepth->create());
+
+        QRhiTextureRenderTargetDescription faceDesc;
+        faceDesc.setColorAttachments({color});
+        faceDesc.setDepthTexture(faceDepth);
+
+        auto* faceRT = rhi.newTextureRenderTarget(faceDesc);
+        faceRT->setName(
+            ("RRPNode::MRT::perCubeFaceRT::"
+             + std::to_string(face))
+                .c_str());
+        auto* faceRP = faceRT->newCompatibleRenderPassDescriptor();
+        faceRP->setName(
+            ("RRPNode::MRT::perCubeFaceRP::"
+             + std::to_string(face))
+                .c_str());
+        faceRT->setRenderPassDescriptor(faceRP);
+        SCORE_ASSERT(faceRT->create());
+
+        MipRT entry;
+        entry.renderTarget = faceRT;
+        entry.renderPass = faceRP;
+        entry.depth = faceDepth;
+        m_mipRTs.push_back(entry);
       }
     }
-  }
-
-  // Update material
-  if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged))
-  {
-    char* data = n.m_material_data.get();
-    SCORE_ASSERT(m_materialSize >= size_of_pipeline_material);
-    if(std::memcmp(data, this->m_prevPipelineChangingMaterial, size_of_pipeline_material)
-       != 0)
+    else
     {
-      mustRecreatePasses = true;
-      std::copy_n(data, size_of_pipeline_material, this->m_prevPipelineChangingMaterial);
+      qWarning() << "RawRaster EXECUTION_MODEL=PER_CUBE_FACE: could not "
+                    "resolve target texture — falling back to SINGLE";
+      m_executionMode = ExecutionMode::Single;
     }
-    res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
   }
-  return mustRecreatePasses;
-}
-
-void RenderedRawRasterPipelineNode::update(
-    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
-{
-  // Update node materials. This must be before any initial return,
-  // otherwise we miss the materialsChanged
-  bool mustRecreatePasses = updateMaterials(renderer, res, edge);
-  bool recreateDueToMaterial = mustRecreatePasses;
-
-  // Update the geometry (sync with ModelDisplayNode)
 
-  if(this->geometryChanged)
+  // PER_MIP: build one render target per mip level of the target output,
+  // each attaching that specific level via setLevel(i). The draw loop in
+  // runInitialPasses() iterates these in order, injecting the mip index
+  // via ProcessUBO.passIndex. Multiview propagates: when the shader
+  // declared MULTIVIEW:6 (irradiance / prefilter cube case), each mip's
+  // attachment also carries setMultiViewCount(6) so one draw writes all
+  // six faces of that mip. Depth is a per-mip single-sample D32F to
+  // keep the pipeline's render-pass contract consistent across levels.
+  if(m_executionMode == ExecutionMode::PerMip && m_perMipOutputIndex >= 0
+     && !colorTextures.empty())
   {
-    if(geometry.meshes)
+    QRhiTexture* targetTex
+        = (m_perMipOutputIndex == 0)
+              ? m_mrtRenderTarget.texture
+              : (m_perMipOutputIndex - 1
+                         < (int)m_mrtRenderTarget.additionalColorTextures.size()
+                     ? m_mrtRenderTarget.additionalColorTextures
+                           [m_perMipOutputIndex - 1]
+                     : nullptr);
+
+    if(targetTex)
     {
-      const Mesh* prevMesh = m_mesh;
-      std::tie(m_mesh, m_meshbufs)
-          = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs);
-
-      this->meshChangedIndex = this->m_mesh->dirtyGeometryIndex;
+      QSize baseSize = targetTex->pixelSize();
+      int mipCount = 1;
+      {
+        int s = std::min(baseSize.width(), baseSize.height());
+        while(s > 1)
+        {
+          s >>= 1;
+          ++mipCount;
+        }
+      }
+      m_mipCount = mipCount;
+      m_mipRTs.reserve(mipCount);
 
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
-      // Check for standalone indirect draw buffer from Buffer input ports
-      if(!m_meshbufs.useIndirectDraw)
+      for(int i = 0; i < mipCount; ++i)
       {
-        for(auto* port : n.input)
+        QSize mipSize(
+            std::max(1, baseSize.width() >> i),
+            std::max(1, baseSize.height() >> i));
+
+        QRhiColorAttachment color(targetTex);
+        color.setLevel(i);
+#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+        if(wantMultiview)
+          color.setMultiViewCount(mvCount);
+#endif
+
+        // Depth must match multiview shape: a plain 2D texture as the
+        // depth attachment against a multiview color attachment fails
+        // QRhi's render-pass compat check. Allocate a layered depth for
+        // the multiview case, plain 2D otherwise. Each mip gets its own
+        // depth because the attachment size must match the colour
+        // attachment's mip-i pixel size.
+        QRhiTexture* mipDepth = nullptr;
+        if(wantMultiview)
         {
-          if(port->type == Types::Buffer && !port->edges.empty())
-          {
-            auto bv = renderer.bufferForInput(*port->edges.front());
-            if(bv.usage == BufferView::Usage::IndirectDraw)
-            {
-              m_meshbufs.indirectDrawBuffer = bv.handle;
-              m_meshbufs.useIndirectDraw = true;
-              m_meshbufs.indirectDrawIndexed = false;
+          mipDepth = rhi.newTextureArray(
+              QRhiTexture::D32F, mvCount, mipSize, 1,
+              QRhiTexture::RenderTarget | QRhiTexture::TextureArray);
+        }
+        else
+        {
+          mipDepth = rhi.newTexture(
+              QRhiTexture::D32F, mipSize, 1, QRhiTexture::RenderTarget);
+        }
+        mipDepth->setName(
+            ("RenderedRawRasterPipelineNode::MRT::perMipDepth::"
+             + std::to_string(i))
+                .c_str());
+        SCORE_ASSERT(mipDepth->create());
+
+        QRhiTextureRenderTargetDescription mipDesc;
+        mipDesc.setColorAttachments({color});
+        mipDesc.setDepthTexture(mipDepth);
+
+        auto* mipRT = rhi.newTextureRenderTarget(mipDesc);
+        mipRT->setName(
+            ("RenderedRawRasterPipelineNode::MRT::perMipRT::"
+             + std::to_string(i))
+                .c_str());
+        auto* mipRP = mipRT->newCompatibleRenderPassDescriptor();
+        mipRP->setName(
+            ("RenderedRawRasterPipelineNode::MRT::perMipRP::"
+             + std::to_string(i))
+                .c_str());
+        mipRT->setRenderPassDescriptor(mipRP);
+        SCORE_ASSERT(mipRT->create());
+
+        MipRT entry;
+        entry.renderTarget = mipRT;
+        entry.renderPass = mipRP;
+        entry.depth = mipDepth;
+        m_mipRTs.push_back(entry);
+      }
+    }
+    else
+    {
+      qWarning() << "RawRaster EXECUTION_MODEL=PER_MIP: could not resolve "
+                    "target texture — falling back to SINGLE";
+      m_executionMode = ExecutionMode::Single;
+    }
+  }
+
+  // PER_LAYER: build one render target per layer of the target output's
+  // TextureArray (or copy strategy for depth targets — see below). The
+  // draw loop in runInitialPasses() iterates them in order, injecting
+  // the layer index via ProcessUBO.passIndex. Drives shadow_cascades.
+  //
+  // Two paths depending on target type:
+  //
+  //   - COLOR target: same shape as PER_CUBE_FACE with a variable layer
+  //     count. m_mipRTs holds N entries, each with QRhiColorAttachment
+  //     bound via setLayer(i). Per-layer 2D depth (one D32F per slice)
+  //     keeps the render-pass attachment shapes consistent.
+  //
+  //   - DEPTH target: Qt RHI 6.11 has no per-layer depth-attachment API
+  //     (QRhiTextureRenderTargetDescription::setDepthTexture takes a
+  //     QRhiTexture* with no layer overload). We render to a single
+  //     shared scratch 2D D32F and copy it into layer i of the OUTPUT
+  //     depth array after each iteration's endPass. The scratch is
+  //     UsedAsTransferSource so the per-iteration copyTexture works.
+  if(m_executionMode == ExecutionMode::PerLayer && m_perLayerOutputIndex >= 0)
+  {
+    const auto& targetOut = outputs[m_perLayerOutputIndex];
+    const int layerCount = std::max(1, targetOut.layers);
+
+    if(m_perLayerIsDepth)
+    {
+      // depthTex is the OUTPUT array (allocated as Texture2DArray
+      // earlier when maxLayers > 1). m_perLayerOutputDepthArray
+      // aliases it for the post-pass copy destination.
+      if(depthTex && layerCount > 1)
+      {
+        m_perLayerOutputDepthArray = depthTex;
+
+        const auto depthFmt = depthTex->format();
+        m_perLayerScratchDepth = rhi.newTexture(
+            depthFmt, sz, 1,
+            QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource);
+        m_perLayerScratchDepth->setName(
+            ("RRPNode::MRT::perLayerScratch::" + targetOut.name).c_str());
+        SCORE_ASSERT(m_perLayerScratchDepth->create());
+
+        // Mirror createDepthOnlyRenderTarget's attachment shape so the
+        // pipeline (created against m_mrtRenderTarget.renderPass, which
+        // came from createDepthOnlyRenderTarget) is render-pass-
+        // compatible with our shared RT. That helper attaches a 1×1
+        // dummy RGBA8 color alongside the depth — required by GLES
+        // backends and harmless on desktop. We allocate our own dummy
+        // (rather than borrowing m_mrtRenderTarget.dummyColorTexture,
+        // whose lifetime is owned by m_mrtRenderTarget) so the shared
+        // RT here owns a self-contained set of attachments.
+        m_perLayerDummyColor = rhi.newTexture(
+            QRhiTexture::RGBA8, QSize(1, 1), 1, QRhiTexture::RenderTarget);
+        m_perLayerDummyColor->setName(
+            ("RRPNode::MRT::perLayerDummyColor::" + targetOut.name).c_str());
+        SCORE_ASSERT(m_perLayerDummyColor->create());
+
+        QRhiTextureRenderTargetDescription scratchDesc;
+        {
+          QRhiColorAttachment color0(m_perLayerDummyColor);
+          scratchDesc.setColorAttachments({color0});
+        }
+        scratchDesc.setDepthTexture(m_perLayerScratchDepth);
+
+        m_perLayerSharedRT = rhi.newTextureRenderTarget(scratchDesc);
+        m_perLayerSharedRT->setName(
+            ("RRPNode::MRT::perLayerSharedRT::" + targetOut.name).c_str());
+        m_perLayerSharedRP
+            = m_perLayerSharedRT->newCompatibleRenderPassDescriptor();
+        m_perLayerSharedRP->setName(
+            ("RRPNode::MRT::perLayerSharedRP::" + targetOut.name).c_str());
+        m_perLayerSharedRT->setRenderPassDescriptor(m_perLayerSharedRP);
+        SCORE_ASSERT(m_perLayerSharedRT->create());
+
+        m_mipCount = layerCount;  // reuse for invocation count
+      }
+      else
+      {
+        qDebug()
+            << "RawRaster EXECUTION_MODEL=PER_LAYER: depth target"
+            << QString::fromStdString(targetOut.name)
+            << "needs LAYERS > 1 — falling back to SINGLE";
+        m_executionMode = ExecutionMode::Single;
+      }
+    }
+    else
+    {
+      // Color path. Resolve the colour-attachment index from the raw
+      // outputs[] index (depth entries don't take a colour slot).
+      int colorIdx = 0;
+      for(int j = 0; j < m_perLayerOutputIndex; ++j)
+        if(outputs[j].type != "depth")
+          ++colorIdx;
+
+      QRhiTexture* targetTex
+          = (colorIdx == 0)
+                ? m_mrtRenderTarget.texture
+                : (colorIdx - 1
+                           < (int)m_mrtRenderTarget.additionalColorTextures.size()
+                       ? m_mrtRenderTarget.additionalColorTextures[colorIdx - 1]
+                       : nullptr);
+
+      if(targetTex && layerCount > 1)
+      {
+        const QSize layerSize = targetTex->pixelSize();
+        m_mipCount = layerCount;
+        m_mipRTs.reserve(layerCount);
+
+        for(int layer = 0; layer < layerCount; ++layer)
+        {
+          QRhiColorAttachment color(targetTex);
+          color.setLayer(layer);
+
+          // Per-layer 2D depth — same rationale as PER_CUBE_FACE: depth
+          // attachment size must match the colour attachment, and a
+          // layered depth here would force multi-view shape against a
+          // single-layer colour binding.
+          QRhiTexture* layerDepth = rhi.newTexture(
+              QRhiTexture::D32F, layerSize, 1, QRhiTexture::RenderTarget);
+          layerDepth->setName(
+              ("RRPNode::MRT::perLayerDepth::" + std::to_string(layer))
+                  .c_str());
+          SCORE_ASSERT(layerDepth->create());
+
+          QRhiTextureRenderTargetDescription layerDesc;
+          layerDesc.setColorAttachments({color});
+          layerDesc.setDepthTexture(layerDepth);
+
+          auto* layerRT = rhi.newTextureRenderTarget(layerDesc);
+          layerRT->setName(
+              ("RRPNode::MRT::perLayerRT::" + std::to_string(layer))
+                  .c_str());
+          auto* layerRP = layerRT->newCompatibleRenderPassDescriptor();
+          layerRP->setName(
+              ("RRPNode::MRT::perLayerRP::" + std::to_string(layer))
+                  .c_str());
+          layerRT->setRenderPassDescriptor(layerRP);
+          SCORE_ASSERT(layerRT->create());
+
+          MipRT entry;
+          entry.renderTarget = layerRT;
+          entry.renderPass = layerRP;
+          entry.depth = layerDepth;
+          m_mipRTs.push_back(entry);
+        }
+      }
+      else
+      {
+        qDebug()
+            << "RawRaster EXECUTION_MODEL=PER_LAYER: colour target"
+            << QString::fromStdString(targetOut.name)
+            << "needs LAYERS > 1 and a resolved texture — falling back"
+               " to SINGLE";
+        m_executionMode = ExecutionMode::Single;
+      }
+    }
+  }
+
+  // Create the pipeline
+  QRhiBuffer* pubo = rhi.newBuffer(
+      QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO));
+  pubo->setName("RenderedRawRasterPipelineNode::initMRTPass::pubo");
+  pubo->create();
+
+  try
+  {
+    auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS);
+
+    auto& mat
+        = *reinterpret_cast<PipelineChangingMaterial*>(m_prevPipelineChangingMaterial);
+
+    int max_binding = 3;
+    auto samplers = allSamplers();
+    if(!samplers.empty())
+      max_binding += samplers.size();
+
+    // Build additional bindings: auxiliary SSBOs + model UBO
+    const auto bindingStages = QRhiShaderResourceBinding::StageFlag::VertexStage
+                               | QRhiShaderResourceBinding::StageFlag::FragmentStage;
+
+    ossia::small_vector<QRhiShaderResourceBinding, 4> additionalBindings;
+
+    // INPUTS storage trio (storage_input SSBO / csf_image_input image2D /
+    // uniform_input UBO) — order MUST match isf_emit_graphics_storage's
+    // GLSL emission (declaration order, sequential bindings starting at
+    // max_binding == 3 + samplers count).
+    {
+      auto extras = buildExtraBindings(m_storage);
+      for(const auto& b : extras)
+      {
+        additionalBindings.push_back(b);
+        max_binding++;
+      }
+    }
+
+    for(auto& aux : m_auxiliarySSBOs)
+    {
+      // Dummy usage flag matches the aux kind so the created buffer can be
+      // bound as the intended descriptor type (UBO for uniform_input, SSBO
+      // otherwise). Mirrors the non-MRT path.
+      if(!aux.buffer)
+      {
+        auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer
+                                    : QRhiBuffer::StorageBuffer;
+        const int64_t dummySize = aux.is_uniform ? 256 : 16;
+        auto* dummy = rhi.newBuffer(QRhiBuffer::Immutable, usage, dummySize);
+        dummy->setName(aux.is_uniform ? "RRP_ubo_dummy" : "RRP_aux_dummy");
+        dummy->create();
+        aux.buffer = dummy;
+        aux.size = dummySize;
+        aux.owned = true;
+      }
+
+      // Persistent ping-pong: <name>_prev (readonly) goes first.
+      if(aux.persistent && aux.prev_buffer)
+      {
+        additionalBindings.push_back(
+            QRhiShaderResourceBinding::bufferLoad(
+                max_binding, bindingStages, aux.prev_buffer));
+        aux.prev_binding = max_binding;
+        max_binding++;
+      }
+
+      QRhiShaderResourceBinding binding;
+      if(aux.is_uniform)
+      {
+        // uniform_input → std140 UBO binding
+        binding = QRhiShaderResourceBinding::uniformBuffer(
+            max_binding, bindingStages, aux.buffer);
+      }
+      else if(aux.access == "read_only")
+        binding = QRhiShaderResourceBinding::bufferLoad(
+            max_binding, bindingStages, aux.buffer);
+      else if(aux.access == "write_only")
+        binding = QRhiShaderResourceBinding::bufferStore(
+            max_binding, bindingStages, aux.buffer);
+      else
+        binding = QRhiShaderResourceBinding::bufferLoadStore(
+            max_binding, bindingStages, aux.buffer);
+
+      additionalBindings.push_back(binding);
+      aux.binding = max_binding;  // remember slot for per-sub-mesh patching
+      max_binding++;
+    }
+
+    // Auxiliary texture / storage-image bindings (MRT path). Same
+    // is_storage dispatch as the non-MRT site.
+    for(auto& ats : m_auxTextureSamplers)
+    {
+      QRhiShaderResourceBinding b;
+      if(ats.is_storage)
+      {
+        if(ats.access == "read_only")
+          b = QRhiShaderResourceBinding::imageLoad(
+              max_binding, bindingStages, ats.texture, 0);
+        else if(ats.access == "write_only")
+          b = QRhiShaderResourceBinding::imageStore(
+              max_binding, bindingStages, ats.texture, 0);
+        else
+          b = QRhiShaderResourceBinding::imageLoadStore(
+              max_binding, bindingStages, ats.texture, 0);
+      }
+      else
+      {
+        b = QRhiShaderResourceBinding::sampledTexture(
+            max_binding, bindingStages, ats.texture, ats.sampler);
+      }
+      additionalBindings.push_back(b);
+      ats.binding = max_binding;
+      max_binding++;
+    }
+
+    additionalBindings.push_back(QRhiShaderResourceBinding::uniformBuffer(
+        max_binding, bindingStages, m_modelUBO));
+
+    auto bindings = createDefaultBindings(
+        renderer, m_mrtRenderTarget, pubo, m_materialUBO, allSamplers(),
+        std::span<QRhiShaderResourceBinding>(
+            additionalBindings.data(), additionalBindings.size()));
+
+    auto ps = rhi.newGraphicsPipeline();
+    ps->setName("RenderedRawRasterPipelineNode::initMRTPass::ps");
+    SCORE_ASSERT(ps);
+
+    const int rtSamples = m_mrtRenderTarget.sampleCount();
+    const int pipelineSamples = (rtSamples > 0) ? rtSamples : renderer.samples();
+    ps->setSampleCount(pipelineSamples);
+
+    // Multiview: activate the matching view count on the pipeline so that
+    // `gl_ViewIndex` in the shader actually picks up the per-view state
+    // (mat4[] viewProjection etc., emitted by the ISF layer). Must match
+    // the color attachment's setMultiViewCount set in
+    // createMultiViewRenderTarget above.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+    if(wantMultiview)
+      ps->setMultiViewCount(mvCount);
+#endif
+
+    // preparePipeline sets the vertex-input layout from the mesh's
+    // attributes. Skip for procedural draws (VERTEX_INPUTS: []): the
+    // pipeline has no vertex bindings and the draw uses gl_VertexIndex.
+    if(m_mesh)
+      m_mesh->preparePipeline(*ps);
+
+    const auto& desc = n.m_descriptor;
+    const bool hasDescriptorState = stateAffectsPipeline(desc.default_state);
+
+    if(hasDescriptorState)
+    {
+      // Seed legacy material-UBO blend on every attachment first; applyPipelineState
+      // only overrides BLEND when the shader explicitly declares it.
+      QRhiGraphicsPipeline::TargetBlend seededBlend;
+      seededBlend.enable = mat.enable_blend;
+      seededBlend.srcColor = mat.src_color;
+      seededBlend.dstColor = mat.dst_color;
+      seededBlend.opColor = mat.op_color;
+      seededBlend.srcAlpha = mat.src_alpha;
+      seededBlend.dstAlpha = mat.dst_alpha;
+      seededBlend.opAlpha = mat.op_alpha;
+      QList<QRhiGraphicsPipeline::TargetBlend> seedBlends;
+      for(int i = 0; i < std::max(1, m_mrtRenderTarget.colorAttachmentCount()); i++)
+        seedBlends.append(seededBlend);
+      ps->setTargetBlends(seedBlends.begin(), seedBlends.end());
+      ps->setDepthTest(true);
+      ps->setDepthWrite(true);
+      // Reverse-Z project rule (applyPipelineState overrides only if the
+      // shader explicitly declares depth_compare).
+      ps->setDepthOp(QRhiGraphicsPipeline::Greater);
+
+      const bool depthAvailable
+          = (m_mrtRenderTarget.depthTexture != nullptr)
+            || (m_mrtRenderTarget.depthRenderBuffer != nullptr)
+            || (m_mrtRenderTarget.msDepthTexture != nullptr);
+      applyPipelineState(
+          *ps, desc.default_state, m_mrtRenderTarget.colorAttachmentCount(),
+          depthAvailable, /*wantsDepthByDefault=*/true);
+    }
+    else
+    {
+      // Legacy: material-UBO-driven blend, hardcoded depth.
+      QRhiGraphicsPipeline::TargetBlend premulAlphaBlend;
+      premulAlphaBlend.enable = mat.enable_blend;
+      premulAlphaBlend.srcColor = mat.src_color;
+      premulAlphaBlend.dstColor = mat.dst_color;
+      premulAlphaBlend.opColor = mat.op_color;
+      premulAlphaBlend.srcAlpha = mat.src_alpha;
+      premulAlphaBlend.dstAlpha = mat.dst_alpha;
+      premulAlphaBlend.opAlpha = mat.op_alpha;
+
+      QList<QRhiGraphicsPipeline::TargetBlend> blends;
+      for(int i = 0; i < m_mrtRenderTarget.colorAttachmentCount(); i++)
+        blends.append(premulAlphaBlend);
+      ps->setTargetBlends(blends.begin(), blends.end());
+
+      ps->setDepthTest(true);
+      ps->setDepthWrite(true);
+      // Reverse-Z project rule.
+      ps->setDepthOp(QRhiGraphicsPipeline::Greater);
+    }
+
+    switch(mat.mode)
+    {
+      default:
+      case 0:
+        ps->setTopology(QRhiGraphicsPipeline::Triangles);
+        break;
+      case 1:
+        ps->setTopology(QRhiGraphicsPipeline::Points);
+        break;
+      case 2:
+        ps->setTopology(QRhiGraphicsPipeline::Lines);
+        break;
+    }
+
+    // Remap vertex inputs by semantic (CSF-style; honour explicit
+    // SEMANTIC). Procedural draws have no vertex inputs to remap — skip.
+    // Same fallback-aware path as initPass — "REQUIRED: false" inputs
+    // missing upstream land on a pooled identity buffer.
+    FallbackBindingPlan fallbackPlan;
+    if(m_mesh)
+    {
+      if(auto* geom = m_mesh->semanticGeometry())
+      {
+        if(!remapPipelineVertexInputs(
+               *ps, v, *geom, n.descriptor(),
+               rhi, renderer.vertexFallbackPool(), res, fallbackPlan))
+        {
+          qWarning() << "RawRaster::initMRTPass: remapPipelineVertexInputs FAILED";
+          delete ps;
+          delete pubo;
+          return;
+        }
+      }
+    }
+
+    ps->setShaderStages({{QRhiShaderStage::Vertex, v}, {QRhiShaderStage::Fragment, s}});
+    ps->setShaderResourceBindings(bindings);
+
+    SCORE_ASSERT(m_mrtRenderTarget.renderPass);
+    ps->setRenderPassDescriptor(m_mrtRenderTarget.renderPass);
+
+    if(!ps->create())
+    {
+      qDebug() << "Warning! MRT Pipeline not created";
+      delete ps;
+      ps = nullptr;
+    }
+
+    Pipeline pip = {ps, bindings};
+    if(pip.pipeline)
+    {
+      // nullptr edge — MRT passes are shared across all output edges
+      Pass pass{m_mrtRenderTarget, pip, pubo};
+      pass.fallback_bindings = std::move(fallbackPlan);
+      m_passes.emplace_back(nullptr, std::move(pass));
+    }
+    else
+    {
+      delete pubo;
+    }
+  }
+  catch(...)
+  {
+    delete pubo;
+  }
+}
+
+void RenderedRawRasterPipelineNode::initMRTBlitPass(
+    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge)
+{
+  // Builds the blit pass for `edge`; adds nothing to m_passes if any resource is missing.
+  QRhiTexture* srcTex = textureForOutput(*edge.source);
+  if(!srcTex)
+    return;
+  auto rt = renderer.renderTargetForOutput(edge);
+  if(!rt.renderTarget)
+    return;
+
+  auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, rrp_blit_vs, rrp_blit_fs);
+  QRhiSampler* sampler = renderer.state.rhi->newSampler(
+      QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
+      QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
+  sampler->setName("RenderedRawRasterPipelineNode::MRT::blitSampler");
+  if(!sampler->create())
+  {
+    // Never register or bind a sampler whose native object failed to build.
+    delete sampler;
+    return;
+  }
+  m_blitSamplersByEdge[&edge] = sampler;
+  auto pip = score::gfx::buildPipeline(
+      renderer, *m_blitMesh, vertexS, fragmentS, rt, nullptr, nullptr,
+      std::array<Sampler, 1>{Sampler{sampler, srcTex}});
+  if(!pip.pipeline)
+  {
+    m_blitSamplersByEdge.erase(&edge); // undo the registration done just above
+    delete sampler;
+    return;
+  }
+  m_passes.emplace_back(&edge, Pass{rt, pip, nullptr});
+}
+
+void RenderedRawRasterPipelineNode::initMRTBlitPasses(
+    RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  // Fan out over the node's outputs: every edge leaving every output port
+  // is handed to initMRTBlitPass(), one call per edge, in port order and
+  // then in edge order within each port.
+  for(auto* outPort : n.output)
+  {
+    for(Edge* downstream : outPort->edges)
+      initMRTBlitPass(renderer, res, *downstream);
+  }
+}
+
+void RenderedRawRasterPipelineNode::initState(
+    RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  QRhi& rhi = *renderer.state.rhi;
+
+  // Create the mesh
+  {
+    if(geometry.meshes)
+    {
+      std::tie(m_mesh, m_meshbufs)
+          = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs);
+      m_meshbufs.gpuIndirectSupported = renderer.state.caps.drawIndirect;
+    }
+    else
+    {
+      if(m_mesh)
+      {
+        if(m_meshbufs.buffers.empty())
+        {
+          m_meshbufs = renderer.initMeshBuffer(*m_mesh, res);
+          m_meshbufs.gpuIndirectSupported = renderer.state.caps.drawIndirect;
+        }
+      }
+    }
+  }
+
+  // Create the material UBO
+  m_materialSize = n.m_materialSize;
+  if(m_materialSize > 0)
+  {
+    m_materialUBO
+        = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
+    m_materialUBO->setName("RenderedRawRasterPipelineNode::init::m_materialUBO");
+    SCORE_ASSERT(m_materialUBO->create());
+    if(n.m_material_data)
+      res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get());
+  }
+
+  m_modelUBO
+      = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(float[16]));
+  m_modelUBO->setName("RenderedRawRasterPipelineNode::init::m_modelUBO");
+  SCORE_ASSERT(m_modelUBO->create());
+
+  // Create the samplers
+  SCORE_ASSERT(m_passes.empty());
+  SCORE_ASSERT(m_inputSamplers.empty());
+  SCORE_ASSERT(m_audioSamplers.empty());
+
+  m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor());
+
+  // Build the auxiliary-texture binding table and seed initial texture
+  // pointers from the incoming geometry. Walks desc.inputs parallel to
+  // n.input and m_inputSamplers, recording a (sampler_idx, name) pair
+  // for every image-style INPUT that might be served by a geometry aux
+  // texture. update() re-runs the lookup whenever the geometry changes
+  // so rebuilt / grown channel arrays flow through without a cable.
+  bindAuxTexturesInit(renderer);
+
+  m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures);
+
+  // Initialize auxiliary SSBOs from descriptor
+  {
+    const auto& desc = n.descriptor();
+    m_auxiliarySSBOs.clear();
+    m_auxiliarySSBOs.reserve(desc.auxiliary.size() + desc.inputs.size());
+
+    // Resolve a buffer for `ssbo` by looking up its name in the first
+    // incoming geometry's auxiliary_buffer list. Used for the scene-aware
+    // wiring where the upstream ScenePreprocessor publishes scene_lights /
+    // scene_materials / per_draw as named aux buffers travelling with the
+    // geometry edge.
+    auto try_bind_from_geometry = [&](AuxiliarySSBO& ssbo) {
+      if(!geometry.meshes || geometry.meshes->meshes.empty())
+        return;
+      const auto& mesh = geometry.meshes->meshes[0];
+      auto* geo_aux = mesh.find_auxiliary(ssbo.name);
+      if(!geo_aux || geo_aux->buffer < 0
+         || geo_aux->buffer >= (int)mesh.buffers.size())
+        return;
+      const auto& geo_buf = mesh.buffers[geo_aux->buffer];
+      if(auto* gpu = ossia::get_if<ossia::geometry::gpu_buffer>(&geo_buf.data))
+      {
+        if(!gpu->handle)
+          return;
+        ssbo.buffer = static_cast<QRhiBuffer*>(gpu->handle);
+        ssbo.size = geo_aux->byte_size > 0 ? geo_aux->byte_size : gpu->byte_size;
+        ssbo.owned = false;
+      }
+      else if(auto* cpu = ossia::get_if<ossia::geometry::cpu_buffer>(&geo_buf.data))
+      {
+        if(!cpu->raw_data || cpu->byte_size <= 0)
+          return;
+        int64_t sz = geo_aux->byte_size > 0 ? geo_aux->byte_size : cpu->byte_size;
+        // Usage flag must match the aux kind — binding a StorageBuffer-
+        // only buffer as a uniform block (or vice versa) is rejected by
+        // the Vulkan validation layer.
+        const auto usage = ssbo.is_uniform ? QRhiBuffer::UniformBuffer
+                                           : QRhiBuffer::StorageBuffer;
+        auto* buf = rhi.newBuffer(QRhiBuffer::Immutable, usage, sz);
+        buf->setName(QByteArray("RRP_aux_") + ssbo.name.c_str());
+        buf->create();
+        res.uploadStaticBuffer(buf, 0, sz, cpu->raw_data.get());
+        ssbo.buffer = buf;
+        ssbo.size = sz;
+        ssbo.owned = true;
+      }
+    };
+
+    // Resolve a buffer for `ssbo` by scanning the connected input port's
+    // edges for an upstream producer (CSF storage output, ExtractBuffer2,
+    // ScenePreprocessor aux extractors, ...). Upstream renderers publish
+    // their output buffer through the virtual NodeRenderer::bufferForOutput()
+    // — Port::value is never written for buffer-typed outputs — so the
+    // retrieval goes through RenderList::bufferForInput(edge).
+    //
+    // Complements try_bind_from_geometry: an INPUTS-declared storage_input/
+    // uniform_input may be wired through a dedicated Buffer edge instead of
+    // riding along with the geometry. Mirrors
+    // IsfBindingsBuilder::bindUpstreamBuffers, which SimpleRenderedISFNode
+    // uses for non-RawRaster shaders.
+    auto try_bind_from_input_port = [&](AuxiliarySSBO& ssbo) {
+      if(ssbo.input_port_index < 0
+         || ssbo.input_port_index >= (int)n.input.size())
+        return;
+      Port* port = n.input[ssbo.input_port_index];
+      if(!port || port->type != Types::Buffer)
+        return;
+      for(Edge* edge : port->edges)
+      {
+        if(!edge || !edge->source)
+          continue;
+        if(edge->source->type != Types::Buffer)
+          continue;
+        auto view = renderer.bufferForInput(*edge);
+        if(!view.handle)
+          continue;
+        ssbo.buffer = view.handle;
+        if(ssbo.size <= 0)
+          ssbo.size = view.handle->size();
+        ssbo.owned = false;
+        break;
+      }
+    };
+
+    // Compute the byte size required by a LAYOUT. Used when we need to
+    // own the buffer (persistent aux). Flexible array members use `size`
+    // as the element count (falls back to 1 if unspecified).
+    auto aux_owned_size = [](const isf::geometry_input::auxiliary_request& aux) -> int64_t {
+      int64_t total = 0;
+      int64_t arr_elem_bytes = 0;
+      for(const auto& f : aux.layout)
+      {
+        auto bracket = f.type.find('[');
+        std::string base = (bracket == std::string::npos) ? f.type : f.type.substr(0, bracket);
+        int64_t sz = 0;
+        if(base == "float" || base == "int" || base == "uint") sz = 4;
+        else if(base == "vec2" || base == "ivec2" || base == "uvec2") sz = 8;
+        else if(base == "vec3" || base == "ivec3" || base == "uvec3") sz = 16; // std430 pads
+        else if(base == "vec4" || base == "ivec4" || base == "uvec4") sz = 16;
+        else if(base == "mat4") sz = 64;
+        else if(base == "mat3") sz = 48;
+        else sz = 16; // conservative default for unknown types / structs
+        if(bracket != std::string::npos)
+        {
+          // Flexible array (`name[]`) — size comes from SIZE expression.
+          arr_elem_bytes = sz;
+        }
+        else
+        {
+          total += sz;
+        }
+      }
+      int64_t count = 1;
+      if(!aux.size.empty())
+      {
+        try { count = std::max<int64_t>(1, std::stoll(aux.size)); }
+        catch(const std::exception& e) {
+          count = 1024; // TODO: evaluate $USER when we add it
+          qWarning() << "RenderedRawRasterPipelineNode: aux SSBO size"
+                     << aux.size.c_str() << "could not be parsed (" << e.what()
+                     << "); falling back to 1024.";
+        }
+      }
+      else if(arr_elem_bytes > 0)
+      {
+        qWarning() << "RenderedRawRasterPipelineNode: aux SSBO has element size but no count;"
+                      " falling back to 1024.";
+        count = 1024;
+      }
+      return total + arr_elem_bytes * count;
+    };
+
+    // Top-level AUXILIARY textures: allocate one QRhiSampler per sampled
+    // entry (storage-image entries don't need a sampler — imageLoad /
+    // imageStore don't take one), seed with a type-appropriate
+    // placeholder texture. Actual upstream resolution happens in
+    // rebindAuxTextures() every frame.
+    for(const auto& atx : desc.auxiliary_textures)
+    {
+      AuxTextureAuxSampler ats;
+      ats.name = atx.name;
+      ats.is_storage = atx.is_storage;
+      ats.access = atx.access;
+
+      if(!atx.is_storage)
+      {
+        ats.sampler = score::gfx::makeSampler(rhi, atx.sampler);
+        ats.sampler->setName(
+            ("RRP_aux_tex_sampler::" + atx.name).c_str());
+      }
+
+      // Pick placeholder matching the declared shape. Stored separately
+      // so rebindAuxTextures can revert to it when upstream stops
+      // publishing the aux name (otherwise we'd keep the stale upstream
+      // handle around — UAF waiting to happen when the producer releases
+      // the texture).
+      if(atx.is_cubemap)
+        ats.placeholder = &renderer.emptyTextureCube();
+      else if(atx.dimensions == 3)
+        ats.placeholder = &renderer.emptyTexture3D();
+      else if(atx.is_array)
+        ats.placeholder = &renderer.emptyTextureArray();
+      else
+        ats.placeholder = &renderer.emptyTexture();
+      ats.texture = ats.placeholder;
+
+      m_auxTextureSamplers.push_back(std::move(ats));
+    }
+
+    // INPUTS storage_input / uniform_input: these have a matching score
+    // input port created by ISFNode's isf_input_port_vis. We record its
+    // index so update() can re-pull the upstream buffer if it changes
+    // (useful when the upstream node's init() runs after ours and only
+    // publishes its Port::value then).
+    //
+    // walk_descriptor_inputs() advances the cumulative port_counts in
+    // lockstep with isf_input_port_vis (single source of truth — see
+    // ISFVisitors.hpp). For RawRaster the cursor starts at 1 because
+    // port 0 is the mandatory Geometry input.
+    //
+    // Ordering: GLSL emits desc.inputs first then top-level AUXILIARY,
+    // so we push AuxiliarySSBOs in the same order — reversing would
+    // shift every binding index by desc.auxiliary.size() and Vulkan
+    // would reject the pipeline with "VkDescriptorType mismatch".
+    const bool isRawRaster = (desc.mode == isf::descriptor::RawRaster);
+    const port_counts startPC{isRawRaster ? 1 : 0, 0, 0};
+    // INPUTS storage_input / csf_image_input / uniform_input are handled by
+    // IsfBindingsBuilder's m_storage path (allocateStorageResources +
+    // buildExtraBindings) so the SRB binding type matches what
+    // isf_emit_graphics_storage emits in GLSL. See `isf.cpp:4073` for the
+    // GLSL emission and `IsfBindingsBuilder.cpp:417` for the allocation
+    // path. The previous hand-rolled walker here only handled storage_input
+    // and uniform_input, silently skipping csf_image_input — the shader
+    // would emit `image2D NAME at binding=N` while no descriptor was added,
+    // triggering VUID-VkGraphicsPipelineCreateInfo-layout-07990 on bind.
+    //
+    // No-op for INPUTS storage/uniform/csf_image entries — IsfBindingsBuilder
+    // handles them. We still need the walker for indirect_draw storage_input
+    // (special-cased at runtime, no SRB binding).
+    walk_descriptor_inputs(
+        desc, startPC,
+        [&](const isf::input& inp, const port_counts&, const port_counts&) {
+          if(auto* s = ossia::get_if<isf::storage_input>(&inp.data))
+          {
+            if(!s->buffer_usage.empty())
+              return; // indirect_draw handled elsewhere
+          }
+          // INPUTS storage_input / uniform_input / csf_image_input now flow
+          // through m_storage (initialised below). All other variants:
+          // nothing to record here; the canonical walker still advances
+          // port_idx correctly via `delta`.
+        });
+
+    // Now init m_storage from desc.inputs (storage_input + csf_image_input
+    // + uniform_input). Bindings start at 3 + samplers count to align with
+    // the GLSL emission order (samplers first in the binding range, then
+    // INPUTS storage in declaration order via isf_emit_graphics_storage,
+    // then AUXILIARY storage, then AUXILIARY textures, then model UBO).
+    if(m_firstStorageBinding < 0)
+    {
+      const int firstStorageBinding
+          = 3 + (int)m_inputSamplers.size() + (int)m_audioSamplers.size();
+      m_firstStorageBinding = firstStorageBinding;
+      collectGraphicsStorageResources(desc, firstStorageBinding, m_storage);
+    }
+    ensureStorageResources(
+        *renderer.state.rhi, res, renderer, desc, m_storage,
+        renderer.state.renderSize);
+    bindUpstreamBuffers(renderer, n.input, m_storage);
+    // Read-only csf_image_input adopts the matching upstream
+    // auxiliary_texture by name (the storage image an upstream CSF /
+    // RawRaster published into its out_geo). The auto-allocated
+    // placeholder is freed inside the helper. The SRB doesn't exist
+    // yet at init time — patched in update() once the pass is built.
+    // INPUTS storage_input / uniform_input also name-match against the
+    // upstream geometry's auxiliary_buffers list — that's how
+    // ScenePreprocessor publishes scene_lights / world_transforms /
+    // per_draws / scene_materials / scene_counts / scene_light_indices /
+    // camera UBO / env UBO into flattened-scene shaders (classic_pbr et al.).
+    if(geometry.meshes && !geometry.meshes->meshes.empty())
+    {
+      bindUpstreamImagesFromGeometry(m_storage, geometry.meshes->meshes[0]);
+      bindUpstreamBuffersFromGeometry(
+          *renderer.state.rhi, res, m_storage, geometry.meshes->meshes[0]);
+    }
+
+    // Top-level AUXILIARY entries: no corresponding score input port —
+    // resolved by name from the upstream geometry's auxiliary list.
+    // Kind dispatch (is_uniform): SSBO → std430 buffer, UBO → std140
+    // uniform. The AuxiliarySSBO struct already carries an is_uniform
+    // flag that downstream allocation / SRB-build sites dispatch on.
+    // Non-persistent: resolved from the incoming geometry.
+    // Persistent: node owns a ping-pong pair (SSBO only — UBO + persistent
+    // is a no-op per the parser's semantic note; this branch is gated on
+    // !is_uniform).
+    //
+    // Ordering: GLSL emits these AFTER all INPUTS bindings, so we push
+    // them after the INPUTS loop above to keep binding slots aligned
+    // between shader and SRB.
+    for(const auto& aux : desc.auxiliary)
+    {
+      AuxiliarySSBO ssbo;
+      ssbo.name = aux.name;
+      ssbo.access = aux.access;
+      ssbo.persistent = aux.persistent && !aux.is_uniform;
+      ssbo.is_uniform = aux.is_uniform;
+
+      if(ssbo.persistent)
+      {
+        const int64_t sz = std::max<int64_t>(16, aux_owned_size(aux));
+        auto alloc = [&](const char* suffix) -> QRhiBuffer* {
+          auto* b = rhi.newBuffer(
+              QRhiBuffer::Static, QRhiBuffer::StorageBuffer, (quint32)sz);
+          b->setName(QByteArray("RRP_persistent_aux_") + aux.name.c_str() + suffix);
+          b->create();
+          // Zero-initialise so the first frame's readonly _prev reads don't
+          // hit uninitialised memory.
+          std::vector<char> zeros(sz, 0);
+          res.uploadStaticBuffer(b, 0, sz, zeros.data());
+          return b;
+        };
+        ssbo.buffer = alloc("");
+        ssbo.prev_buffer = alloc("_prev");
+        ssbo.size = sz;
+        ssbo.owned = true;
+      }
+      else
+      {
+        try_bind_from_geometry(ssbo);
+      }
+
+      m_auxiliarySSBOs.push_back(std::move(ssbo));
+    }
+  }
+
+  // Determine if we need MRT. MRT is required for anything that
+  // `initMRTPass` knows how to allocate which the non-MRT single-
+  // target path can't express: multiple colour attachments, explicit
+  // depth output, layered / cubemap output, or multiview. Multiview
+  // specifically needs the MRT path because the RT has a different
+  // shape from a swap-chain RT.
+  {
+    const auto& outputs = n.descriptor().outputs;
+    int colorCount = 0;
+    bool hasDepth = false;
+    bool hasLayered = false;
+    bool hasCubemap = false;
+    for(const auto& out : outputs)
+    {
+      if(out.type == "depth")
+        hasDepth = true;
+      else
+        ++colorCount;
+      if(out.layers > 1)
+        hasLayered = true;
+      if(out.is_cubemap)
+        hasCubemap = true;
+    }
+    m_hasMRT = colorCount > 1 || hasDepth || hasLayered || hasCubemap
+               || n.descriptor().multiview_count >= 2;
+  }
+
+  if(m_hasMRT)
+  {
+    // Initialize the blit mesh (default quad)
+    m_blitMesh = &renderer.defaultQuad();
+    if(m_blitMeshbufs.buffers.empty())
+      m_blitMeshbufs = renderer.initMeshBuffer(*m_blitMesh, res);
+  }
+
+  m_initialized = true;
+}
+
+void RenderedRawRasterPipelineNode::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  // Builds the pass(es) feeding one output edge. Bails out without a mesh,
+  // except for procedural draws (VERTEX_INPUTS: [] + VERTEX_COUNT): they
+  // have no upstream geometry, m_mesh stays null and the draw call doesn't
+  // fetch vertex attributes, so a missing mesh must not block MRT setup.
+  if(!m_mesh && !isProceduralDraw())
+    return;
+
+  if(m_hasMRT)
+  {
+    // The internal MRT render target is shared: create it on the first edge only
+    if(m_mrtRenderTarget.texture == nullptr)
+    {
+      initMRTPass(renderer, res);
+    }
+
+    // Each output edge then gets its own blit pass
+    initMRTBlitPass(renderer, res, edge);
+  }
+  else // non-MRT path: build the pass on the render target looked up for this edge
+  {
+    auto rt = renderer.renderTargetForOutput(edge);
+    if(rt.renderTarget) // no pass is created when the lookup yields no render target
+    {
+      initPass(rt, renderer, res, edge);
+    }
+  }
+}
+
+void RenderedRawRasterPipelineNode::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  // Find and erase the pass registered for this edge (no-op when absent)
+  auto it = ossia::find_if(m_passes, [&](auto& p) { return p.first == &edge; });
+  if(it != m_passes.end())
+  {
+    it->second.p.release();
+    if(it->second.processUBO)
+      it->second.processUBO->deleteLater();
+    m_passes.erase(it);
+  }
+
+  if(m_hasMRT)
+  {
+    // Drop this edge's blit sampler; deferred so a frame being recorded stays valid
+    auto sit = m_blitSamplersByEdge.find(&edge);
+    if(sit != m_blitSamplersByEdge.end())
+    {
+      if(sit->second)
+        sit->second->deleteLater();
+      m_blitSamplersByEdge.erase(sit);
+    }
+
+    // Once only the shared MRT pass (nullptr edge) is left, release MRT resources
+    bool hasBlitPasses = false;
+    for(auto& [e, pass] : m_passes)
+    {
+      if(e != nullptr)
+      {
+        hasBlitPasses = true;
+        break;
+      }
+    }
+    if(!hasBlitPasses)
+    {
+      // Remove the shared MRT pass
+      auto mrtIt = ossia::find_if(m_passes, [](auto& p) { return p.first == nullptr; });
+      if(mrtIt != m_passes.end())
+      {
+        mrtIt->second.p.release();
+        if(mrtIt->second.processUBO)
+          mrtIt->second.processUBO->deleteLater();
+        m_passes.erase(mrtIt);
+      }
+      m_mrtRenderTarget.release();
+    }
+  }
+}
+
+bool RenderedRawRasterPipelineNode::hasOutputPassForEdge(Edge& edge) const
+{ // A pass exists for `edge` iff some m_passes entry is keyed by its address.
+  const auto keyedByEdge = [&edge](const auto& p) { return p.first == &edge; };
+  return ossia::find_if(m_passes, keyedByEdge) != m_passes.end();
+}
+
+void RenderedRawRasterPipelineNode::releaseState(RenderList& r)
+{
+  if(!m_initialized) // nothing was initialised (or it was already released)
+    return;
+
+  // Release this renderer's audio textures, then all remaining passes
+  {
+    for(auto& texture : n.m_audio_textures)
+    {
+      auto it = texture.samplers.find(&r);
+      if(it != texture.samplers.end())
+      {
+        if(auto tex = it->second.texture)
+        {
+          if(tex != &r.emptyTexture())
+            tex->deleteLater();
+        }
+      }
+    }
+
+    for(auto& [edge, pass] : m_passes)
+    {
+      pass.p.release();
+
+      if(pass.processUBO)
+      {
+        pass.processUBO->deleteLater();
+      }
+    }
+
+    m_passes.clear();
+  }
+
+  for(auto sampler : m_inputSamplers)
+  {
+    delete sampler.sampler;
+    // only the sampler is freed here; the texture is deleted elsewhere
+  }
+  m_inputSamplers.clear();
+  // Override entries are non-owning (registry-owned). Just drop the
+  // pointers — the registry's destroy() will deleteLater the underlying
+  // QRhiSampler.
+  m_inputSamplerOverrides.clear();
+  for(auto sampler : m_audioSamplers)
+  {
+    delete sampler.sampler;
+    // only the sampler is freed here; the texture is deleted elsewhere
+  }
+  m_audioSamplers.clear();
+  for(auto& [edge, sampler] : m_blitSamplersByEdge)
+  {
+    delete sampler;
+  }
+  m_blitSamplersByEdge.clear();
+
+  delete m_materialUBO;
+  m_materialUBO = nullptr;
+
+  delete m_modelUBO;
+  m_modelUBO = nullptr;
+
+  m_blitMeshbufs = {}; // Freed in RenderList
+
+  for(auto& aux : m_auxiliarySSBOs)
+  {
+    if(aux.owned && aux.buffer)
+      aux.buffer->deleteLater();
+    if(aux.owned && aux.prev_buffer)
+      aux.prev_buffer->deleteLater();
+  }
+  m_auxiliarySSBOs.clear();
+
+  // INPUTS storage trio (storage_input/csf_image_input/uniform_input)
+  // — owned by m_storage; release frees the underlying QRhiBuffer/Texture.
+  m_storage.release();
+  m_firstStorageBinding = -1; // negative: the first storage binding is recomputed on init
+
+  for(auto& ats : m_auxTextureSamplers)
+  {
+    if(ats.sampler)
+      ats.sampler->deleteLater();
+    // `texture` is either a renderer-owned placeholder or an upstream-
+    // geometry-owned handle — we don't own it here.
+  }
+  m_auxTextureSamplers.clear();
+
+  // Release per-mip / per-cube-face render targets. The underlying
+  // colour texture is owned by m_mrtRenderTarget and freed via its
+  // release() below — we only drop the per-iteration RT wrappers +
+  // per-iteration depth textures that we alloc'd here.
+  for(auto& e : m_mipRTs)
+  {
+    if(e.renderTarget)
+      e.renderTarget->deleteLater();
+    if(e.renderPass)
+      e.renderPass->deleteLater();
+    if(e.depth)
+      e.depth->deleteLater();
+  }
+  m_mipRTs.clear();
+  m_mipCount = 0;
+  m_perMipOutputIndex = -1;
+  m_perCubeFaceOutputIndex = -1;
+
+  // PerLayer state — same shape as the init-time cleanup in update().
+  // Color path is held in m_mipRTs (cleared above); depth path keeps
+  // its scratch + shared RT outside m_mipRTs.
+  if(m_perLayerSharedRT)
+  {
+    m_perLayerSharedRT->deleteLater();
+    m_perLayerSharedRT = nullptr;
+  }
+  if(m_perLayerSharedRP)
+  {
+    m_perLayerSharedRP->deleteLater();
+    m_perLayerSharedRP = nullptr;
+  }
+  if(m_perLayerScratchDepth)
+  {
+    m_perLayerScratchDepth->deleteLater();
+    m_perLayerScratchDepth = nullptr;
+  }
+  if(m_perLayerDummyColor)
+  {
+    m_perLayerDummyColor->deleteLater();
+    m_perLayerDummyColor = nullptr;
+  }
+  m_perLayerOutputDepthArray = nullptr;
+  m_perLayerOutputIndex = -1;
+  m_perLayerIsDepth = false;
+
+  m_executionMode = ExecutionMode::Single;
+
+  // CUBEMAP + MULTIVIEW shim textures. The shadow TextureArray is
+  // slotted into m_mrtRenderTarget's colour attachment slot, so
+  // m_mrtRenderTarget.release() below handles it. The cube, however,
+  // lives outside m_mrtRenderTarget (it's the public output handle)
+  // and must be deleteLater'd here.
+  if(m_cubeCopyCube)
+  {
+    m_cubeCopyCube->deleteLater();
+    m_cubeCopyCube = nullptr;
+  }
+  m_cubeCopyShadowArray = nullptr;  // owned via m_mrtRenderTarget
+  m_cubeCopyOutputIdx = -1;
+
+  // Per-invocation UBO + SRB pool (PerMip / PerCubeFace / Manual).
+  for(auto* ubo : m_perInvocationUBOs)
+    if(ubo) ubo->deleteLater();
+  m_perInvocationUBOs.clear();
+  for(auto* srb : m_perInvocationSRBs)
+    if(srb) srb->deleteLater();
+  m_perInvocationSRBs.clear();
+
+  // Release MRT render target (textures are owned by us)
+  if(m_hasMRT)
+  {
+    m_mrtRenderTarget.release();
+    m_hasMRT = false;
+  }
+
+  m_mesh = nullptr;
+  m_meshbufs = {};
+  m_blitMesh = nullptr;
+
+  m_initialized = false; // further calls are no-ops until the flag is set again
+}
+
+void RenderedRawRasterPipelineNode::addInputEdge(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  // Only image sinks have a texture to (re)bind.
+  if(edge.sink->type != Types::Image)
+    return;
+
+  // Look up the upstream node's rendered instance for this RenderList.
+  auto& upstreamNodes = edge.source->node->renderedNodes;
+  auto found = upstreamNodes.find(&renderer);
+  if(found == upstreamNodes.end())
+    return;
+  // Bind its output texture plus the depth texture of our input port's target.
+  if(auto* upstreamTex = found->second->textureForOutput(*edge.source))
+    updateInputTexture(
+        *edge.sink, upstreamTex, renderer.renderTargetForInputPort(*edge.sink).depthTexture);
+}
+
+void RenderedRawRasterPipelineNode::removeInputEdge(RenderList& renderer, Edge& edge)
+{
+  if(edge.sink->type != Types::Image)
+    return;
+
+  // A disconnected image input falls back to the renderer's empty texture.
+  // As in SimpleRenderedISFNode::removeInputEdge, a DEPTH: true input also
+  // resets its depth companion, otherwise that sampler would dangle.
+  QRhiTexture* const placeholder = &renderer.emptyTexture();
+  const bool wantsDepth
+      = (edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth;
+  updateInputTexture(*edge.sink, placeholder, wantsDepth ? placeholder : nullptr);
+}
+
+void RenderedRawRasterPipelineNode::init(
+    RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
+
+  // Output passes need something to draw: an upstream mesh, or a procedural
+  // shader (gl_VertexIndex + VERTEX_COUNT) which needs no geometry cable.
+  const bool drawable = m_mesh || isProceduralDraw();
+  if(!drawable)
+    return;
+  for(auto* out_port : n.output)
+    for(auto* edge : out_port->edges)
+      addOutputPass(renderer, *edge, res);
+}
+
+bool RenderedRawRasterPipelineNode::updateMaterials(
+    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
+{
+  bool mustRecreatePasses = false; // result: pipeline-affecting material bytes changed
+  // Update audio textures (the helper is created lazily on first use)
+  if(!n.m_audio_textures.empty() && !m_audioTex)
+  {
+    m_audioTex.emplace();
+  }
+
+  bool audioChanged = false;
+  std::size_t audio_idx = 0;
+  for(auto& audio : n.m_audio_textures)
+  {
+    if(std::optional<Sampler> sampl
+       = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res))
+    {
+      // Texture changed -> material changed
+      audioChanged = true;
+
+      auto& [rhiSampler, tex, fb_] = *sampl;
+      // A null texture falls back to the renderer's empty texture. The same
+      // pointer is cached in m_audioSamplers so a later pipeline rebuild
+      // (rt_changed: removeOutputPass + addOutputPass) never binds a null one.
+      auto* liveTex = tex ? tex : &renderer.emptyTexture();
+      if(audio_idx < m_audioSamplers.size())
+        m_audioSamplers[audio_idx].texture = liveTex;
+
+      for(auto& [e, pass] : m_passes)
+      {
+        if(pass.p.srb) // passes without an SRB have nothing to patch
+          score::gfx::replaceTexture(*pass.p.srb, rhiSampler, liveTex);
+      }
+    }
+    ++audio_idx;
+  }
+
+  // Update material: upload only when the UBO exists and something changed
+  if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged))
+  {
+    char* data = n.m_material_data.get();
+    SCORE_ASSERT(m_materialSize >= size_of_pipeline_material);
+    if(std::memcmp(data, this->m_prevPipelineChangingMaterial, size_of_pipeline_material)
+       != 0)
+    {
+      mustRecreatePasses = true;
+      std::copy_n(data, size_of_pipeline_material, this->m_prevPipelineChangingMaterial);
+    }
+    res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
+  }
+  materialChanged = false;
+  return mustRecreatePasses;
+}
+
+void RenderedRawRasterPipelineNode::update(
+    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
+{
+  // Update node materials. This must be before any initial return,
+  // otherwise we miss the materialsChanged
+  bool mustRecreatePasses = updateMaterials(renderer, res, edge);
+  bool recreateDueToMaterial = mustRecreatePasses;
+
+  // Refresh upstream-bound storage_input / uniform_input buffers from input
+  // ports. The first pass will pick them up via the SRB; subsequent passes
+  // need bindUpstreamBuffers to patch their SRBs in-place — handled per-pass
+  // when m_passes is iterated for SRB updates further down. (Safe to call
+  // even with no SRB; the helper just refreshes the m_storage entries.)
+  bindUpstreamBuffers(renderer, n.input, m_storage);
+  // Same pattern for read-only csf_image_input: adopt the matching upstream
+  // auxiliary_texture (a storage image written by an upstream CSF /
+  // RawRaster). Called per-frame so a producer that switches its underlying
+  // QRhiTexture on resize / rebuild flows through. The helper is
+  // idempotent on the swap and unconditionally patches each SRB it's
+  // given — so calling it once per pass refreshes every SRB while only
+  // doing the actual upstream lookup + swap on the first iteration.
+  if(geometry.meshes && !geometry.meshes->meshes.empty())
+  {
+    // Per-pass refresh of name-matched-from-geometry bindings (SSBO/UBO/
+    // storage_image). bindUpstream*FromGeometry are idempotent on the
+    // swap and unconditionally patch each SRB they're given — so calling
+    // each once per pass refreshes every SRB while doing the actual
+    // upstream lookup + swap only on the first iteration that observed
+    // a change.
+    for(auto& [edge, pass] : m_passes)
+    {
+      if(pass.p.srb)
+      {
+        bindUpstreamImagesFromGeometry(
+            m_storage, geometry.meshes->meshes[0], pass.p.srb);
+        bindUpstreamBuffersFromGeometry(
+            *renderer.state.rhi, res, m_storage,
+            geometry.meshes->meshes[0], pass.p.srb);
+      }
+    }
+  }
+
+  // Update the geometry (sync with ModelDisplayNode)
+
+  if(this->geometryChanged)
+  {
+    if(geometry.meshes)
+    {
+      const Mesh* prevMesh = m_mesh;
+      std::tie(m_mesh, m_meshbufs)
+          = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs);
+      m_meshbufs.gpuIndirectSupported = renderer.state.caps.drawIndirect;
+
+      this->meshChangedIndex = this->m_mesh->dirtyGeometryIndex;
+
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+      // Check for standalone indirect draw buffer from Buffer input ports
+      if(!m_meshbufs.useIndirectDraw)
+      {
+        for(auto* port : n.input)
+        {
+          if(port->type == Types::Buffer && !port->edges.empty())
+          {
+            auto bv = renderer.bufferForInput(*port->edges.front());
+            if(bv.usage == BufferView::Usage::IndirectDraw)
+            {
+              m_meshbufs.indirectDrawBuffer = bv.handle;
+              m_meshbufs.useIndirectDraw = true;
+              m_meshbufs.indirectDrawIndexed = false;
               break;
             }
             else if(bv.usage == BufferView::Usage::IndirectDrawIndexed)
@@ -443,6 +2472,14 @@ void RenderedRawRasterPipelineNode::update(
     }
     this->geometryChanged = false;
 
+    // Re-resolve image-input samplers against the geometry's aux
+    // textures. Growing a channel's texture array on ScenePreprocessor
+    // republishes the geometry with a new QRhiTexture*; picking that up
+    // here keeps the SRB bound to the live array instead of the deleted
+    // one. A sampler change forces pass recreation so the SRB rebinds.
+    if(rebindAuxTextures())
+      mustRecreatePasses = true;
+
     // Re-match auxiliary SSBOs from updated geometry
     if(geometry.meshes && !geometry.meshes->meshes.empty())
     {
@@ -499,17 +2536,57 @@ void RenderedRawRasterPipelineNode::update(
     }
   }
 
+  // Per-frame: re-pull upstream buffers wired through Buffer input ports
+  // (camera UBO, ExtractBuffer2 SSBOs, ...). Cheap: one virtual call per
+  // aux that has an input port index. Runs every frame because we cannot
+  // guarantee the upstream publisher's init() ran before ours — its
+  // bufferForOutput() may only return a non-null handle a frame later.
+  for(auto& aux : m_auxiliarySSBOs)
+  {
+    if(aux.input_port_index < 0
+       || aux.input_port_index >= (int)n.input.size())
+      continue;
+    Port* port = n.input[aux.input_port_index];
+    if(!port || port->type != Types::Buffer)
+      continue;
+
+    QRhiBuffer* upstream = nullptr;
+    for(Edge* edge : port->edges)
+    {
+      if(!edge || !edge->source)
+        continue;
+      if(edge->source->type != Types::Buffer)
+        continue;
+      if(auto view = renderer.bufferForInput(*edge); view.handle)
+      {
+        upstream = view.handle;
+        break;
+      }
+    }
+    if(!upstream || upstream == aux.buffer)
+      continue;
+
+    // Drop any placeholder / previously-owned buffer and adopt upstream.
+    if(aux.owned && aux.buffer)
+      aux.buffer->deleteLater();
+    aux.buffer = upstream;
+    aux.size = upstream->size();
+    aux.owned = false;
+    mustRecreatePasses = true;
+  }
+
   bool recreateDueToGeometry = mustRecreatePasses && !recreateDueToMaterial;
 
-  if(!m_mesh)
+  const bool procedural = isProceduralDraw();
+  if(!m_mesh && !procedural)
   {
-    qDebug() << "RawRaster::update: no mesh!";
     return;
   }
 
   // FIXME is that neeeded?
   // FIXME also not handling geometry_filter dirty geom so far
-  bool meshDirty = m_mesh->hasGeometryChanged(meshChangedIndex);
+  // Procedural draws never have a mesh — skip the dirty check.
+  bool meshDirty = m_mesh && m_mesh->hasGeometryChanged(meshChangedIndex);
   if(meshDirty)
   {
     mustRecreatePasses = true;
@@ -517,118 +2594,756 @@ void RenderedRawRasterPipelineNode::update(
 
   if(mustRecreatePasses)
   {
-    qWarning() << "RRP: recreating passes:"
-               << "material=" << recreateDueToMaterial
-               << "geometryChanged=" << recreateDueToGeometry
-               << "meshDirty=" << meshDirty;
     for(auto& pass : m_passes)
     {
       pass.second.p.release();
-      delete pass.second.processUBO;
+      if(pass.second.processUBO)
+        pass.second.processUBO->deleteLater();
     }
     m_passes.clear();
 
-    for(Edge* edge : n.output[0]->edges)
+    for(auto& [e, sampler] : m_blitSamplersByEdge)
+      sampler->deleteLater();
+    m_blitSamplersByEdge.clear();
+
+    if(m_hasMRT)
+    {
+      // Release and recreate the internal MRT render target
+      m_mrtRenderTarget.release();
+      initMRTPass(renderer, res);
+      initMRTBlitPasses(renderer, res);
+    }
+    else
+    {
+      for(Edge* edge : n.output[0]->edges)
+      {
+        auto rt = renderer.renderTargetForOutput(*edge);
+        if(rt.renderTarget)
+        {
+          initPass(rt, renderer, res, *edge);
+        }
+      }
+    }
+
+    // After pass recreation, the freshly built SRBs reference the
+    // CURRENT m_storage entries. For storage_input/uniform_input that
+    // are name-matched against the upstream geometry's auxiliary_buffers
+    // (the ScenePreprocessor publishing pattern: scene_lights /
+    // world_transforms / per_draws / scene_materials / scene_counts /
+    // scene_light_indices / camera UBO / env UBO), m_storage entries
+    // may still hold the 16-byte zero placeholder ensureStorageResources
+    // allocated for owned SSBOs — the per-pass refresh loop below
+    // (lines ~2640+) is gated on m_passes non-empty. On a fresh
+    // RenderList (resize / graph rebuild) the very first frame's
+    // initState ran with m_passes empty, init early-returned without
+    // building m_passes, then the per-pass refresh below was a no-op,
+    // and now mustRecreatePasses just built passes against the
+    // placeholder. Re-fire bindUpstream*FromGeometry on the freshly
+    // built SRBs so they pick up the live geometry buffers / textures
+    // immediately. Without this, classic_pbr's scene_counts.light_count
+    // reads as 0 on the resize frame → light loop runs 0 times → no
+    // specular until the next frame patches the SRB.
+    if(geometry.meshes && !geometry.meshes->meshes.empty())
     {
-      auto rt = renderer.renderTargetForOutput(*edge);
-      if(rt.renderTarget)
+      for(auto& [edge, pass] : m_passes)
+      {
+        if(pass.p.srb)
+        {
+          bindUpstreamImagesFromGeometry(
+              m_storage, geometry.meshes->meshes[0], pass.p.srb);
+          bindUpstreamBuffersFromGeometry(
+              *renderer.state.rhi, res, m_storage,
+              geometry.meshes->meshes[0], pass.p.srb);
+        }
+      }
+
+      // Sampler refresh: the bindUpstream*FromGeometry loop above only patches
+      // m_storage entries (csf_image_input / storage_input / uniform_input). Plain
+      // image_input INPUTS (sampler2DArray, sampler2D, sampler3D, etc.)
+      // live in m_inputSamplers and are refreshed only by
+      // rebindAuxTextures Path A — gated on `geometryChanged` and run
+      // ONCE earlier in update() (line ~2462). If
+      // `geometry.meshes` was null at THAT moment (or if a sibling
+      // renderer republishes a fresh mesh_list AFTER that call) the
+      // sampler binding stays at its empty-texture placeholder OR a
+      // stale (deleteLater'd) upstream pointer.
+      //
+      // For the textured-PBR pipelines this manifests as:
+      // baseColorArray sampler reads garbage / NaN → BRDF math
+      // collapses → specular vanishes (ambient + base color factor +
+      // emissive remain). Untextured classic_pbr has zero image_input
+      // INPUTS so its m_inputSamplers is empty and the bug can't
+      // trigger — exactly the user-reported asymmetry.
+      //
+      // Re-run rebindAuxTextures here (idempotent: short-circuits when
+      // the slot's cached texture pointer matches the upstream's
+      // current pointer). When it returns true, hot-patch the existing
+      // SRBs in place via replaceTexture rather than going through
+      // another full mustRecreatePasses cycle — the pipeline layout
+      // is unchanged, only the texture pointer needs swapping.
+      if(rebindAuxTextures())
       {
-        initPass(rt, renderer, *edge);
+        // Match key for replaceTexture MUST be the sampler that's
+        // actually in the SRB binding. allSamplers() (line ~155-170)
+        // substitutes m_inputSamplerOverrides[i] for m_inputSamplers[i]
+        // when ScenePreprocessor publishes a per-bucket sampler_handle
+        // (e.g. baseColorArray gets the bucket's QRhiSampler so each
+        // glTF/FBX material's wrap/filter survives). replaceTexture
+        // matches by sampler-pointer (Utils.cpp:435); using the
+        // ORIGINAL m_inputSamplers[i].sampler as the key when the SRB
+        // has the OVERRIDE silently no-ops — so the texture refresh
+        // never lands on textured-PBR pipelines that go through
+        // ScenePreprocessor's per-bucket sampler overrides. That was
+        // the residual lighting glitch on resize.
+        const auto srb_key = [&](std::size_t i) -> QRhiSampler* {
+          if(i < m_inputSamplerOverrides.size() && m_inputSamplerOverrides[i])
+            return m_inputSamplerOverrides[i];
+          return m_inputSamplers[i].sampler;
+        };
+        for(auto& [edge, pass] : m_passes)
+        {
+          if(!pass.p.srb)
+            continue;
+          for(std::size_t i = 0; i < m_inputSamplers.size(); ++i)
+          {
+            auto& s = m_inputSamplers[i];
+            if(s.texture && s.sampler)
+              score::gfx::replaceTexture(
+                  *pass.p.srb, srb_key(i), s.texture);
+          }
+        }
+        for(auto* invSrb : m_perInvocationSRBs)
+        {
+          if(!invSrb)
+            continue;
+          for(std::size_t i = 0; i < m_inputSamplers.size(); ++i)
+          {
+            auto& s = m_inputSamplers[i];
+            if(s.texture && s.sampler)
+              score::gfx::replaceTexture(
+                  *invSrb, srb_key(i), s.texture);
+          }
+        }
       }
     }
   }
 
+  m_mrtRenderedThisFrame = false;
+
   n.standardUBO.passIndex = 0;
   n.standardUBO.frameIndex++;
   auto sz = renderer.renderSize(edge);
   n.standardUBO.renderSize[0] = sz.width();
   n.standardUBO.renderSize[1] = sz.height();
 
-  // Update all the process UBOs
+  // Update all the process UBOs (blit passes have nullptr processUBO)
   for(auto& [e, pass] : m_passes)
   {
+    if(!pass.processUBO)
+      continue;
     res.updateDynamicBuffer(
         pass.processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO);
   }
 
   res.updateDynamicBuffer(m_modelUBO, 0, sizeof(float[16]), m_modelTransform.matrix);
+
+  // Reset event ports now that the material UBO has captured their pulse
+  // value via updateMaterials() above. If anything fired, set the shared
+  // materialChanged flag so next frame's updateMaterials() uploads the
+  // now-zero CPU memory instead of being gated out as unchanged.
+  if(n.resetEventPortsAfterFrame())
+    this->materialChanged = true;
+
+  // Persistent AUXILIARY ping-pong: swap buffer/prev_buffer pointers, then
+  // patch every pipeline's SRB so binding slots reference the post-swap
+  // buffers. Done at the end of update() so the pass that renders this
+  // frame already reads the previous frame's writes via `<name>_prev`.
+  bool anyPersistentSwap = false;
+  for(auto& aux : m_auxiliarySSBOs)
+  {
+    if(!aux.persistent || !aux.prev_buffer || n.standardUBO.frameIndex < 2u)
+      continue;
+    std::swap(aux.buffer, aux.prev_buffer);
+    anyPersistentSwap = true;
+  }
+  if(anyPersistentSwap)
+  {
+    for(auto& [e, pass] : m_passes)
+    {
+      if(!pass.p.srb)
+        continue;
+      for(const auto& aux : m_auxiliarySSBOs)
+      {
+        if(!aux.persistent || aux.binding < 0 || aux.prev_binding < 0)
+          continue;
+        score::gfx::replaceBuffer(*pass.p.srb, aux.prev_binding, aux.prev_buffer);
+        score::gfx::replaceBuffer(*pass.p.srb, aux.binding, aux.buffer);
+      }
+      // No trailing create() — replaceBuffer's updateResources() fast
+      // path already refreshes the backend descriptor state.
+    }
+    // Per-invocation SRB pool (PerMip / PerCubeFace / Manual EXECUTION_MODELs)
+    // shares the same persistent aux bindings as pass.p.srb. Without this
+    // loop, invocation 0 reads post-swap data while invocations 1..N-1 read
+    // the pre-swap (now `prev_buffer`-backed) buffers.
+    for(auto* invSrb : m_perInvocationSRBs)
+    {
+      if(!invSrb)
+        continue;
+      for(const auto& aux : m_auxiliarySSBOs)
+      {
+        if(!aux.persistent || aux.binding < 0 || aux.prev_binding < 0)
+          continue;
+        score::gfx::replaceBuffer(*invSrb, aux.prev_binding, aux.prev_buffer);
+        score::gfx::replaceBuffer(*invSrb, aux.binding, aux.buffer);
+      }
+    }
+  }
 }
 
 void RenderedRawRasterPipelineNode::release(RenderList& r)
 {
-  // customRelease
+  releaseState(r);
+}
+
+void RenderedRawRasterPipelineNode::bindAuxTexturesInit(RenderList& /*renderer*/)
+{
+  m_auxTextureBindings.clear();
+
+  // Sampler slots follow initInputSamplers, which walks n.input[] and
+  // pushes one sampler per Types::Image port, plus an extra "depth
+  // sampler" when the port has SamplableDepth (image_input.depth=true on
+  // a non-GrabsFromSource input). walk_descriptor_inputs reports the
+  // canonical sampler delta per input (see isf_input_port_count_vis), so
+  // every image-like INPUT is recorded against its matching sampler slot.
+  const auto& desc = n.descriptor();
+  walk_descriptor_inputs(
+      desc, [this](const isf::input& input, const port_counts& sofar, const port_counts& step) {
+        if(step.samplers > 0)
+          m_auxTextureBindings.push_back({sofar.samplers, input.name});
+      });
+
+  // Seed the texture pointers from any geometry already published at
+  // init() time (typically none: the real lookup happens in the first
+  // update()'s geometryChanged branch).
+  rebindAuxTextures();
+}
+
+bool RenderedRawRasterPipelineNode::rebindAuxTextures()
+{
+  bool changed = false;
+  if(!geometry.meshes || geometry.meshes->meshes.empty())
+    return changed;
+  const auto& mesh = geometry.meshes->meshes[0];
+
+  // Path A: texture *overrides* on input-port-backed samplers (legacy
+  // pattern: an INPUTS image whose name matches a geometry aux texture
+  // gets its sampler's texture pointer swapped). When the geometry
+  // also publishes a sampler_handle, swap that too — that's how
+  // ScenePreprocessor's per-bucket samplers (per-glTF wrap/filter)
+  // override the shader's static INPUTS sampler config.
+  for(const auto& b : m_auxTextureBindings)
   {
-    for(auto& texture : n.m_audio_textures)
+    if(b.sampler_idx < 0 || b.sampler_idx >= (int)m_inputSamplers.size())
+      continue;
+    const auto* aux = mesh.find_auxiliary_texture(b.name);
+    if(!aux)
+      continue;
+    auto* tex = static_cast<QRhiTexture*>(aux->native_handle);
+    if(!tex)
+      continue;
+    auto& slot = m_inputSamplers[b.sampler_idx];
+    if(slot.texture != tex)
     {
-      auto it = texture.samplers.find(&r);
-      if(it != texture.samplers.end())
+      slot.texture = tex;
+      changed = true;
+    }
+    // Sampler override is non-owning — the bucket (in GpuResourceRegistry)
+    // owns the QRhiSampler. Stored in the parallel m_inputSamplerOverrides
+    // vector so the original initInputSamplers-owned sampler stays in
+    // m_inputSamplers and `delete sampler.sampler` in release() doesn't
+    // free the registry's sampler. allSamplers() applies the override
+    // when building the SRB.
+    if((int)m_inputSamplerOverrides.size() <= b.sampler_idx)
+      m_inputSamplerOverrides.resize(b.sampler_idx + 1, nullptr);
+    auto* smp = aux->sampler_handle
+                    ? static_cast<QRhiSampler*>(aux->sampler_handle)
+                    : nullptr;
+    if(m_inputSamplerOverrides[b.sampler_idx] != smp)
+    {
+      m_inputSamplerOverrides[b.sampler_idx] = smp;
+      changed = true;
+    }
+  }
+
+  // Path B: top-level AUXILIARY textures (no input port). Resolve each
+  // entry by name on mesh 0; fall back to the shape-matched placeholder
+  // when nothing matches so no stale upstream handle is kept (UAF guard
+  // when a producer disconnects or frees its texture). NOTE(review): the
+  // early return above skips this revert if no mesh is published — confirm.
+  bool auxTexChanged = false;
+  for(auto& ats : m_auxTextureSamplers)
+  {
+    const auto* aux = mesh.find_auxiliary_texture(ats.name);
+    auto* tex = aux ? static_cast<QRhiTexture*>(aux->native_handle) : nullptr;
+    if(!tex)
+      tex = ats.placeholder; // revert to empty of the right kind
+    if(!tex || tex == ats.texture)
+      continue;
+    ats.texture = tex;
+    auxTexChanged = true;
+  }
+  if(auxTexChanged)
+  {
+    // Batched SRB rebuild: one destroy+setBindings+create per pass,
+    // regardless of how many aux texture handles changed this frame.
+    // The per-texture `replaceTexture(srb, binding, tex)` overload each
+    // does its own destroy/setBindings/create, so looping it N times
+    // would trigger N full SRB rebuilds per pass per frame whenever
+    // textures change. Using the vector overload lets us batch into a
+    // single rebuild cycle.
+    auto rebuildSrb = [&](QRhiShaderResourceBindings* srb) {
+      if(!srb)
+        return;
+      std::vector<QRhiShaderResourceBinding> tmp;
+      tmp.assign(srb->cbeginBindings(), srb->cendBindings());
+      for(const auto& ats : m_auxTextureSamplers)
       {
-        if(auto tex = it->second.texture)
-        {
-          if(tex != &r.emptyTexture())
-            tex->deleteLater();
-        }
+        if(ats.binding < 0 || !ats.texture)
+          continue;
+        score::gfx::replaceTexture(tmp, ats.binding, ats.texture);
       }
-    }
+      srb->destroy();
+      srb->setBindings(tmp.begin(), tmp.end());
+      srb->create();
+    };
+    for(auto& [e, pass] : m_passes)
+      rebuildSrb(pass.p.srb);
+    // Per-invocation SRB pool (PerMip / PerCubeFace / Manual
+    // EXECUTION_MODELs) — clones of pass.p.srb taken at construction
+    // (see initPass / initMRTPass per-invocation push). Without this
+    // mirror, invocation 0 (which renders through pass.p.srb) sees the
+    // refreshed aux texture while invocations 1..N-1 keep sampling the
+    // stale handle indefinitely. Same shape as the SSBO ping-pong fix
+    // for m_perInvocationSRBs above (line ~2649) — symmetric, the bug
+    // here was that the SSBO fix didn't propagate to aux-texture
+    // rebinds.
+    for(auto* invSrb : m_perInvocationSRBs)
+      rebuildSrb(invSrb);
+    changed = true;
+  }
 
-    for(auto& [edge, pass] : m_passes)
-    {
-      pass.p.release();
+  return changed;
+}
 
-      if(pass.processUBO)
+void RenderedRawRasterPipelineNode::runInitialPasses(
+    RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& updateBatch,
+    Edge& edge)
+{
+  // MDI readback fallback: when the backend doesn't support drawIndirect,
+  // synchronously read back the GPU indirect buffer so the CPU draw loop
+  // has the commands ready for this frame's draw call.
+  //
+  // This MUST re-run every frame: the indirect buffer is GPU-generated (e.g.
+  // by a GPU culling compute pass) and changes frame to frame. Gating on
+  // cpuDrawCommands.empty() would freeze the draw list permanently after the
+  // first readback, so GPU culling output would diverge forever. We re-derive
+  // cpuDrawCommands from the latest indirect buffer contents each frame.
+  //
+  // Guard behind ReadBackNonUniformBuffer: this is exactly the feature missing
+  // on OpenGL ES 2.0 (GLES 3.x and desktop backends have it). Without it the
+  // readBackBuffer call would fail silently / assert, so we degrade gracefully
+  // (the draw falls back to whatever cpuDrawCommands already holds, or a single
+  // drawIndexed) and warn once.
+  if(m_meshbufs.useIndirectDraw
+     && !m_meshbufs.gpuIndirectSupported
+     && m_meshbufs.indirectDrawBuffer
+     && m_meshbufs.indirectDrawBuffer->size() > 0
+     && renderer.state.rhi->isFeatureSupported(QRhi::ReadBackNonUniformBuffer))
+  {
+    QRhi& rhi = *renderer.state.rhi;
+    auto* rb = rhi.nextResourceUpdateBatch();
+    const quint32 bufSize = m_meshbufs.indirectDrawBuffer->size();
+    m_meshbufs.readbackResult.completed = [this, bufSize]() {
+      const auto& data = m_meshbufs.readbackResult.data;
+      constexpr int cmdSize = 5 * sizeof(uint32_t);
+      const int cmdCount = data.size() / cmdSize;
+      m_meshbufs.cpuDrawCommands.clear();
+      m_meshbufs.cpuDrawCommands.reserve(cmdCount);
+      const auto* raw = reinterpret_cast<const uint32_t*>(data.constData());
+      for(int c = 0; c < cmdCount; ++c)
       {
-        pass.processUBO->deleteLater();
+        const uint32_t* p = raw + c * 5;
+        m_meshbufs.cpuDrawCommands.push_back({
+            .index_or_vertex_count = p[0],
+            .instance_count = p[1],
+            .first_index_or_vertex = p[2],
+            .base_vertex = static_cast<int32_t>(p[3]),
+            .first_instance = p[4]});
       }
+    };
+    rb->readBackBuffer(m_meshbufs.indirectDrawBuffer, 0, bufSize, &m_meshbufs.readbackResult);
+    cb.resourceUpdate(rb);
+    rhi.finish();
+  }
+  else if(
+      m_meshbufs.useIndirectDraw && !m_meshbufs.gpuIndirectSupported
+      && m_meshbufs.indirectDrawBuffer && m_meshbufs.indirectDrawBuffer->size() > 0
+      && !renderer.state.rhi->isFeatureSupported(QRhi::ReadBackNonUniformBuffer))
+  {
+    // Graceful degradation: the backend (e.g. OpenGL ES 2.0) can neither
+    // draw indirect nor read back the GPU-generated indirect buffer. The draw
+    // loop falls back to cpuDrawCommands (if a producer ever filled them) or a
+    // single drawIndexed. Warn once so the missing GPU-culled commands are
+    // diagnosable rather than a silent visual divergence.
+    static bool warned = false;
+    if(!warned)
+    {
+      warned = true;
+      qWarning() << "RenderedRawRasterPipelineNode: GPU-generated indirect draws "
+                    "require QRhi::ReadBackNonUniformBuffer, unsupported on this "
+                    "backend (e.g. OpenGL ES 2.0) — falling back to CPU draw "
+                    "commands; GPU culling output will not be reflected.";
     }
-
-    m_passes.clear();
   }
 
-  for(auto sampler : m_inputSamplers)
+  if(!m_hasMRT || m_passes.empty())
+    return;
+  // Procedural draws don't require a mesh/vertex buffers — the draw
+  // call uses gl_VertexIndex with no vertex bindings. Block only on
+  // the non-procedural path.
+  if(!isProceduralDraw() && (!m_mesh || m_meshbufs.buffers.empty()))
+    return;
+
+  // Only render once per frame even if multiple downstream nodes trigger us
+  if(m_mrtRenderedThisFrame)
+    return;
+  m_mrtRenderedThisFrame = true;
+
+  // MRT: render into our internal multi-attachment render target
+  auto& pass = m_passes[0].second;
+
+  SCORE_ASSERT(pass.renderTarget.renderTarget);
+  SCORE_ASSERT(pass.p.pipeline);
+  SCORE_ASSERT(pass.p.srb);
+
+  // Invocation-count resolution. Single → 1; PerMip / PerCubeFace /
+  // PerLayer → m_mipCount (reused as the invocation count: mip count,
+  // face count = 6, or — presumably — layer count); Manual → evaluate
+  // the COUNT expression (falls back to 1 when the expression is empty /
+  // unparseable). Runs every frame for Manual so the count can track live
+  // input values; cached for the other modes (target shape fixed at init).
+  int invocationCount = 1;
+  if(m_executionMode == ExecutionMode::PerMip
+     || m_executionMode == ExecutionMode::PerCubeFace
+     || m_executionMode == ExecutionMode::PerLayer)
   {
-    delete sampler.sampler;
-    // texture isdeleted elsewxheree
+    invocationCount = std::max(1, m_mipCount);
   }
-  m_inputSamplers.clear();
-  for(auto sampler : m_audioSamplers)
+  else if(m_executionMode == ExecutionMode::Manual)
   {
-    delete sampler.sampler;
-    // texture isdeleted elsewxheree
+    m_manualCount = resolveManualInvocationCount();
+    invocationCount = std::max(1, m_manualCount);
   }
-  m_audioSamplers.clear();
 
-  delete m_materialUBO;
-  m_materialUBO = nullptr;
+  auto* mainTex = pass.renderTarget.texture;
+  // Depth-only shaders have no colour attachment so mainTex is null;
+  // fall back to the depth attachment for the render-target size, then
+  // to the renderer's render-size as a last resort. PER_LAYER+depth
+  // specifically declares WIDTH/HEIGHT on its depth output (e.g.
+  // 2048×2048 for shadow maps) and we want the viewport to honour that
+  // rather than the window size.
+  QRhiTexture* sizeTex = mainTex
+                             ? mainTex
+                             : pass.renderTarget.depthTexture;
+  const QSize baseSize
+      = sizeTex ? sizeTex->pixelSize() : renderer.state.renderSize;
 
-  delete m_modelUBO;
-  m_modelUBO = nullptr;
+  QRhi& rhi = *renderer.state.rhi;
 
-  // Note: release() doesn't have access to the RenderList, so we use deleteLater.
-  // These buffers are only used in the SRB which is already released above.
-  for(auto& aux : m_auxiliarySSBOs)
+  // Grow the per-invocation UBO+SRB pool if invocationCount exceeds
+  // what we've already allocated. Each extra UBO gets its own dynamic
+  // slot (no inter-invocation aliasing of the underlying buffer — the
+  // QRhi Dynamic-UBO single-slot constraint is what made PASSINDEX
+  // collapse to the last-written value before this). SRB i clones the
+  // main SRB with the process-UBO binding swapped to UBO i.
+  const int needed_extra = std::max(0, invocationCount - 1);
+  while((int)m_perInvocationUBOs.size() < needed_extra)
   {
-    if(aux.owned && aux.buffer)
-      aux.buffer->deleteLater();
+    const int k = (int)m_perInvocationUBOs.size() + 1;
+
+    auto* ubo = rhi.newBuffer(
+        QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO));
+    ubo->setName(
+        ("RRPNode::MRT::perInvocationUBO::" + std::to_string(k)).c_str());
+    ubo->create();
+    m_perInvocationUBOs.push_back(ubo);
+
+    // Clone the main SRB's bindings, swap binding=1 (the process UBO
+    // per ISF convention — see isf.cpp's emitted `layout(std140,
+    // binding = 1) uniform process_t`) to point at our new buffer.
+    // The main pass's SRB is the layout-defining parent; new SRBs are
+    // structurally identical and therefore compatible with the main
+    // pipeline.
+    std::vector<QRhiShaderResourceBinding> tmp;
+    if(pass.p.srb)
+      tmp.assign(pass.p.srb->cbeginBindings(), pass.p.srb->cendBindings());
+    for(auto& b : tmp)
+    {
+      auto* d = reinterpret_cast<QRhiShaderResourceBinding::Data*>(&b);
+      if(d->type == QRhiShaderResourceBinding::Type::UniformBuffer
+         && d->binding == 1)
+      {
+        d->u.ubuf.buf = ubo;
+      }
+    }
+    auto* srb = rhi.newShaderResourceBindings();
+    srb->setName(
+        ("RRPNode::MRT::perInvocationSRB::" + std::to_string(k)).c_str());
+    srb->setBindings(tmp.begin(), tmp.end());
+    srb->create();
+    m_perInvocationSRBs.push_back(srb);
   }
-  m_auxiliarySSBOs.clear();
-}
+  for(int i = 0; i < invocationCount; ++i)
+  {
+    // Stamp the per-invocation index into ProcessUBO. For PerMip this
+    // doubles as the mip level; for Manual it's the 0-based loop index.
+    // Each invocation writes to ITS OWN UBO (one allocated per slot
+    // above) so Dynamic-UBO single-slot-per-frame doesn't collapse
+    // every draw to the last-uploaded value.
+    QRhiBuffer* invUBO
+        = (i == 0) ? pass.processUBO : m_perInvocationUBOs[i - 1];
+    QRhiShaderResourceBindings* invSRB
+        = (i == 0) ? pass.p.srb : m_perInvocationSRBs[i - 1];
+
+    auto* invBatch = (i == 0 && updateBatch)
+                         ? updateBatch
+                         : rhi.nextResourceUpdateBatch();
+    this->n.standardUBO.passIndex = i;
+    invBatch->updateDynamicBuffer(
+        invUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO);
+    if(i == 0)
+      updateBatch = nullptr;
+
+    QRhiTextureRenderTarget* rtForPass
+        = dynamic_cast<QRhiTextureRenderTarget*>(pass.renderTarget.renderTarget);
+    QSize viewportSize = baseSize;
+    if(m_executionMode == ExecutionMode::PerMip
+       && i < (int)m_mipRTs.size() && m_mipRTs[i].renderTarget)
+    {
+      rtForPass = m_mipRTs[i].renderTarget;
+      viewportSize = QSize(
+          std::max(1, baseSize.width() >> i),
+          std::max(1, baseSize.height() >> i));
+    }
+    else if(m_executionMode == ExecutionMode::PerCubeFace
+            && i < (int)m_mipRTs.size() && m_mipRTs[i].renderTarget)
+    {
+      // Per-face cubemap RT. Face size = base (no per-face mipping in
+      // this first cut); viewport stays at baseSize.
+      rtForPass = m_mipRTs[i].renderTarget;
+    }
+    else if(m_executionMode == ExecutionMode::PerLayer)
+    {
+      // Color path: one RT per layer (stored in m_mipRTs, same shape as
+      // PerCubeFace). Depth path: a single shared RT bound to the
+      // scratch depth — we copy into the OUTPUT array layer-i after
+      // endPass below, so the same RT is reused across iterations.
+      if(m_perLayerIsDepth && m_perLayerSharedRT)
+      {
+        rtForPass = m_perLayerSharedRT;
+      }
+      else if(!m_perLayerIsDepth && i < (int)m_mipRTs.size()
+              && m_mipRTs[i].renderTarget)
+      {
+        rtForPass = m_mipRTs[i].renderTarget;
+      }
+    }
 
-void RenderedRawRasterPipelineNode::runInitialPasses(
-    RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& updateBatch,
-    Edge& edge)
-{
+    cb.beginPass(rtForPass, Qt::transparent, {0.0f, 0}, invBatch);
+
+    cb.setGraphicsPipeline(pass.p.pipeline);
+    cb.setViewport(
+        QRhiViewport(0, 0, viewportSize.width(), viewportSize.height()));
+
+    // drawWithPerMeshAuxRebind sets shader resources and issues the
+    // draw call (or the per-sub-mesh loop for multi-mesh inputs).
+    // Pass the per-invocation SRB so each draw reads its own UBO.
+    // Forward the pass's fallback-binding plan so "REQUIRED: false"
+    // VERTEX_INPUTS get their identity buffers bound.
+    drawWithPerMeshAuxRebind(
+        *invSRB, cb,
+        std::span<const FallbackBindingPlan::Slot>{
+            pass.fallback_bindings.slots});
+
+    cb.endPass();
+
+    // PerLayer + depth: copy the just-rendered scratch into layer i of
+    // the OUTPUT depth array. Qt RHI 6.11 has no per-layer depth
+    // attachment API, so this scratch+copy dance is the only way to
+    // populate distinct depth-array layers in N sequential passes.
+    // Single-format / single-size copy; QRhi handles the
+    // depth-write→transfer-src and transfer-dst→depth-write barriers
+    // around it automatically.
+    if(m_executionMode == ExecutionMode::PerLayer && m_perLayerIsDepth
+       && m_perLayerScratchDepth && m_perLayerOutputDepthArray)
+    {
+      auto* copyBatch = rhi.nextResourceUpdateBatch();
+      QRhiTextureCopyDescription cdesc;
+      cdesc.setPixelSize(viewportSize);
+      cdesc.setSourceLayer(0);
+      cdesc.setSourceLevel(0);
+      cdesc.setSourceTopLeft(QPoint(0, 0));
+      cdesc.setDestinationLayer(i);
+      cdesc.setDestinationLevel(0);
+      cdesc.setDestinationTopLeft(QPoint(0, 0));
+      copyBatch->copyTexture(
+          m_perLayerOutputDepthArray, m_perLayerScratchDepth, cdesc);
+      cb.resourceUpdate(copyBatch);
+    }
+  }
+
+  // Transparent CUBEMAP + MULTIVIEW finaliser. After all render passes
+  // have ended, copy each layer of the shadow TextureArray into the
+  // matching face of the public CubeMap. QRhi cube face layer order
+  // is +X, -X, +Y, -Y, +Z, -Z — same ordering as our IBL shaders'
+  // gl_ViewIndex, so layer i maps to face i 1:1.
+  //
+  // When PER_MIP is also active, both array and cube are MipMapped
+  // and we loop across the full mip chain: N * 6 copyTexture calls
+  // for N mips. Still basically free (pure GPU blit) — a 512² cube
+  // with 10 mips is 60 ops taking microseconds.
+  if(m_cubeCopyShadowArray && m_cubeCopyCube)
+  {
+    auto* copyBatch = rhi.nextResourceUpdateBatch();
+    const QSize faceSize = m_cubeCopyCube->pixelSize();
+    const int mipLevels
+        = (m_executionMode == ExecutionMode::PerMip && m_mipCount > 0)
+              ? m_mipCount
+              : 1;
+    for(int mip = 0; mip < mipLevels; ++mip)
+    {
+      const QSize mipSize(
+          std::max(1, faceSize.width() >> mip),
+          std::max(1, faceSize.height() >> mip));
+      for(int face = 0; face < 6; ++face)
+      {
+        QRhiTextureCopyDescription desc;
+        desc.setPixelSize(mipSize);
+        desc.setSourceLayer(face);
+        desc.setSourceLevel(mip);
+        desc.setSourceTopLeft(QPoint(0, 0));
+        desc.setDestinationLayer(face);
+        desc.setDestinationLevel(mip);
+        desc.setDestinationTopLeft(QPoint(0, 0));
+        copyBatch->copyTexture(
+            m_cubeCopyCube, m_cubeCopyShadowArray, desc);
+      }
+    }
+    cb.resourceUpdate(copyBatch);
+  }
+
+  // GENERATE_MIPS: walk OUTPUTS and call generateMips() on every
+  // declared target. For cube-copy outputs the generated-on texture
+  // is the public cube (not the shadow array — downstream samples
+  // the cube, and the shadow array may not even have the MipMapped
+  // flag in non-PER_MIP cases). For all other outputs it's the
+  // colour attachment we allocated in colorTextures[].
+  //
+  // Skip when PER_MIP is active on the SAME output: the render loop
+  // has already authored distinct content per mip, and generateMips
+  // would overwrite those sub-mips with averaged base-level data.
+  {
+    auto* mipBatch = rhi.nextResourceUpdateBatch();
+    bool any = false;
+    int colorIdx = 0;
+    for(const auto& out : n.descriptor().outputs)
+    {
+      if(out.type == "depth")
+        continue;
+      if(out.generate_mips)
+      {
+        const bool perMipOwnsThis
+            = m_executionMode == ExecutionMode::PerMip
+              && colorIdx == m_perMipOutputIndex;
+        if(!perMipOwnsThis)
+        {
+          QRhiTexture* tgt
+              = (colorIdx == m_cubeCopyOutputIdx && m_cubeCopyCube)
+                    ? m_cubeCopyCube
+                    : (colorIdx == 0
+                           ? pass.renderTarget.texture
+                           : (colorIdx - 1
+                                      < (int)pass.renderTarget
+                                            .additionalColorTextures.size()
+                                  ? pass.renderTarget
+                                        .additionalColorTextures[colorIdx - 1]
+                                  : nullptr));
+          if(tgt)
+          {
+            mipBatch->generateMips(tgt);
+            any = true;
+          }
+        }
+      }
+      ++colorIdx;
+    }
+    if(any)
+      cb.resourceUpdate(mipBatch);
+    else
+      mipBatch->release();
+  }
 }
 
 void RenderedRawRasterPipelineNode::runRenderPass(
     RenderList& renderer, QRhiCommandBuffer& cb, Edge& edge)
 {
+  // Plan 09 S6: debug marker for capture-tool readability (RenderDoc /
+  // Nsight show the scope boundary + the "RawRasterPipeline" label). No
+  // GPU timing attribution here — QRhi's lastCompletedGpuTime is CB-scope,
+  // not pass-scope. The MarkEnd RAII guard declared below keeps the
+  // end-marker paired even on early returns.
+  cb.debugMarkBegin(QByteArrayLiteral("RawRasterPipeline"));
+  struct MarkEnd
+  {
+    QRhiCommandBuffer* c;
+    ~MarkEnd() { c->debugMarkEnd(); }
+  } _me{&cb};
+
+  // MRT nodes render to their internal target in runInitialPasses,
+  // then blit the appropriate texture here.
+  if(m_hasMRT)
+  {
+    // Find the blit pass for this edge
+    auto it = ossia::find_if(this->m_passes, [&](auto& p) { return p.first == &edge; });
+    if(it == this->m_passes.end())
+      return;
+
+    auto& pass = it->second;
+    SCORE_ASSERT(pass.renderTarget.renderTarget);
+    SCORE_ASSERT(pass.p.pipeline);
+    SCORE_ASSERT(pass.p.srb);
+
+    cb.setGraphicsPipeline(pass.p.pipeline);
+    cb.setShaderResources(pass.p.srb);
+
+    auto* tex = pass.renderTarget.texture;
+    cb.setViewport(QRhiViewport(
+        0, 0, tex->pixelSize().width(), tex->pixelSize().height()));
+
+    m_blitMesh->draw(this->m_blitMeshbufs, cb);
+    return;
+  }
+
   auto it = ossia::find_if(this->m_passes, [&](auto& p) { return p.first == &edge; });
   // Maybe the shader could not be created
   if(it == this->m_passes.end())
     return;
-  if(!m_mesh)
-    return;
-  if(this->m_meshbufs.buffers.empty())
+  // Procedural draws (VERTEX_INPUTS: [] + VERTEX_COUNT) have no mesh
+  // and no vertex bindings — the draw issues cb.draw(vcount, icount)
+  // directly via drawWithPerMeshAuxRebind's VERTEX_COUNT branch.
+  const bool procedural = isProceduralDraw();
+  if(!procedural && (!m_mesh || this->m_meshbufs.buffers.empty()))
     return;
 
   auto& pass = it->second;
@@ -638,20 +3353,20 @@ void RenderedRawRasterPipelineNode::runRenderPass(
     SCORE_ASSERT(pass.renderTarget.renderTarget);
     SCORE_ASSERT(pass.p.pipeline);
     SCORE_ASSERT(pass.p.srb);
-    // TODO : combine all the uniforms..
 
     auto pipeline = pass.p.pipeline;
     auto srb = pass.p.srb;
     auto texture = pass.renderTarget.texture;
 
-    // TODO need to free stuff
     {
       cb.setGraphicsPipeline(pipeline);
-      cb.setShaderResources(srb);
       cb.setViewport(QRhiViewport(
           0, 0, texture->pixelSize().width(), texture->pixelSize().height()));
 
-      m_mesh->draw(this->m_meshbufs, cb);
+      drawWithPerMeshAuxRebind(
+          *srb, cb,
+          std::span<const FallbackBindingPlan::Slot>{
+              pass.fallback_bindings.slots});
     }
   }
 }
@@ -661,6 +3376,330 @@ void RenderedRawRasterPipelineNode::process(int32_t port, const ossia::transform
   m_modelTransform = v;
 }
 
+void RenderedRawRasterPipelineNode::drawWithPerMeshAuxRebind(
+    QRhiShaderResourceBindings& srb, QRhiCommandBuffer& cb,
+    std::span<const FallbackBindingPlan::Slot> fallback_slots)
+{
+  // Phase 2 unified MDI: ScenePreprocessor's output geometry is now
+  // ALWAYS a single sub-mesh (regular meshes + instance groups all
+  // ride through one drawIndexedIndirect / one cpu_draw_commands
+  // iteration). There is no per-sub-mesh SRB rebind to do — the SRB
+  // is bound once and the draw fans out via the indirect cmd list.
+  // The legacy name is preserved for now to avoid churning every
+  // call-site; rename pass deferred.
+  cb.setShaderResources(&srb);
+
+  // PIPELINE_STATE: { "VERTEX_COUNT": N, "INSTANCE_COUNT": M,
+  // "TOPOLOGY": "..." } — procedural/VSA-style draw override. Issue a
+  // single cb.draw(N, M, 0, 0) and ignore the incoming geometry's
+  // index/indirect buffers entirely; the vertex shader drives positions
+  // from gl_VertexIndex + gl_InstanceIndex. Used for fullscreen passes
+  // (skybox: VERTEX_COUNT=3), procedural geometry (VSA plasma:
+  // VERTEX_COUNT=10000, TOPOLOGY=line_strip), etc. Without this, a
+  // fullscreen pass wired to a complex scene rasterizes N/3 fullscreen
+  // triangles — devastating even with early-Z (SciFiHelmet → ~46k
+  // fullscreen tris → ~100ms/frame on a GTX 1080).
+  //
+  // Safety: if the shader declares non-empty VERTEX_INPUTS (i.e. reads
+  // vertex attributes), clamp the draw count to the incoming geometry's
+  // vertex_count so the VS can't fetch past the bound buffer. Shaders
+  // that live purely on gl_VertexIndex should declare `VERTEX_INPUTS:
+  // []` — the pipeline is then built with no vertex bindings and
+  // VERTEX_COUNT is used verbatim.
+  {
+    const auto& ds = n.descriptor().default_state;
+    if(ds.vertex_count.has_value())
+    {
+      uint32_t vcount = *ds.vertex_count;
+      const uint32_t icount = ds.instance_count.value_or(1u);
+
+      const bool hasVertexInputs = !n.descriptor().vertex_inputs.empty();
+      if(hasVertexInputs && this->geometry.meshes
+         && !this->geometry.meshes->meshes.empty())
+      {
+        const uint32_t incoming
+            = (uint32_t)this->geometry.meshes->meshes[0].vertices;
+        if(incoming > 0 && vcount > incoming)
+          vcount = incoming;
+      }
+
+      // Bind vertex buffers driven by the geometry's `input` list — NOT
+      // every entry in m_meshbufs.buffers. Since the scene preprocessor
+      // started appending the index buffer + scene-wide SSBOs (lights /
+      // materials / per-draws / …) to g.buffers for the auxiliary
+      // mapping, blindly binding the buffers array pushes STORAGE / INDEX
+      // buffers into vertex binding slots and Vulkan validation fires
+      // `VUID-vkCmdBindVertexBuffers-pBuffers-00627`. g.input is the
+      // authoritative vertex-binding list.
+      std::array<QRhiCommandBuffer::VertexInput, 8> inputs;
+      std::size_t nb = 0;
+      if(this->geometry.meshes && !this->geometry.meshes->meshes.empty())
+      {
+        const auto& g0 = this->geometry.meshes->meshes[0];
+        const std::size_t cap = inputs.size();
+        for(const auto& in : g0.input)
+        {
+          if(nb >= cap)
+            break;
+          const std::size_t idx = (std::size_t)in.buffer;
+          if(idx >= m_meshbufs.buffers.size())
+            continue;
+          auto* h = m_meshbufs.buffers[idx].handle;
+          if(!h)
+            continue;
+          inputs[nb++] = {h, (quint32)in.byte_offset};
+        }
+      }
+      if(nb > 0)
+        cb.setVertexInput(0, (int)nb, inputs.data());
+
+      if(vcount > 0 && icount > 0)
+        cb.draw(vcount, icount, 0, 0);
+      return;
+    }
+  }
+
+  // Single-mesh draw. ScenePreprocessor unified-MDI emits one sub-mesh
+  // covering every regular cmd + every instance group; the indirect cmd
+  // list fans out across them. Per-pass pipeline swapping (alpha-blend
+  // etc.) is NOT handled here — that's the job of a dedicated
+  // downstream node configured by the user as a separate render pass.
+  if(m_mesh)
+  {
+    // Fallback-aware draw when the shader declared "REQUIRED: false"
+    // VERTEX_INPUTS whose semantics are missing from upstream geometry.
+    // Plain pass-through otherwise (zero overhead when the plan is empty).
+    if(!fallback_slots.empty())
+    {
+      if(auto* cm2 = dynamic_cast<const CustomMesh*>(m_mesh))
+        cm2->drawWithFallbackBindings(m_meshbufs, cb, fallback_slots);
+      else
+        m_mesh->draw(m_meshbufs, cb);
+    }
+    else
+    {
+      m_mesh->draw(m_meshbufs, cb);
+    }
+  }
+}
+
 RenderedRawRasterPipelineNode::~RenderedRawRasterPipelineNode() { }
 
+bool RenderedRawRasterPipelineNode::isProceduralDraw() const noexcept
+{
+  const auto& desc = n.descriptor();
+  return desc.vertex_inputs.empty()
+         && desc.default_state.vertex_count.has_value()
+         && *desc.default_state.vertex_count > 0;
+}
+
+// Generic integer-expression evaluator. Shared by EXECUTION_MODEL=MANUAL
+// (COUNT) and OUTPUTS.WIDTH / HEIGHT. Pure-integer fast path avoids the
+// expression parser for the overwhelmingly common literal case.
+// Variable surface matches CSF dispatch expressions so all three sites
+// share a mental model: $WIDTH / $HEIGHT / $DEPTH / $LAYERS of the first
+// input image (unsuffixed + per-name variants), plus scalar input values
+// as $<inputName>. '$' → 'var_' rewrite follows the CSF convention.
+int RenderedRawRasterPipelineNode::resolveIntExpression(
+    const std::string& expr, int fallback) const
+{
+  if(expr.empty())
+    return fallback;
+
+  // Pure-integer fast path — std::stoi would otherwise silently accept
+  // "6 * $x" as 6 (ignoring the variable reference entirely).
+  {
+    std::size_t i = 0;
+    while(i < expr.size() && std::isspace((unsigned char)expr[i]))
+      ++i;
+    const std::size_t first_digit = i;
+    while(i < expr.size() && std::isdigit((unsigned char)expr[i]))
+      ++i;
+    const std::size_t last_digit = i;
+    while(i < expr.size() && std::isspace((unsigned char)expr[i]))
+      ++i;
+    if(first_digit < last_digit && i == expr.size())
+    {
+      try
+      {
+        return std::max(1, std::stoi(expr));
+      }
+      catch(...)
+      {
+      }
+    }
+  }
+
+  ossia::math_expression e;
+  ossia::small_pod_vector<double, 16> data;
+  data.reserve(16);
+
+  auto register_size = [&](const std::string& name, QRhiTexture* tex,
+                           bool& first) {
+    QSize px = tex ? tex->pixelSize() : QSize{1280, 720};
+    int depth = 1, layers = 1;
+    if(tex)
+    {
+      if((int)(tex->flags() & QRhiTexture::ThreeDimensional))
+        depth = std::max(1, tex->depth());
+      if((int)(tex->flags() & QRhiTexture::TextureArray))
+        layers = std::max(1, tex->arraySize());
+    }
+    if(px.width() <= 0)
+      px.setWidth(1280);
+    if(px.height() <= 0)
+      px.setHeight(720);
+    e.add_constant("var_WIDTH_" + name, data.emplace_back(px.width()));
+    e.add_constant("var_HEIGHT_" + name, data.emplace_back(px.height()));
+    e.add_constant("var_DEPTH_" + name, data.emplace_back(depth));
+    e.add_constant("var_LAYERS_" + name, data.emplace_back(layers));
+    if(first)
+    {
+      e.add_constant("var_WIDTH", data.emplace_back(px.width()));
+      e.add_constant("var_HEIGHT", data.emplace_back(px.height()));
+      e.add_constant("var_DEPTH", data.emplace_back(depth));
+      e.add_constant("var_LAYERS", data.emplace_back(layers));
+      first = false;
+    }
+  };
+
+  // Walk the descriptor's image-style inputs in declared order so the
+  // first one supplies the unsuffixed $WIDTH / $HEIGHT family, matching
+  // CSF's `registerCommonExpressionVariables` semantics.
+  bool first_image = true;
+  int sampler_idx = 0;
+  for(const auto& inp : n.descriptor().inputs)
+  {
+    if(ossia::get_if<isf::texture_input>(&inp.data)
+       || ossia::get_if<isf::image_input>(&inp.data))
+    {
+      QRhiTexture* t = nullptr;
+      if(sampler_idx < (int)m_inputSamplers.size())
+        t = m_inputSamplers[sampler_idx].texture;
+      register_size(inp.name, t, first_image);
+      ++sampler_idx;
+    }
+  }
+
+  // Scalar ports — mirror the $<inputName> surface. Walking node.input in
+  // parallel with descriptor.inputs lets us pull live values without
+  // reimplementing the port-dispatch plumbing.
+  int port_idx = 0;
+  for(const auto& inp : n.descriptor().inputs)
+  {
+    auto port = (port_idx < (int)n.input.size()) ? n.input[port_idx]
+                                                 : nullptr;
+    if(ossia::get_if<isf::float_input>(&inp.data))
+    {
+      if(port && port->value)
+        e.add_constant(
+            "var_" + inp.name, data.emplace_back(*(float*)port->value));
+    }
+    else if(ossia::get_if<isf::long_input>(&inp.data))
+    {
+      if(port && port->value)
+        e.add_constant(
+            "var_" + inp.name, data.emplace_back(*(int*)port->value));
+    }
+    ++port_idx;
+  }
+
+  // Register $COUNT_<bufferName> / $BYTESIZE_<bufferName> for every
+  // SSBO / UBO the raster pipeline binds (INPUTS storage_input /
+  // uniform_input, plus top-level AUXILIARY entries). Same semantics as
+  // CSF: COUNT = element count of the flexible array (or 1 for UBOs /
+  // fixed-layout SSBOs), BYTESIZE = raw byte size of the binding. Lets
+  // OUTPUTS.WIDTH / HEIGHT / MANUAL-count expressions size themselves
+  // against upstream buffer extents by name, matching the convention
+  // used by CSF compute passes.
+  //
+  // Live sizes come from m_auxiliarySSBOs (populated at init time from
+  // actual buffer allocations / upstream adoptions); layout comes from
+  // the descriptor. Cross-reference by name.
+  {
+    ossia::hash_set<std::string> registered;
+    const auto& desc = n.descriptor();
+
+    // Find the live byte size for a given aux name. Falls back to 0 if
+    // the binding isn't yet live (first frame, unbound edge, etc.) —
+    // count then resolves to 1, which is the zero-copy-safe default.
+    auto find_aux_size = [&](const std::string& name) -> int64_t {
+      for(const auto& aux : m_auxiliarySSBOs)
+        if(aux.name == name)
+          return aux.size;
+      return 0;
+    };
+
+    // Register a buffer whose storage-side layout is available. SSBOs
+    // use the layout to derive element stride (fixed part + flexible-
+    // array element), UBOs skip the layout lookup since they're always
+    // one struct instance with $COUNT = 1.
+    auto register_ssbo
+        = [&](const std::string& name, int64_t byte_size,
+              std::span<const isf::storage_input::layout_field> layout) {
+      if(name.empty() || registered.contains(name))
+        return;
+      int64_t element_count = 1;
+      const int64_t fixed_part
+          = score::gfx::calculateStorageBufferSize(layout, 0, desc);
+      const int64_t with_one
+          = score::gfx::calculateStorageBufferSize(layout, 1, desc);
+      const int64_t stride = with_one - fixed_part;
+      if(stride > 0 && byte_size > fixed_part)
+        element_count = (byte_size - fixed_part) / stride;
+      if(element_count < 1)
+        element_count = 1;
+      e.add_constant(
+          "var_COUNT_" + name, data.emplace_back((double)element_count));
+      e.add_constant(
+          "var_BYTESIZE_" + name, data.emplace_back((double)byte_size));
+      registered.insert(name);
+    };
+
+    auto register_ubo
+        = [&](const std::string& name, int64_t byte_size) {
+      if(name.empty() || registered.contains(name))
+        return;
+      e.add_constant("var_COUNT_" + name, data.emplace_back(1.0));
+      e.add_constant(
+          "var_BYTESIZE_" + name, data.emplace_back((double)byte_size));
+      registered.insert(name);
+    };
+
+    // INPUTS storage_input / uniform_input
+    for(const auto& inp : desc.inputs)
+    {
+      if(auto* s = ossia::get_if<isf::storage_input>(&inp.data))
+        register_ssbo(inp.name, find_aux_size(inp.name), s->layout);
+      else if(ossia::get_if<isf::uniform_input>(&inp.data))
+        register_ubo(inp.name, find_aux_size(inp.name));
+    }
+
+    // Top-level AUXILIARY entries (declared at descriptor root).
+    for(const auto& aux : desc.auxiliary)
+    {
+      if(aux.is_uniform)
+        register_ubo(aux.name, find_aux_size(aux.name));
+      else
+        register_ssbo(aux.name, find_aux_size(aux.name), aux.layout);
+    }
+  }
+
+  std::string eval_expr = expr;
+  boost::algorithm::replace_all(eval_expr, "$", "var_");
+  e.register_symbol_table();
+  if(e.set_expression(eval_expr))
+    return std::max(1, (int)e.value());
+
+  qWarning() << "RawRaster: integer expression failed:"
+             << e.error().c_str() << eval_expr.c_str();
+  return fallback;
+}
+
+int RenderedRawRasterPipelineNode::resolveManualInvocationCount() const
+{
+  const auto& count_expr = n.descriptor().execution_model.count_expression;
+  return resolveIntExpression(count_expr, 1); // MANUAL COUNT expression, fallback 1
+}
+
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp
index 296f384553..09cdcf585a 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp
@@ -1,8 +1,14 @@
 #pragma once
 
 #include <Gfx/Graph/ISFNode.hpp>
+#include <Gfx/Graph/IsfBindingsBuilder.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderedISFUtils.hpp>
+#include <Gfx/Graph/VertexFallbackPlan.hpp>
+
+#include <ossia/detail/small_flat_map.hpp>
+
+#include <span>
 
 namespace score::gfx
 {
@@ -14,13 +20,22 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer
 
   virtual ~RenderedRawRasterPipelineNode();
 
-  void updateInputTexture(const Port& input, QRhiTexture* tex) override;
+  void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override;
+  QRhiTexture* textureForOutput(const Port& output) override;
 
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   bool updateMaterials(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge);
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+  void addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeInputEdge(RenderList& renderer, Edge& edge) override;
+
   void runInitialPasses(
       RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
       Edge& edge) override;
@@ -30,7 +45,61 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer
   void process(int32_t port, const ossia::transform3d& v) override;
 
 private:
-  void initPass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge);
+  // Resolves every image-style INPUT against the incoming geometry's
+  // auxiliary_textures list and overrides the initial texture pointer in
+  // m_inputSamplers for matches. Also builds m_auxTextureBindings so
+  // update() can cheaply re-run the lookup when the geometry changes.
+  // Must be called AFTER initInputSamplers.
+  void bindAuxTexturesInit(RenderList& renderer);
+
+  // Per-frame update hook: walks m_auxTextureBindings, re-resolves each
+  // binding's texture pointer from the current geometry's aux textures,
+  // and returns true if at least one sampler's texture pointer changed
+  // (caller will flag mustRecreatePasses).
+  bool rebindAuxTextures();
+
+  void initPass(
+      const TextureRenderTarget& rt, RenderList& renderer,
+      QRhiResourceUpdateBatch& res, Edge& edge);
+  void initMRTPass(RenderList& renderer, QRhiResourceUpdateBatch& res);
+  void initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res);
+  void initMRTBlitPass(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge);
+
+  // EXECUTION_MODEL=MANUAL: evaluate the COUNT expression against the
+  // live input state (first input image's $WIDTH / $HEIGHT / $DEPTH /
+  // $LAYERS, scalar input values as $<inputName>). Pure-integer literal
+  // fast path; otherwise delegate to ossia::math_expression with '$' →
+  // 'var_' rewrite — same convention as CSF STRIDE / image-size
+  // expressions. Returns >= 1; unparseable expressions degrade to 1.
+  int resolveManualInvocationCount() const;
+
+  // True when the shader renders procedurally: no VERTEX_INPUTS
+  // (gl_VertexIndex-driven) and PIPELINE_STATE.VERTEX_COUNT specified.
+  // In that mode the node needs no upstream geometry — m_mesh stays
+  // null and the draw call skips vertex-buffer bindings entirely.
+  // Used to relax the "no mesh, bail out" guards that otherwise block
+  // fullscreen passes, test shaders, VSA-style procedural draws, and
+  // IBL precompute shaders from rendering when wired without a
+  // geometry input.
+  bool isProceduralDraw() const noexcept;
+
+  // Evaluate an integer-valued expression against the same variable
+  // surface as resolveManualInvocationCount ($WIDTH_<inp> / $HEIGHT /
+  // scalar inputs). Used for OUTPUTS.WIDTH / HEIGHT at init time.
+  // Returns `fallback` if the expression is empty or fails to evaluate, >=1 otherwise.
+  int resolveIntExpression(const std::string& expr, int fallback) const;
+
+  // Issue the draw for the currently bound pipeline + SRB. When the input
+  // geometry carries multiple sub-meshes with per-mesh aux buffers (e.g.
+  // ScenePreprocessor per-mesh mode: one `per_draw` SSBO per sub-mesh), this
+  // iterates sub-meshes and re-points the SRB bindings at the current
+  // sub-mesh's buffers before drawing it. For single-sub-mesh or MDI-mode
+  // geometries it delegates to the mesh's default draw(). The SRB is left
+  // pointing at the last sub-mesh's bindings on return — the next
+  // runRenderPass call rebinds from scratch.
+  void drawWithPerMeshAuxRebind(
+      QRhiShaderResourceBindings& srb, QRhiCommandBuffer& cb,
+      std::span<const FallbackBindingPlan::Slot> fallback_slots = {});
 
   std::vector<Sampler> allSamplers() const noexcept;
 
@@ -40,11 +109,16 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer
 
   std::vector<Sampler> m_inputSamplers;
   std::vector<Sampler> m_audioSamplers;
+  ossia::small_flat_map<Edge*, QRhiSampler*, 4> m_blitSamplersByEdge;
 
   int64_t meshChangedIndex{-1};
   const Mesh* m_mesh{};
   MeshBuffers m_meshbufs;
 
+  // Quad mesh used for MRT blit passes (separate from the geometry mesh)
+  const Mesh* m_blitMesh{};
+  MeshBuffers m_blitMeshbufs;
+
   QRhiBuffer* m_materialUBO{};
   int m_materialSize{};
 
@@ -53,19 +127,223 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer
   struct AuxiliarySSBO
   {
     QRhiBuffer* buffer{};
+    QRhiBuffer* prev_buffer{}; //!< Only set when persistent == true: the other half of the ping-pong pair.
     int64_t size{};
-    bool owned{true}; // false when adopted from upstream geometry
+    bool owned{true}; // false when adopted from upstream geometry / upstream port
+    bool is_uniform{false}; // true for uniform_input, false for storage_input
+    bool persistent{false}; //!< Ping-pong pair swapped each frame (raw raster AUXILIARY only)
     std::string name;
     std::string access;
+    // Index into n.input[] for the score port that may carry an upstream-
+    // supplied QRhiBuffer*. -1 when the buffer can only come from the
+    // input geometry's auxiliary list (e.g. desc.auxiliary entries without
+    // a matching INPUTS port).
+    int input_port_index{-1};
+    // SRB binding slot assigned at pipeline build time. Needed so the per-
+    // sub-mesh draw loop can patch `per_draw` (and any other per-mesh aux)
+    // to point at mesh[i]'s buffer before drawing sub-mesh i. -1 when the
+    // aux was filtered out of the SRB (e.g. visibility==none).
+    int binding{-1};
+    // For persistent aux only: binding slot of the <name>_prev (read-only)
+    // half of the ping-pong pair. prev_binding + 1 == binding.
+    int prev_binding{-1};
   };
   std::vector<AuxiliarySSBO> m_auxiliarySSBOs;
 
+  // Storage images (and the rest of the INPUTS storage trio: storage_input
+  // for SSBOs / csf_image_input for image2D/3D / uniform_input for UBOs)
+  // declared in the top-level INPUTS array. Wired via the shared
+  // IsfBindingsBuilder helpers so the SRB binding type matches the
+  // GLSL emission from `isf_emit_graphics_storage` (see
+  // `isf.cpp:3349-3395`). RenderedISFNode and SimpleRenderedISFNode use
+  // the same pattern. m_auxiliarySSBOs carries only the AUXILIARY-block
+  // entries for RawRaster — the dual-population kept here is intentional
+  // for the Q1 transition while the AUXILIARY path still has its own
+  // dispatch (line 1885+); a follow-up could fold that into m_storage too.
+  GraphicsStorageResources m_storage;
+  int m_firstStorageBinding{-1};
+
+  // Texture auxes carried on the input geometry (see
+  // ossia::geometry::auxiliary_textures). Each entry records a sampler
+  // slot in m_inputSamplers that auto-resolves its texture pointer from
+  // the incoming geometry's aux-texture list by name at init() time and
+  // again every time the geometry changes. Eliminates the need for a
+  // dedicated texture cable (base_color_array / skybox / ...).
+  struct AuxTextureBinding
+  {
+    int sampler_idx{-1}; // index into m_inputSamplers
+    std::string name;    // INPUT name, matched against auxiliary_texture::name
+  };
+  std::vector<AuxTextureBinding> m_auxTextureBindings;
+
+  // Non-owning per-port sampler overrides published by upstream
+  // geometry's `auxiliary_texture::sampler_handle`. Parallel to
+  // m_inputSamplers — index N's override (or null) applies to
+  // m_inputSamplers[N]'s effective sampler at SRB-build time. Stored
+  // separately from `Sampler` because the entries in m_inputSamplers
+  // are owned and `delete sampler.sampler` runs on every entry at
+  // release; overwriting `Sampler::sampler` with a registry-owned
+  // sampler would double-free at teardown.
+  std::vector<QRhiSampler*> m_inputSamplerOverrides;
+
+  // Textures declared in the top-level AUXILIARY array (TYPE: image /
+  // texture / cubemap / image_cube). Do NOT create a score input port —
+  // resolved only from ossia::geometry::auxiliary_textures by name, with
+  // a placeholder bound until the first matching handle arrives.
+  struct AuxTextureAuxSampler
+  {
+    QRhiSampler* sampler{};  // Null for storage-image entries.
+    QRhiTexture* texture{};
+    // Shape-matched empty fallback (one of the RenderList-owned empty
+    // textures). Set at init from is_cubemap / dimensions / is_array and
+    // never changes. When rebindAuxTextures stops finding a matching
+    // aux_texture upstream (producer stopped publishing the name, got
+    // disconnected, etc.) we revert `texture` to this placeholder rather
+    // than leaving the previous (possibly-freed) upstream handle in
+    // place. Never owned by us.
+    QRhiTexture* placeholder{};
+    std::string name; // AUXILIARY entry name; resolved against auxiliary_textures by name.
+    int binding{-1};  // Presumably the SRB binding slot (cf. AuxiliarySSBO::binding) — TODO confirm.
+    // Storage-image variant: bound with imageLoad / imageStore /
+    // imageLoadStore instead of sampledTexture. `access` distinguishes
+    // which of the three — "read_only" / "write_only" / "read_write".
+    bool is_storage{false};
+    std::string access;
+  };
+  std::vector<AuxTextureAuxSampler> m_auxTextureSamplers;
+
   std::optional<AudioTextureUpload> m_audioTex;
 
+  // MRT: internally-owned render target with multiple attachments
+  TextureRenderTarget m_mrtRenderTarget;
+  bool m_hasMRT{false};
+  bool m_mrtRenderedThisFrame{false};
+
+  // EXECUTION_MODEL (top-level, RAW_RASTER only).
+  //   Single   — classic single-invocation pass (default; no extra loop).
+  //   PerMip   — N invocations, one per mip level of the TARGET output.
+  //              Each invocation binds a per-mip render target so the
+  //              single draw writes only that mip; ProcessUBO.passIndex
+  //              carries the mip index. Needed for prefiltered-GGX
+  //              roughness sweep.
+  //   PerLayer — N invocations, one per array layer of the TARGET output.
+  //              Each invocation binds the matching layer; ProcessUBO.
+  //              passIndex carries the layer index. Color targets bind
+  //              setLayer(i) directly. Depth targets render to a shared
+  //              scratch and copyTexture into layer i after the pass
+  //              (Qt RHI 6.11 has no per-layer depth attachment API).
+  //              Drives shadow_cascades.frag (one cascade per layer).
+  //   Manual   — N invocations decided every frame by evaluating a
+  //              COUNT expression via the math_expression parser (same
+  //              variable surface as CSF STRIDE / image-size expressions:
+  //              $WIDTH, $HEIGHT, $<inputName>, ...). All invocations
+  //              share the single MRT render target; the shader reads
+  //              ProcessUBO.passIndex to branch.
+  enum class ExecutionMode : std::uint8_t
+  {
+    Single,        // One invocation, no extra loop (default)
+    PerMip,        // Iterate N mip levels of the TARGET output; passIndex = mip index
+    PerCubeFace,   // Iterate 6 cube faces; target = CubeMap + setLayer(i)
+    PerLayer,      // Iterate N array layers; target = TextureArray + setLayer(i)
+    Manual         // N invocations from the COUNT expression, re-evaluated every frame
+  };
+  ExecutionMode m_executionMode{ExecutionMode::Single};
+
+  // PerCubeFace state. The target OUTPUT is allocated with
+  // QRhiTexture::CubeMap (6 implicit layers) and six per-face render
+  // targets are built at init; runInitialPasses iterates them in order,
+  // stamping the face index into ProcessUBO.passIndex. Shares the
+  // m_perMipOutputIndex resolution path (same "which colour output is
+  // the target" question) and reuses the m_mipRTs vector for storage
+  // — interpretation is mode-dependent (mip level vs face index).
+  int m_perCubeFaceOutputIndex{-1};
+
+  // PerMip state. When PerMip is active the MRT target texture is
+  // allocated with QRhiTexture::MipMapped and m_mipCount / m_mipRTs
+  // point at per-level render-pass views of it. m_perMipOutputIndex is
+  // the index into m_mrtRenderTarget{.texture, .additionalColorTextures}
+  // that we iterate. -1 in other modes.
+  int m_perMipOutputIndex{-1};
+  int m_mipCount{0};
+  struct MipRT
+  {
+    QRhiTextureRenderTarget* renderTarget{}; // one per mip level / cube face / layer (mode-dependent)
+    QRhiRenderPassDescriptor* renderPass{};  // presumably the descriptor matching renderTarget — TODO confirm
+    QRhiTexture* depth{}; // per-level depth — owned here.
+  };
+  std::vector<MipRT> m_mipRTs;
+
+  // PerLayer state. m_perLayerOutputIndex is the RAW index into
+  // descriptor().outputs[] (depth-inclusive — unlike the color-only
+  // m_perMipOutputIndex / m_perCubeFaceOutputIndex). m_perLayerIsDepth
+  // discriminates the two implementation paths:
+  //
+  //   - Color target (m_perLayerIsDepth == false): m_mipRTs holds N
+  //     entries (one per layer), each with a setLayer(i) attachment.
+  //     Mirrors PER_CUBE_FACE structurally with a variable layer count.
+  //
+  //   - Depth target (m_perLayerIsDepth == true): Qt RHI 6.11 doesn't
+  //     expose per-layer depth attachment, so m_perLayerScratchDepth is
+  //     a single 2D D32F render-target texture shared across iterations
+  //     (m_perLayerSharedRT/RP). After each iteration's endPass,
+  //     runInitialPasses emits copyTexture(scratch -> depthTex layer i).
+  //     m_perLayerOutputDepthArray aliases depthTex (the OUTPUT array),
+  //     used as the copy destination.
+  int  m_perLayerOutputIndex{-1};
+  bool m_perLayerIsDepth{false};
+  QRhiTexture*              m_perLayerScratchDepth{nullptr};
+  QRhiTexture*              m_perLayerDummyColor{nullptr};
+  QRhiTextureRenderTarget*  m_perLayerSharedRT{nullptr};
+  QRhiRenderPassDescriptor* m_perLayerSharedRP{nullptr};
+  QRhiTexture*              m_perLayerOutputDepthArray{nullptr};
+
+  // Manual state. Re-evaluated every frame in runInitialPasses.
+  int m_manualCount{1};
+
+  // Per-invocation UBO + SRB pool for PER_MIP / PER_CUBE_FACE / MANUAL.
+  //
+  // Dynamic UBOs in QRhi have a SINGLE slot per frame-in-flight:
+  // multiple updateDynamicBuffer calls to the same buffer within one
+  // frame overwrite each other on the host, and every draw submitted
+  // that frame ends up reading the LAST uploaded value. Stamping
+  // distinct PASSINDEX values per invocation into one shared UBO
+  // therefore collapses — all mips / faces render with the same
+  // (last) index, producing uniformly-blurred output at every mip.
+  //
+  // Fix: one UBO + one SRB per invocation, all pre-built at init so
+  // the render loop just swaps which SRB it binds per pass. Index 0
+  // corresponds to the main pass UBO/SRB (pass.processUBO /
+  // pass.p.srb) — the vectors below hold indices 1..N-1 only, which
+  // are allocated lazily when invocation count exceeds the current
+  // pool size (handles MANUAL whose count is per-frame-dynamic).
+  std::vector<QRhiBuffer*> m_perInvocationUBOs;
+  std::vector<QRhiShaderResourceBindings*> m_perInvocationSRBs;
+
+  // Transparent CUBEMAP + MULTIVIEW compatibility shim. QRhi forbids
+  // setMultiViewCount on a cube texture (qrhi.cpp:2561). When a shader
+  // declares both `CUBEMAP: true` and `MULTIVIEW: N`, we render into a
+  // hidden 2D TextureArray (the only shape multiview accepts) and then
+  // blit each array layer onto the corresponding cube face at the end
+  // of runInitialPasses. Downstream consumers see a real samplerCube
+  // via textureForOutput() → the cube; the shadow array never leaves
+  // this class.
+  //
+  // m_cubeCopyShadowArray  = TextureArray used as the multiview render
+  //                          target (6 layers, `UsedAsTransferSource`).
+  // m_cubeCopyCube         = public CubeMap handed to downstream.
+  // m_cubeCopyOutputIdx    = colour-attachment index (0-based among
+  //                          non-depth outputs) whose target is handled
+  //                          via the array-then-copy path; -1 otherwise.
+  //                          Only one output per shader gets this
+  //                          treatment in this first cut.
+  QRhiTexture* m_cubeCopyShadowArray{};
+  QRhiTexture* m_cubeCopyCube{};
+  int m_cubeCopyOutputIdx{-1};
+
   // The part of the m_materialUBO for which changes
   // trigger a pipeline recreation (blend status etc.)
   static constexpr int size_of_pipeline_material = 32;
-  char m_prevPipelineChangingMaterial[size_of_pipeline_material]{0};
+  alignas(4) char m_prevPipelineChangingMaterial[size_of_pipeline_material]{0};
   struct PipelineChangingMaterial
   {
     int32_t mode;         // tri, point, line
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp
index 8fd1037b5a..288586a76f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp
@@ -33,7 +33,7 @@ SimpleRenderedVSANode::SimpleRenderedVSANode(const ISFNode& node) noexcept
 {
 }
 
-void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* tex)
+void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
   int sampler_idx = 0;
   for(auto* p : node.input)
@@ -41,7 +41,11 @@ void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* t
     if(p == &input)
       break;
     if(p->type == Types::Image)
+    {
       sampler_idx++;
+      if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+        sampler_idx++;
+    }
   }
 
   if(sampler_idx < (int)m_inputSamplers.size())
@@ -54,6 +58,20 @@ void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* t
         if(pd.main_pass.p.srb)
           score::gfx::replaceTexture(*pd.main_pass.p.srb, sampl.sampler, tex);
     }
+
+    if(depthTex
+       && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth
+       && sampler_idx + 1 < (int)m_inputSamplers.size())
+    {
+      auto& depthSampl = m_inputSamplers[sampler_idx + 1];
+      if(depthSampl.texture != depthTex)
+      {
+        depthSampl.texture = depthTex;
+        for(auto& pd : m_passes)
+          if(pd.main_pass.p.srb)
+            score::gfx::replaceTexture(*pd.main_pass.p.srb, depthSampl.sampler, depthTex);
+      }
+    }
   }
 }
 
@@ -118,35 +136,90 @@ void SimpleRenderedVSANode::initPass(
   pubo->setName("SimpleRenderedVSANode::initPass::pubo");
   pubo->create();
 
-  // Create the main pass
+  // Create the main pass.
+  // Apply cull-mode, front-face, and blend state BEFORE the first create()
+  // call so we only compile the PSO once instead of the previous two-compile
+  // pattern (buildPipeline::create + destroy + mutate + create).
   try
   {
     auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS);
-    auto pip = score::gfx::buildPipeline(
-        renderer, *m_mesh, v, s, renderTarget, pubo, m_materialUBO, allSamplers());
-    if(pip.pipeline)
+    auto* srb = score::gfx::createDefaultBindings(
+        renderer, renderTarget, pubo, m_materialUBO, allSamplers());
+
+    // Inline the essential steps of buildPipeline(srb) so we can insert the
+    // VSA-specific cull/front-face/blend state before create().
+    auto* ps = rhi.newGraphicsPipeline();
+    SCORE_ASSERT(ps);
+    ps->setName("SimpleRenderedVSANode::initPass::ps");
+
+    // VSA blend: simple alpha blend (no premul factors needed here).
+    QRhiGraphicsPipeline::TargetBlend t{};
+    t.enable = true;
+    ps->setTargetBlends({t});
+
+    // API-specific cull mode for 3-D VSA meshes.
+    //
+    // Note: this is NOT a Y-up vs Y-down NDC issue. QRhi exposes
+    // QRhi::isYUpInNDC() and QRhi::clipSpaceCorrMatrix() (qrhi.h:2056,
+    // :2059) so a shader applying clipSpaceCorrMatrix uniformly across
+    // backends does not need a per-backend cull-flip. Other rendered-
+    // pipeline nodes (RenderedISFNode, RenderedRawRasterPipelineNode,
+    // CustomMesh) just use unconditional CullMode::Back.
+    //
+    // VSA emits its mesh procedurally (no clipSpaceCorrMatrix applied)
+    // and its triangle winding ends up CCW under GL's framebuffer-Y
+    // convention; flipping to CullMode::Front under GL is the workaround
+    // until VSA's procedural emit applies the corr matrix itself.
+    switch(renderer.state.api)
     {
-      QRhiGraphicsPipeline::TargetBlend t{};
-      t.enable = true;
-      pip.pipeline->destroy();
-      switch(renderer.state.api)
-      {
-        default:
-        case GraphicsApi::Vulkan:
-          pip.pipeline->setCullMode(QRhiGraphicsPipeline::CullMode::Back);
-          break;
-        case GraphicsApi::OpenGL:
-          pip.pipeline->setCullMode(QRhiGraphicsPipeline::CullMode::Front);
-          break;
-      }
-      pip.pipeline->setFrontFace(QRhiGraphicsPipeline::FrontFace::CW);
-      pip.pipeline->setTargetBlends({t});
-      pip.pipeline->create();
+      case GraphicsApi::Vulkan:
+      case GraphicsApi::D3D11:
+      case GraphicsApi::D3D12:
+      case GraphicsApi::Metal:
+      case GraphicsApi::Null:
+        ps->setCullMode(QRhiGraphicsPipeline::CullMode::Back);
+        break;
+      case GraphicsApi::OpenGL:
+        ps->setCullMode(QRhiGraphicsPipeline::CullMode::Front);
+        break;
+      default:
+        qWarning() << "RenderedVSANode: unhandled graphics API for cull mode; defaulting to Back";
+        ps->setCullMode(QRhiGraphicsPipeline::CullMode::Back);
+        break;
+    }
+    ps->setFrontFace(QRhiGraphicsPipeline::FrontFace::CW);
+
+    const int rtS = renderTarget.sampleCount();
+    ps->setSampleCount(rtS > 0 ? rtS : renderer.samples());
+
+    m_mesh->preparePipeline(*ps);
+
+    if(!renderer.anyNodeRequiresDepth())
+    {
+      ps->setDepthTest(false);
+      ps->setDepthWrite(false);
+    }
+
+    ps->setShaderStages(
+        {{QRhiShaderStage::Vertex, v}, {QRhiShaderStage::Fragment, s}});
+    ps->setShaderResourceBindings(srb);
+    SCORE_ASSERT(renderTarget.renderPass);
+    ps->setRenderPassDescriptor(renderTarget.renderPass);
+
+    Pipeline pip{};
+    if(ps->create())
+    {
+      pip = {ps, srb};
       m_passes.emplace_back(
           &edge, Pass{renderTarget, pip, pubo}, bg_pip, bg_srb, bg_ubo, bg_tri);
     }
     else
+    {
+      qDebug() << "Warning! VSA pipeline not created";
+      delete ps;
+      delete srb;
       delete pubo;
+    }
   }
   catch(...)
   {
@@ -154,6 +227,14 @@ void SimpleRenderedVSANode::initPass(
 }
 
 void SimpleRenderedVSANode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res); // node-wide state first, then one pass per outgoing edge
+  const auto& outgoing_edges = n.output[0]->edges;
+  for(Edge* outgoing : outgoing_edges)
+    addOutputPass(renderer, *outgoing, res);
+}
+
+void SimpleRenderedVSANode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   QRhi& rhi = *renderer.state.rhi;
 
@@ -195,6 +276,8 @@ void SimpleRenderedVSANode::init(RenderList& renderer, QRhiResourceUpdateBatch&
         = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
     m_materialUBO->setName("SimpleRenderedVSANode::init::m_materialUBO");
     SCORE_ASSERT(m_materialUBO->create());
+    if(n.m_material_data)
+      res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get());
   }
 
   // Create the samplers
@@ -202,22 +285,53 @@ void SimpleRenderedVSANode::init(RenderList& renderer, QRhiResourceUpdateBatch&
   SCORE_ASSERT(m_inputSamplers.empty());
   SCORE_ASSERT(m_audioSamplers.empty());
 
-  m_inputSamplers = initInputSamplers(this->n, renderer, n.input);
+  m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor());
 
   m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures);
 
-  // Create the passes
+  m_initialized = true;
+}
 
-  for(Edge* edge : n.output[0]->edges)
+void SimpleRenderedVSANode::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  auto rt = renderer.renderTargetForOutput(edge);
+  if(rt.renderTarget) // no pass is created for an edge without a render target
   {
-    auto rt = renderer.renderTargetForOutput(*edge);
-    if(rt.renderTarget)
-    {
-      initPass(rt, renderer, *edge, res);
-    }
+    initPass(rt, renderer, edge, res);
+  }
+}
+
+void SimpleRenderedVSANode::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  // Tear down the pass attached to `edge` (if any): the main pipeline and
+  // its process UBO first, then the background pipeline / SRB / UBO.
+  const auto is_for_edge = [&](const auto& pass) { return pass.edge == &edge; };
+  auto found = ossia::find_if(m_passes, is_for_edge);
+  if(found != m_passes.end())
+  {
+    found->main_pass.p.release();
+    if(auto* process_ubo = found->main_pass.processUBO)
+      process_ubo->deleteLater();
+    // Background resources are released in order: pipeline, SRB, UBO.
+    QRhiResource* const background[] = {
+        found->background_pipeline, found->background_srb,
+        found->background_ubo};
+    for(QRhiResource* resource : background)
+    {
+      resource->destroy();
+      resource->deleteLater();
+    }
+    m_passes.erase(found);
   }
 }
 
+bool SimpleRenderedVSANode::hasOutputPassForEdge(Edge& edge) const
+{
+  const auto owns_edge = [&](const auto& pass) { return pass.edge == &edge; };
+  return ossia::find_if(m_passes, owns_edge) != m_passes.end(); // true if a pass exists
+}
+
 void SimpleRenderedVSANode::update(
     RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
 {
@@ -247,6 +361,7 @@ void SimpleRenderedVSANode::update(
   }
 
   bool audioChanged = false;
+  std::size_t audio_idx = 0;
   for(auto& audio : n.m_audio_textures)
   {
     if(std::optional<Sampler> sampl
@@ -255,13 +370,30 @@ void SimpleRenderedVSANode::update(
       // Texture changed -> material changed
       audioChanged = true;
 
-      auto& [rhiSampler, tex] = *sampl;
+      auto& [rhiSampler, tex, fb_] = *sampl;
+      QRhiTexture* boundTex = tex ? tex : &renderer.emptyTexture();
+
+      // Keep m_audioSamplers[i].texture in sync with the live GPU texture.
+      // If a pass is later torn down and rebuilt (e.g. rt_changed path in
+      // RenderList::render calling removeOutputPass + addOutputPass),
+      // allSamplers() must hand buildPipeline the current texture so the
+      // fresh SRB is bound correctly. Without this sync the rebuilt SRB
+      // would bind &renderer.emptyTexture() (because m_audioSamplers had
+      // texture=nullptr from initAudioTextures) and no subsequent
+      // updateAudioTexture would ever re-trigger replaceTexture — the
+      // post-no-change path returns {} — so the shader would read zero
+      // for the rest of the session. Observed as 1×1 empty texture in
+      // RenderDoc after a viewport resize.
+      if(audio_idx < m_audioSamplers.size())
+        m_audioSamplers[audio_idx].texture = tex;
+
       for(auto& pass : m_passes)
       {
         score::gfx::replaceTexture(
-            *pass.main_pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture());
+            *pass.main_pass.p.srb, rhiSampler, boundTex);
       }
     }
+    ++audio_idx;
   }
 
   // Update material
@@ -270,6 +402,7 @@ void SimpleRenderedVSANode::update(
     char* data = n.m_material_data.get();
     res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
   }
+  materialChanged = false;
 
   // Update all the process UBOs
   for(auto& pass : m_passes)
@@ -288,7 +421,15 @@ void SimpleRenderedVSANode::update(
 
 void SimpleRenderedVSANode::release(RenderList& r)
 {
-  // customRelease
+  releaseState(r);
+}
+
+void SimpleRenderedVSANode::releaseState(RenderList& r)
+{
+  if(!m_initialized)
+    return;
+
+  // Release all remaining passes
   {
     for(auto& texture : n.m_audio_textures)
     {
@@ -300,6 +441,8 @@ void SimpleRenderedVSANode::release(RenderList& r)
           if(tex != &r.emptyTexture())
             tex->deleteLater();
         }
+        it->second.texture = nullptr;
+        it->second = {};
       }
     }
 
@@ -326,13 +469,11 @@ void SimpleRenderedVSANode::release(RenderList& r)
   for(auto sampler : m_inputSamplers)
   {
     delete sampler.sampler;
-    // texture isdeleted elsewxheree
   }
   m_inputSamplers.clear();
   for(auto sampler : m_audioSamplers)
   {
     delete sampler.sampler;
-    // texture isdeleted elsewxheree
   }
   m_audioSamplers.clear();
 
@@ -341,6 +482,8 @@ void SimpleRenderedVSANode::release(RenderList& r)
 
   delete m_mesh;
   m_mesh = nullptr;
+
+  m_initialized = false;
 }
 
 void SimpleRenderedVSANode::runInitialPasses(
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp
index 64607503fd..09c4dfc9ca 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp
@@ -12,12 +12,19 @@ struct SimpleRenderedVSANode : score::gfx::NodeRenderer
 
   virtual ~SimpleRenderedVSANode();
 
-  void updateInputTexture(const Port& input, QRhiTexture* tex) override;
+  void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override;
 
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+
   void runInitialPasses(
       RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
       Edge& edge) override;
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm
index 61e288d7cd..587089806d 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm
@@ -13,6 +13,20 @@
 namespace score::gfx
 {
 
+// Pre-condition: cb must NOT have an active render or compute pass.
+// Metal allows only one encoder open on a command buffer at a time; calling
+// [MTLCommandBuffer blitCommandEncoder] while a render or compute encoder is
+// still open will trigger a Metal internal assertion or silent misbehaviour.
+// Call this between cb.endPass() and the next cb.beginPass().
+//
+// Hazard tracking: Metal's default MTLHazardTrackingModeTracked automatically
+// inserts a dependency between this blit encoder and any subsequent encoder on
+// the same command buffer that accesses the same buffer. No explicit MTLFence
+// or MTLBarrier is required for tracked resources.
+//
+// Note: QRhi's own QRhiResourceUpdateBatch::copyBuffer enforces the
+// no-active-pass contract internally. This native-handle path bypasses that
+// check, so the caller is responsible for ensuring no encoder is open.
 void copyBufferMetal(
     QRhi& rhi, QRhiCommandBuffer& cb,
     QRhiBuffer* src, QRhiBuffer* dst, int size,
@@ -52,6 +66,54 @@ void copyBufferMetal(
   [blit endEncoding];
 }
 
+// Pre-condition: cb must NOT have an active render or compute pass.
+// Same contract as copyBufferMetal above: only one encoder may be open on a
+// MTLCommandBuffer at a time. Caller is responsible for ensuring no render or
+// compute encoder is currently open before calling this function.
+//
+// Metal's default hazard tracking inserts the required memory dependency
+// between this blit and subsequent encoders on the same command buffer that
+// read the destination buffer; no explicit fence is needed.
+void copyBufferRegionsMetal(
+    QRhi& rhi, QRhiCommandBuffer& cb,
+    QRhiBuffer* src, QRhiBuffer* dst,
+    const BufferCopyRegion* regions, int count)
+{
+  if(!src || !dst || !regions || count <= 0) // nothing to copy: silent no-op (`rhi` is never read)
+    return;
+
+  const auto* handles
+      = static_cast<const QRhiMetalCommandBufferNativeHandles*>(cb.nativeHandles());
+  if(!handles || !handles->commandBuffer) // no native command buffer to record into
+    return;
+
+  auto srcNative = src->nativeBuffer();
+  auto dstNative = dst->nativeBuffer();
+  if(!srcNative.objects[0] || !dstNative.objects[0]) // only slot 0 of each buffer is ever used here
+    return;
+
+  id<MTLCommandBuffer> cmdBuf = (id<MTLCommandBuffer>)handles->commandBuffer;
+  void* const* srcSlot = static_cast<void* const*>(srcNative.objects[0]); // objects[0] is read as a pointer to the handle
+  void* const* dstSlot = static_cast<void* const*>(dstNative.objects[0]);
+  id<MTLBuffer> srcBuf = (__bridge id<MTLBuffer>) (*srcSlot);
+  id<MTLBuffer> dstBuf = (__bridge id<MTLBuffer>) (*dstSlot);
+  if(!srcBuf || !dstBuf)
+    return;
+
+  // A single blit encoder records all `count` copies, so encoder
+  // creation/teardown is paid once rather than once per region.
+  id<MTLBlitCommandEncoder> blit = [cmdBuf blitCommandEncoder];
+  for(int i = 0; i < count; ++i)
+  {
+    [blit copyFromBuffer:srcBuf
+            sourceOffset:(NSUInteger)regions[i].src_offset
+                toBuffer:dstBuf
+       destinationOffset:(NSUInteger)regions[i].dst_offset
+                    size:(NSUInteger)regions[i].size];
+  }
+  [blit endEncoding]; // close the encoder so another one can be opened on cmdBuf
+}
+
 }
 
 #else
@@ -64,6 +126,12 @@ void copyBufferMetal(
     QRhiBuffer*, QRhiBuffer*, int, int, int)
 {
 }
+void copyBufferRegionsMetal(
+    QRhi&, QRhiCommandBuffer&,
+    QRhiBuffer*, QRhiBuffer*,
+    const BufferCopyRegion*, int)
+{ // Intentionally empty: stand-in compiled in the #else branch of this file.
+}
 }
 
 #endif
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.cpp
new file mode 100644
index 0000000000..dc9bb99129
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.cpp
@@ -0,0 +1,261 @@
+#include <Gfx/Graph/RhiClearBuffer.hpp>
+
+#include <Gfx/Graph/RhiComputeBarrier.hpp>
+
+#include <QtGui/private/qrhi_p.h>
+
+// Vulkan
+#if QT_HAS_VULKAN || (QT_CONFIG(vulkan) && __has_include(<vulkan/vulkan.h>))
+#include <score/gfx/Vulkan.hpp>
+#if __has_include(<QtGui/rhi/qrhi_platform.h>)
+#include <QtGui/rhi/qrhi_platform.h>
+#else
+#include <QtGui/private/qrhivulkan_p.h>
+#endif
+#include <QVulkanInstance>
+#define SCORE_HAS_VULKAN 1
+#endif
+
+#include <algorithm>
+#include <cstring>
+#include <vector>
+
+// On non-Apple, provide a no-op stub for clearBufferMetal
+// (the real implementation lives in RhiClearBufferMetal.mm)
+#if !defined(Q_OS_MACOS) && !defined(Q_OS_IOS)
+namespace score::gfx
+{
+bool clearBufferMetal(
+    QRhi&, QRhiCommandBuffer&, QRhiBuffer*, quint32, quint32, quint32)
+{
+  return false; // "native path unavailable": clearBufferNative() forwards this, so clearBuffer() falls back
+}
+}
+#endif
+
+namespace score::gfx
+{
+namespace
+{
+
+// Thread-local zero-buffer pool. Amortises the std::vector<char>(N, 0)
+// allocation across every clearBuffer call site: the vector is
+// (re)allocated and zero-filled only when a request exceeds its current
+// size, so at steady state a call is a size comparison plus a pointer
+// return. Nothing here writes into the pool after that, and the pointer
+// is returned as const char*, so the contents are expected to stay zero.
+// The pool is never shrunk.
+//
+// Non-zero patterns do not come through here: getSourceBytes() routes
+// them to getPatternBuffer(), which keeps its own separate vector.
+const char* getZeroBuffer(quint32 size)
+{
+  thread_local std::vector<char> zero_pool;
+  if(zero_pool.size() < size)
+    zero_pool.assign(size, 0);
+  return zero_pool.data();
+}
+
+// Non-zero counterpart of getZeroBuffer(): hands back `size` bytes holding
+// the 4-byte `pattern` repeated back to back. The vector and the record of
+// what it currently contains are thread_local, so repeated requests for
+// the same pattern skip the refill; a different pattern triggers one.
+const char* getPatternBuffer(quint32 size, quint32 pattern)
+{
+  thread_local std::vector<char> pool;
+  thread_local quint32 cached_pattern = 0u;
+  thread_local quint32 cached_bytes = 0u;
+
+  const bool must_grow = pool.size() < size;
+  if(must_grow)
+    pool.resize(size);
+  const bool reusable = !must_grow && cached_pattern == pattern && cached_bytes >= size;
+  if(!reusable)
+  {
+    // Whole 4-byte words first, then the leading bytes of one more word
+    // when `size` is not a multiple of 4.
+    const quint32 whole_bytes = (size / 4u) * 4u;
+    for(quint32 at = 0; at < whole_bytes; at += 4u)
+      std::memcpy(pool.data() + at, &pattern, 4u);
+    if(whole_bytes != size)
+      std::memcpy(pool.data() + whole_bytes, &pattern, size - whole_bytes);
+    cached_pattern = pattern;
+    cached_bytes = size;
+  }
+  return pool.data();
+}
+
+const char* getSourceBytes(quint32 size, quint32 pattern)
+{
+  return pattern != 0u ? getPatternBuffer(size, pattern) : getZeroBuffer(size);
+}
+
+// Records the clear into `batch`. Non-Dynamic buffers get a single
+// uploadStaticBuffer call; Dynamic buffers get a series of
+// updateDynamicBuffer calls of at most 65535 bytes each (limit chosen by
+// the original author — NOTE(review): confirm against the QRhi docs).
+void clearViaBatch(
+    QRhiResourceUpdateBatch& batch, QRhiBuffer* buf,
+    quint32 offset, quint32 size, quint32 pattern)
+{
+  if(buf == nullptr || size == 0u)
+    return;
+
+  const char* const bytes = getSourceBytes(size, pattern);
+  if(buf->type() != QRhiBuffer::Dynamic)
+  {
+    batch.uploadStaticBuffer(buf, offset, size, bytes);
+    return;
+  }
+
+  constexpr quint32 kMaxChunk = 65535u;
+  for(quint32 done = 0u; done < size;)
+  {
+    const quint32 step = std::min<quint32>(size - done, kMaxChunk);
+    batch.updateDynamicBuffer(buf, offset + done, step, bytes + done);
+    done += step;
+  }
+}
+
+}  // namespace
+
+// Attempts a backend-native fill of [offset, offset + size) recorded into
+// `cb`. Returns true when the fill was recorded, false when the caller
+// should use the update-batch fallback instead. `buf` must be non-null.
+static bool clearBufferNative(
+    QRhi& rhi,
+    QRhiCommandBuffer& cb,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern)
+{
+  switch(rhi.backend())
+  {
+#if SCORE_HAS_VULKAN
+    case QRhi::Vulkan: {
+      // Two cases are handed to the fallback instead of vkCmdFillBuffer:
+      //  - Dynamic buffers. vkCmdFillBuffer is only legal on buffers with
+      //    VK_BUFFER_USAGE_TRANSFER_DST_BIT, which QRhi's QVkBuffer::create
+      //    adds only for non-Dynamic buffers (see qrhivulkan.cpp).
+      //  - Ranges whose offset or size is not a multiple of 4, which
+      //    vkCmdFillBuffer does not accept. The update-batch fallback is
+      //    byte-granular, so it clears exactly the requested bytes
+      //    instead of handing the driver an invalid range.
+      if(buf->type() == QRhiBuffer::Dynamic || ((offset | size) & 3u) != 0u)
+        return false;
+
+      auto* inst = score::gfx::staticVulkanInstance();
+      if(!inst)
+        return false;
+
+      auto fn = reinterpret_cast<PFN_vkCmdFillBuffer>(
+          inst->getInstanceProcAddr("vkCmdFillBuffer"));
+      if(!fn)
+        return false;
+
+      auto* native
+          = static_cast<const QRhiVulkanCommandBufferNativeHandles*>(cb.nativeHandles());
+      if(!native || !native->commandBuffer)
+        return false;
+
+      auto bufNative = buf->nativeBuffer();
+      if(!bufNative.objects[0])
+        return false;
+
+      // QRhi NativeBuffer convention (Vulkan): objects[i] is `VkBuffer *`,
+      // i.e. a POINTER TO the handle. Dereference to obtain the actual
+      // VkBuffer. See the long comment in RhiComputeBarrier.cpp's copyBuffer
+      // for the per-backend convention table.
+      VkBuffer vkbuf = *static_cast<const VkBuffer*>(bufNative.objects[0]);
+      if(vkbuf == VK_NULL_HANDLE)
+        return false;
+
+      cb.beginExternal();
+      // vkCmdFillBuffer bypasses QRhi's resource tracking, so we must emit the
+      // same compute→transfer→compute/vertex/indirect barriers the copyBuffer
+      // path uses. Without the pre-barrier a prior compute write may not be
+      // visible to the fill; without the post-barrier a subsequent draw/compute
+      // read may race the fill. beginBufferCopyBarrier/endBufferCopyBarrier are
+      // designed to run inside an existing beginExternal/endExternal bracket
+      // (they record vkCmdPipelineBarrier directly), which is exactly here.
+      beginBufferCopyBarrier(rhi, cb);
+      // vkCmdFillBuffer signature: (cb, buffer, offset, size, data).
+      // - offset and size MUST be multiples of 4. Unaligned requests
+      //   were diverted to the fallback above rather than rounded,
+      //   because rounding would clear bytes the caller didn't request.
+      // - data is a uint32_t replicated across the range (exactly the
+      //   contract the abstraction exposes via @p pattern).
+      // - The buffer must NOT be in a render pass; this path is
+      //   intended for resource setup / runInitialPasses-style sites
+      //   that have a CB but no active pass.
+      fn(native->commandBuffer, vkbuf,
+         static_cast<VkDeviceSize>(offset),
+         static_cast<VkDeviceSize>(size),
+         pattern);
+      endBufferCopyBarrier(rhi, cb);
+      cb.endExternal();
+      return true;
+    }
+#endif
+
+    case QRhi::Metal:
+      return clearBufferMetal(rhi, cb, buf, offset, size, pattern);
+
+#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
+    case QRhi::D3D12:
+#endif
+    case QRhi::D3D11:
+    case QRhi::OpenGLES2:
+    default:
+      // No native fast path wired yet.
+      return false;
+  }
+}
+
+void RhiClearBuffer::clearBuffer(
+    QRhi& rhi,
+    QRhiCommandBuffer& cb,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern)
+{
+  // Null buffer or empty range: nothing to record.
+  if(buf == nullptr || size == 0u)
+    return;
+
+  // First choice: a backend-native fill recorded straight into `cb`.
+  const bool filled_natively = clearBufferNative(rhi, cb, buf, offset, size, pattern);
+  if(filled_natively)
+    return;
+
+  // Otherwise take a fresh update batch from the rhi (the caller gave us
+  // a command buffer, not a batch), record the clear into it and hand it
+  // to `cb`. If the rhi returns no batch, the clear is silently dropped.
+  QRhiResourceUpdateBatch* const batch = rhi.nextResourceUpdateBatch();
+  if(batch == nullptr)
+    return;
+  clearViaBatch(*batch, buf, offset, size, pattern);
+  cb.resourceUpdate(batch);
+}
+
+void RhiClearBuffer::clearBuffer(
+    QRhi& rhi,
+    QRhiResourceUpdateBatch& batch,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern)
+{
+  // `rhi` is accepted only so both overloads share a parameter list;
+  // this overload never looks at the backend. The clear is recorded
+  // into the caller's batch by clearViaBatch(), whose source bytes come
+  // from the thread-local pools instead of a per-call allocation. No
+  // native fill is attempted here because there is no command buffer
+  // to record one into.
+  static_cast<void>(rhi);
+  clearViaBatch(batch, buf, offset, size, pattern);
+}
+
+}  // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.hpp
new file mode 100644
index 0000000000..a3a56d6bf9
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.hpp
@@ -0,0 +1,103 @@
+#pragma once
+#include <score_plugin_gfx_export.h>
+
+#include <QtCore/qglobal.h>
+
+class QRhi;
+class QRhiBuffer;
+class QRhiCommandBuffer;
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+
+/**
+ * @brief Fill (a sub-range of) a QRhiBuffer with a 4-byte pattern.
+ *
+ * Replaces the wasteful `std::vector<char> zeros(size, 0); res.uploadStaticBuffer(buf, 0, size, zeros.data());`
+ * idiom that pays a per-call zero-vector allocation + a CPU→GPU upload of
+ * zero bytes. The new entry points either issue a native GPU-side fill
+ * (vkCmdFillBuffer / MTLBlitCommandEncoder fillBuffer:range:value:) or
+ * route to QRhi's update batch with a thread-local zero-buffer pool so
+ * the zero source bytes are amortised across calls.
+ *
+ * The motivating bug: Vulkan does NOT initialise VkBuffer memory — the
+ * underlying device-memory page contains whatever was there before. For
+ * sparse-uploaded SSBOs (RawLight arena, world_transforms, per_draws past
+ * drawCount, …), the un-touched bytes get read by shaders and feed
+ * garbage into the pipeline. Manifests as "wildly different lighting per
+ * resize" because each fresh VkBuffer lands on a different page. The
+ * defensive zero-fill via uploadStaticBuffer ships zeros from CPU to GPU
+ * — correct but slow; this abstraction picks the right native path.
+ *
+ * Per-backend behaviour:
+ *  - Vulkan : vkCmdFillBuffer (CB variant) — Static buffers only, since
+ *             QRhi's setupBuffer adds VK_BUFFER_USAGE_TRANSFER_DST_BIT
+ *             only when m_type != Dynamic. Dynamic UBOs fall back to the
+ *             update batch path. (See qrhivulkan.cpp QVkBuffer::create.)
+ *  - Metal  : id<MTLBlitCommandEncoder> fillBuffer:range:value: (CB variant)
+ *  - D3D12  : currently falls back to the update batch (a future
+ *             optimisation can use ClearUnorderedAccessViewUint or a
+ *             thread-local zero-resource + CopyBufferRegion).
+ *  - D3D11  : fall back to the update batch.
+ *  - GL/GLES: fall back to the update batch (drivers commonly zero
+ *             initialised buffer memory anyway, and GL exposes
+ *             glClearBufferSubData on 4.3+ which we don't currently wire).
+ *
+ * Both variants accept an arbitrary 4-byte @p pattern (replicated across
+ * the requested range). Default is 0 — the only pattern any current call
+ * site uses. @p offset and @p size MUST be 4-byte aligned (Vulkan
+ * vkCmdFillBuffer requires it; the batch fallback is permissive, but
+ * callers must honour the strict contract for portability).
+ */
+namespace RhiClearBuffer
+{
+
+/// CB-recording variant. Uses native fast paths inside
+/// beginExternal()/endExternal() per QRhi convention. Falls back to
+/// recording a host-side memset uploaded via a temporary update batch
+/// when no native path is available — but the batch variant is the
+/// preferred entry point for sites that aren't already inside a render
+/// pass and have only a QRhiResourceUpdateBatch in scope.
+SCORE_PLUGIN_GFX_EXPORT
+void clearBuffer(
+    QRhi& rhi,
+    QRhiCommandBuffer& cb,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern = 0u);
+
+/// Update-batch variant. Routes to QRhi's uploadStaticBuffer (Static
+/// buffers) or updateDynamicBuffer (Dynamic UBOs) using a thread-local
+/// zero-buffer pool — no per-call zero-vector allocation. This is the
+/// drop-in replacement for the existing
+/// `std::vector<char> zeros(size, 0); batch.uploadStaticBuffer(...)`
+/// pattern.
+///
+/// @p pattern other than 0 is served from a separate thread-local pattern
+/// buffer of @p size bytes (uncommon path); 0 hits the zero pool.
+SCORE_PLUGIN_GFX_EXPORT
+void clearBuffer(
+    QRhi& rhi,
+    QRhiResourceUpdateBatch& batch,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern = 0u);
+
+}  // namespace RhiClearBuffer
+
+// Metal-specific implementation hook (lives in RhiClearBufferMetal.mm).
+// On non-Apple platforms a no-op stub is provided in RhiClearBuffer.cpp.
+// Returns true on success, false if the native path is unavailable
+// (caller should fall back to the batch variant).
+bool clearBufferMetal(
+    QRhi& rhi,
+    QRhiCommandBuffer& cb,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern);
+
+}  // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBufferMetal.mm b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBufferMetal.mm
new file mode 100644
index 0000000000..05c44b5eb9
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBufferMetal.mm
@@ -0,0 +1,87 @@
+#include <Gfx/Graph/RhiClearBuffer.hpp>
+
+#include <QtGui/private/qrhi_p.h>
+
+#if __has_include(<Metal/Metal.h>)
+#include <Metal/Metal.h>
+#if __has_include(<QtGui/rhi/qrhi_platform.h>)
+#include <QtGui/rhi/qrhi_platform.h>
+#else
+#include <QtGui/private/qrhimetal_p.h>
+#endif
+
+namespace score::gfx
+{
+
+// Pre-condition: cb must NOT have an active render or compute pass —
+// same contract as copyBufferMetal in RhiBufferCopyMetal.mm. Metal allows
+// only one encoder open on a command buffer at a time; opening a blit
+// encoder while a render/compute encoder is live triggers an internal
+// assertion or silent misbehaviour.
+//
+// Hazard tracking: the default MTLHazardTrackingModeTracked inserts a
+// dependency between this blit encoder and any subsequent encoder on
+// the same command buffer that touches the same buffer, so no explicit
+// MTLFence / MTLBarrier is needed.
+//
+// fillBuffer:range:value: takes a single byte value (uint8_t), not a
+// 4-byte word. We map 4-byte patterns to a Metal fill ONLY when all
+// four bytes are equal — the common case (pattern == 0 or pattern ==
+// 0xFFFFFFFF). For arbitrary patterns Metal would need a manual
+// stage-via-MTLBuffer + copyFromBuffer; we return false and let the
+// caller fall back to QRhi's update batch, which is the right vehicle
+// for general-purpose host writes anyway.
+bool clearBufferMetal(
+    QRhi& rhi,
+    QRhiCommandBuffer& cb,
+    QRhiBuffer* buf,
+    quint32 offset,
+    quint32 size,
+    quint32 pattern)
+{
+  (void)rhi;
+  if(!buf || size == 0)
+    return false;
+
+  const uint8_t b0 = static_cast<uint8_t>(pattern & 0xFFu);
+  const uint8_t b1 = static_cast<uint8_t>((pattern >> 8) & 0xFFu);
+  const uint8_t b2 = static_cast<uint8_t>((pattern >> 16) & 0xFFu);
+  const uint8_t b3 = static_cast<uint8_t>((pattern >> 24) & 0xFFu);
+  // fillBuffer: takes a single uint8_t. Refuse non-uniform-byte patterns.
+  if(b0 != b1 || b0 != b2 || b0 != b3)
+    return false;
+
+  const auto* handles
+      = static_cast<const QRhiMetalCommandBufferNativeHandles*>(cb.nativeHandles());
+  if(!handles || !handles->commandBuffer)
+    return false;
+
+  auto bufNative = buf->nativeBuffer();
+  if(bufNative.slotCount <= 0 || !bufNative.objects[0])
+    return false;
+
+  id<MTLCommandBuffer> cmdBuf = (id<MTLCommandBuffer>)handles->commandBuffer;
+  // QRhi NativeBuffer convention (Metal): objects[i] is `id<MTLBuffer> *`,
+  // i.e. a POINTER TO the handle. nativeBuffer() reports slotCount backing
+  // buffers; filling only slot 0 would leave any others uncleared, so
+  // every reported slot is filled below.
+  if(!*static_cast<void* const*>(bufNative.objects[0]))
+    return false;
+
+  cb.beginExternal();
+  id<MTLBlitCommandEncoder> blit = [cmdBuf blitCommandEncoder];
+  for(int i = 0; i < bufNative.slotCount; ++i)
+  {
+    void* const* slot = static_cast<void* const*>(bufNative.objects[i]);
+    id<MTLBuffer> mtlBuf = slot ? (__bridge id<MTLBuffer>)(*slot) : nil;
+    if(mtlBuf)
+      [blit fillBuffer:mtlBuf range:NSMakeRange((NSUInteger)offset, (NSUInteger)size) value:b0];
+  }
+  [blit endEncoding];
+  cb.endExternal();
+  return true;
+}
+
+}  // namespace score::gfx
+
+#endif
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.cpp
index 45fca44847..2b21a65f60 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.cpp
@@ -14,6 +14,11 @@ void copyBufferMetal(
     QRhi&, QRhiCommandBuffer&, QRhiBuffer*, QRhiBuffer*, int, int, int)
 {
 }
+void copyBufferRegionsMetal(
+    QRhi&, QRhiCommandBuffer&, QRhiBuffer*, QRhiBuffer*,
+    const BufferCopyRegion*, int)
+{ // No-op stand-in for builds that take this preprocessor branch.
+}
 }
 #endif
 
@@ -54,8 +59,11 @@ void copyBufferMetal(
 
 // D3D12 / D3D11
 #if defined(Q_OS_WIN)
+// clang-format off
+#include <windows.h>
 #include <d3d11.h>
 #include <d3d12.h>
+// clang-format on
 #if __has_include(<QtGui/rhi/qrhi_platform.h>)
 #include <QtGui/rhi/qrhi_platform.h>
 #endif
@@ -111,7 +119,11 @@ void insertComputeBarrier(QRhi& rhi, QRhiCommandBuffer& cb)
     }
 #endif
 
-#if SCORE_HAS_D3D
+// The QRhi::D3D12 enum value and QRhiD3D12CommandBufferNativeHandles (declared
+// in qrhi_platform.h) only exist from Qt 6.6 onward — guard the whole case so
+// it doesn't break the Win build on Qt < 6.6. (RhiClearBuffer.cpp guards its
+// D3D12 case the same way.)
+#if SCORE_HAS_D3D && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
     case QRhi::D3D12: {
       auto* native
           = static_cast<const QRhiD3D12CommandBufferNativeHandles*>(cb.nativeHandles());
@@ -142,14 +154,97 @@ void insertComputeBarrier(QRhi& rhi, QRhiCommandBuffer& cb)
   }
 }
 
+void beginBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb)
+{
+  switch(rhi.backend())
+  {
+#if SCORE_HAS_VULKAN
+    case QRhi::Vulkan: { // records one global VkMemoryBarrier into cb's native command buffer
+      auto* inst = score::gfx::staticVulkanInstance();
+      if(!inst)
+        break; // best effort: without an instance no barrier is recorded
+      auto barrierFn = reinterpret_cast<PFN_vkCmdPipelineBarrier>(
+          inst->getInstanceProcAddr("vkCmdPipelineBarrier"));
+      if(!barrierFn)
+        break; // entry point could not be resolved: no barrier is recorded
+      auto* native
+          = static_cast<const QRhiVulkanCommandBufferNativeHandles*>(cb.nativeHandles());
+      if(!native || !native->commandBuffer)
+        break; // nothing to record into
+      VkMemoryBarrier pre{};
+      pre.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+      pre.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; // earlier shader writes...
+      pre.dstAccessMask // ...made available to transfer reads and writes
+          = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+      barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // source stage: compute only
+                VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &pre, 0, nullptr, 0, nullptr);
+      break;
+    }
+#endif
+    default:
+      // Every other backend (D3D11, D3D12, OpenGL, Metal): nothing is
+      // recorded here; the original author relies on the backend for this.
+      break;
+  }
+}
+
+void endBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb)
+{
+  switch(rhi.backend())
+  {
+#if SCORE_HAS_VULKAN
+    case QRhi::Vulkan: { // records the transfer-write → later-readers barrier into cb
+      auto* inst = score::gfx::staticVulkanInstance();
+      if(!inst)
+        break;
+      auto barrierFn = reinterpret_cast<PFN_vkCmdPipelineBarrier>(
+          inst->getInstanceProcAddr("vkCmdPipelineBarrier"));
+      if(!barrierFn)
+        break;
+      auto* native
+          = static_cast<const QRhiVulkanCommandBufferNativeHandles*>(cb.nativeHandles());
+      if(!native || !native->commandBuffer)
+        break;
+      VkMemoryBarrier post{};
+      post.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+      post.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+      post.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT
+                           | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT
+                           | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+      // Shader accesses are only covered for stages named in dstStageMask,
+      // so the vertex and fragment shader stages are listed explicitly.
+      barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
+                    | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
+                    | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+                0, 1, &post, 0, nullptr, 0, nullptr);
+      break;
+    }
+#endif
+    default: // other backends: nothing is recorded here
+      break;
+  }
+}
+
 void copyBuffer(
     QRhi& rhi, QRhiCommandBuffer& cb,
     QRhiBuffer* src, QRhiBuffer* dst, int size,
-    int srcOffset, int dstOffset)
+    int srcOffset, int dstOffset,
+    BufferCopyBarrier barrier)
 {
   if(!src || !dst || size <= 0 || srcOffset < 0 || dstOffset < 0)
     return;
 
+  // Dynamic buffers rotate over 2-3 backing slots per frame, but every
+  // backend's nativeBuffer().objects[0] only exposes slot 0 — copying that
+  // slot would hit a stale/wrong frame's data. The compute/MDI callers of
+  // these helpers all use Static/Immutable storage buffers; bail on Dynamic
+  // as defence-in-depth, matching clearBufferNative()'s Dynamic bail.
+  if(src->type() == QRhiBuffer::Dynamic || dst->type() == QRhiBuffer::Dynamic)
+    return;
+
+  const bool emit_barriers = (barrier == BufferCopyBarrier::Auto);
+
   switch(rhi.backend())
   {
 #if SCORE_HAS_VULKAN
@@ -185,10 +280,11 @@ void copyBuffer(
       if(srcBuf == VK_NULL_HANDLE || dstBuf == VK_NULL_HANDLE)
         break;
 
-      // Barrier: compute write → transfer read/write
+      // Barrier: compute write → transfer read/write. Skipped when the
+      // caller batches multiple copies inside explicit begin/endBufferCopyBarrier.
       auto barrierFn = reinterpret_cast<PFN_vkCmdPipelineBarrier>(
           inst->getInstanceProcAddr("vkCmdPipelineBarrier"));
-      if(barrierFn)
+      if(emit_barriers && barrierFn)
       {
         VkMemoryBarrier pre{};
         pre.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
@@ -205,15 +301,22 @@ void copyBuffer(
 
       fn(native->commandBuffer, srcBuf, dstBuf, 1, &region);
 
-      // Barrier: transfer write → compute read
-      if(barrierFn)
+      // Barrier: transfer write → compute/vertex read
+      if(emit_barriers && barrierFn)
       {
         VkMemoryBarrier post{};
         post.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
         post.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-        post.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+        post.dstAccessMask
+            = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT
+              | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
+              | VK_ACCESS_INDEX_READ_BIT
+              | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
         barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
-            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &post, 0, nullptr, 0, nullptr);
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
+                | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
+                | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+            0, 1, &post, 0, nullptr, 0, nullptr);
       }
       break;
     }
@@ -255,7 +358,7 @@ void copyBuffer(
     }
 #endif
 
-#if SCORE_HAS_D3D
+#if SCORE_HAS_D3D && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
     case QRhi::D3D12: {
       auto* native
           = static_cast<const QRhiD3D12CommandBufferNativeHandles*>(cb.nativeHandles());
@@ -269,22 +372,77 @@ void copyBuffer(
       if(!srcNative.objects[0] || !dstNative.objects[0])
         break;
 
-      // objects[0] is an `ID3D12Resource * *`, i.e. a pointer to the
-      // resource pointer slot. Same convention as Vulkan -- see the long
-      // comment in the Vulkan branch above.
-      auto* srcRes
-          = *static_cast<ID3D12Resource* const*>(srcNative.objects[0]);
-      auto* dstRes
-          = *static_cast<ID3D12Resource* const*>(dstNative.objects[0]);
+      // D3D12 is the ODD ONE OUT in QRhi: unlike Vulkan/Metal/D3D11/GL
+      // which store `&native_handle` (one extra indirection), the D3D12
+      // backend stores `res->resource` directly — i.e.
+      // `objects[0]` IS the `ID3D12Resource *`, NOT a pointer to it. See
+      // QD3D12Buffer::nativeBuffer in qrhid3d12.cpp:
+      //     b.objects[0] = res->resource;   // ID3D12Resource *
+      // vs. Vulkan/Metal:
+      //     b.objects[i] = &buffers[i];     // VkBuffer * / id<MTLBuffer> *
+      // vs. D3D11:
+      //     return { { &buffer }, 1 };      // ID3D11Buffer * *
+      // Dereferencing here as `**` would treat the COM vtable pointer as
+      // an `ID3D12Resource *` and hand garbage to CopyBufferRegion, which
+      // the D3D12 debug layer flags as
+      // "CORRUPTION: First parameter is corrupt — CORRUPTED_PARAMETER1".
+      // const_cast: NativeBuffer::objects is `const void *` (Qt's const-
+      // correct getter signal that the *array* is const for inspection),
+      // but CopyBufferRegion needs a non-const ID3D12Resource* — and the
+      // underlying resource is genuinely mutable (it is the GPU buffer
+      // we are about to write to).
+      auto* srcRes = static_cast<ID3D12Resource*>(
+          const_cast<void*>(srcNative.objects[0]));
+      auto* dstRes = static_cast<ID3D12Resource*>(
+          const_cast<void*>(dstNative.objects[0]));
       if(!srcRes || !dstRes)
         break;
 
+      // D3D12 has explicit resource states (unlike Vulkan's access masks the
+      // backend handles for tracked resources). The buffers are written by a
+      // compute pass as UAVs, so transition src→COPY_SOURCE and dst→COPY_DEST
+      // before CopyBufferRegion, then back to UNORDERED_ACCESS so subsequent
+      // compute/draw reads see the data. Mirrors the Vulkan compute→transfer→
+      // compute barrier intent and is gated on emit_barriers the same way.
+      const auto transition
+          = [cmdList](
+                ID3D12Resource* res, D3D12_RESOURCE_STATES before,
+                D3D12_RESOURCE_STATES after) {
+        D3D12_RESOURCE_BARRIER b{};
+        b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+        b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+        b.Transition.pResource = res;
+        b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+        b.Transition.StateBefore = before;
+        b.Transition.StateAfter = after;
+        cmdList->ResourceBarrier(1, &b);
+      };
+      if(emit_barriers)
+      {
+        transition(
+            srcRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            D3D12_RESOURCE_STATE_COPY_SOURCE);
+        transition(
+            dstRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            D3D12_RESOURCE_STATE_COPY_DEST);
+      }
+
       cmdList->CopyBufferRegion(
           dstRes,
           static_cast<UINT64>(dstOffset),
           srcRes,
           static_cast<UINT64>(srcOffset),
           static_cast<UINT64>(size));
+
+      if(emit_barriers)
+      {
+        transition(
+            srcRes, D3D12_RESOURCE_STATE_COPY_SOURCE,
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+        transition(
+            dstRes, D3D12_RESOURCE_STATE_COPY_DEST,
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+      }
       break;
     }
 #endif
@@ -334,4 +492,243 @@ void copyBuffer(
   }
 }
 
+void copyBufferRegions(
+    QRhi& rhi, QRhiCommandBuffer& cb,
+    QRhiBuffer* src, QRhiBuffer* dst,
+    const BufferCopyRegion* regions, int count,
+    BufferCopyBarrier barrier)
+{
+  if(!src || !dst || !regions || count <= 0)
+    return;
+
+  // See copyBuffer(): Dynamic buffers expose only slot 0 via objects[0], so a
+  // native copy would read/write the wrong frame slot. Bail like
+  // clearBufferNative() does.
+  if(src->type() == QRhiBuffer::Dynamic || dst->type() == QRhiBuffer::Dynamic)
+    return;
+
+  const bool emit_barriers = (barrier == BufferCopyBarrier::Auto);
+
+  switch(rhi.backend())
+  {
+#if SCORE_HAS_VULKAN
+    case QRhi::Vulkan: {
+      auto* inst = score::gfx::staticVulkanInstance();
+      if(!inst)
+        break;
+      auto fn = reinterpret_cast<PFN_vkCmdCopyBuffer>(
+          inst->getInstanceProcAddr("vkCmdCopyBuffer"));
+      if(!fn)
+        break;
+      auto* native
+          = static_cast<const QRhiVulkanCommandBufferNativeHandles*>(cb.nativeHandles());
+      if(!native || !native->commandBuffer)
+        break;
+
+      auto srcNative = src->nativeBuffer();
+      auto dstNative = dst->nativeBuffer();
+      if(!srcNative.objects[0] || !dstNative.objects[0])
+        break;
+      VkBuffer srcBuf = *static_cast<const VkBuffer*>(srcNative.objects[0]);
+      VkBuffer dstBuf = *static_cast<const VkBuffer*>(dstNative.objects[0]);
+      if(srcBuf == VK_NULL_HANDLE || dstBuf == VK_NULL_HANDLE)
+        break;
+
+      auto barrierFn = reinterpret_cast<PFN_vkCmdPipelineBarrier>(
+          inst->getInstanceProcAddr("vkCmdPipelineBarrier"));
+      if(emit_barriers && barrierFn)
+      {
+        VkMemoryBarrier pre{};
+        pre.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+        pre.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+        pre.dstAccessMask
+            = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+        barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                  VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &pre, 0, nullptr, 0, nullptr);
+      }
+
+      // Build region array once and issue a single vkCmdCopyBuffer.
+      // Small-stack path for the common ≤1024 vertex case; heap fallback
+      // for larger point clouds.
+      constexpr int kStackMax = 1024;
+      VkBufferCopy stack_regions[kStackMax];
+      std::vector<VkBufferCopy> heap_regions;
+      VkBufferCopy* vk_regions;
+      if(count <= kStackMax)
+      {
+        vk_regions = stack_regions;
+      }
+      else
+      {
+        heap_regions.resize(count);
+        vk_regions = heap_regions.data();
+      }
+      for(int i = 0; i < count; ++i)
+      {
+        vk_regions[i].srcOffset = static_cast<VkDeviceSize>(regions[i].src_offset);
+        vk_regions[i].dstOffset = static_cast<VkDeviceSize>(regions[i].dst_offset);
+        vk_regions[i].size = static_cast<VkDeviceSize>(regions[i].size);
+      }
+      fn(native->commandBuffer, srcBuf, dstBuf, (uint32_t)count, vk_regions);
+
+      if(emit_barriers && barrierFn)
+      {
+        VkMemoryBarrier post{};
+        post.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+        post.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+        post.dstAccessMask
+            = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT
+              | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
+              | VK_ACCESS_INDEX_READ_BIT
+              | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+        barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                  VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
+                      | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
+                      | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+                  0, 1, &post, 0, nullptr, 0, nullptr);
+      }
+      break;
+    }
+#endif
+
+#if SCORE_HAS_GL
+    case QRhi::OpenGLES2: {
+      auto* native = static_cast<const QRhiGles2NativeHandles*>(rhi.nativeHandles());
+      if(!native || !native->context)
+        break;
+      auto* f = native->context->extraFunctions();
+      if(!f)
+        break;
+      auto srcNative = src->nativeBuffer();
+      auto dstNative = dst->nativeBuffer();
+      if(!srcNative.objects[0] || !dstNative.objects[0])
+        break;
+      GLuint srcId = *static_cast<const GLuint*>(srcNative.objects[0]);
+      GLuint dstId = *static_cast<const GLuint*>(dstNative.objects[0]);
+      if(srcId == 0 || dstId == 0)
+        break;
+      auto* gl = native->context->functions();
+      gl->glBindBuffer(GL_COPY_READ_BUFFER, srcId);
+      gl->glBindBuffer(GL_COPY_WRITE_BUFFER, dstId);
+      for(int i = 0; i < count; ++i)
+      {
+        f->glCopyBufferSubData(
+            GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
+            static_cast<GLintptr>(regions[i].src_offset),
+            static_cast<GLintptr>(regions[i].dst_offset),
+            static_cast<GLsizeiptr>(regions[i].size));
+      }
+      gl->glBindBuffer(GL_COPY_READ_BUFFER, 0);
+      gl->glBindBuffer(GL_COPY_WRITE_BUFFER, 0);
+      break;
+    }
+#endif
+
+#if SCORE_HAS_D3D && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
+    case QRhi::D3D12: {
+      auto* native
+          = static_cast<const QRhiD3D12CommandBufferNativeHandles*>(cb.nativeHandles());
+      if(!native || !native->commandList)
+        break;
+      auto* cmdList = static_cast<ID3D12GraphicsCommandList*>(native->commandList);
+      auto srcNative = src->nativeBuffer();
+      auto dstNative = dst->nativeBuffer();
+      if(!srcNative.objects[0] || !dstNative.objects[0])
+        break;
+      // D3D12 stores the raw ID3D12Resource* directly (no extra
+      // indirection). See the long comment in copyBuffer's D3D12 branch
+      // above for the Qt-source-level details.
+      auto* srcRes = static_cast<ID3D12Resource*>(
+          const_cast<void*>(srcNative.objects[0]));
+      auto* dstRes = static_cast<ID3D12Resource*>(
+          const_cast<void*>(dstNative.objects[0]));
+      if(!srcRes || !dstRes)
+        break;
+
+      // UAV(compute-write) → COPY_SOURCE/COPY_DEST around the copies, then
+      // back to UAV. One transition pair brackets all regions (same src/dst).
+      // See the matching comment in copyBuffer's D3D12 branch.
+      const auto transition
+          = [cmdList](
+                ID3D12Resource* res, D3D12_RESOURCE_STATES before,
+                D3D12_RESOURCE_STATES after) {
+        D3D12_RESOURCE_BARRIER b{};
+        b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+        b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+        b.Transition.pResource = res;
+        b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+        b.Transition.StateBefore = before;
+        b.Transition.StateAfter = after;
+        cmdList->ResourceBarrier(1, &b);
+      };
+      if(emit_barriers)
+      {
+        transition(
+            srcRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            D3D12_RESOURCE_STATE_COPY_SOURCE);
+        transition(
+            dstRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            D3D12_RESOURCE_STATE_COPY_DEST);
+      }
+
+      for(int i = 0; i < count; ++i)
+      {
+        cmdList->CopyBufferRegion(
+            dstRes, static_cast<UINT64>(regions[i].dst_offset),
+            srcRes, static_cast<UINT64>(regions[i].src_offset),
+            static_cast<UINT64>(regions[i].size));
+      }
+
+      if(emit_barriers)
+      {
+        transition(
+            srcRes, D3D12_RESOURCE_STATE_COPY_SOURCE,
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+        transition(
+            dstRes, D3D12_RESOURCE_STATE_COPY_DEST,
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+      }
+      break;
+    }
+#endif
+
+    case QRhi::D3D11: {
+#if SCORE_HAS_D3D
+      auto* native = static_cast<const QRhiD3D11NativeHandles*>(rhi.nativeHandles());
+      if(!native || !native->context)
+        break;
+      auto srcNative = src->nativeBuffer();
+      auto dstNative = dst->nativeBuffer();
+      if(!srcNative.objects[0] || !dstNative.objects[0])
+        break;
+      auto* ctx = static_cast<ID3D11DeviceContext*>(native->context);
+      auto* srcBuf
+          = *static_cast<ID3D11Buffer* const*>(srcNative.objects[0]);
+      auto* dstBuf
+          = *static_cast<ID3D11Buffer* const*>(dstNative.objects[0]);
+      if(!srcBuf || !dstBuf)
+        break;
+      for(int i = 0; i < count; ++i)
+      {
+        D3D11_BOX box{};
+        box.left = static_cast<UINT>(regions[i].src_offset);
+        box.right = static_cast<UINT>(regions[i].src_offset + regions[i].size);
+        box.top = 0; box.bottom = 1; box.front = 0; box.back = 1;
+        ctx->CopySubresourceRegion(
+            dstBuf, 0, static_cast<UINT>(regions[i].dst_offset), 0, 0,
+            srcBuf, 0, &box);
+      }
+#endif
+      break;
+    }
+
+    case QRhi::Metal:
+      copyBufferRegionsMetal(rhi, cb, src, dst, regions, count);
+      break;
+
+    default:
+      break;
+  }
+}
+
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp
index f7e4b41a96..02cb4ac16d 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp
@@ -42,15 +42,76 @@ void insertComputeBarrier(QRhi& rhi, QRhiCommandBuffer& cb);
  *  - D3D11  : CopySubresourceRegion (offsets supported via D3D11_BOX)
  *  - Metal  : MTLBlitCommandEncoder copyFromBuffer
  */
+// Controls whether the copy helpers emit their own pre/post pipeline
+// barriers. Default: Auto (each call emits a compute→transfer +
+// transfer→compute pair). Use `None` when you are batching N calls
+// inside explicit beginBufferCopyBarrier / endBufferCopyBarrier brackets
+// to avoid N−1 redundant pipeline stalls.
+enum class BufferCopyBarrier
+{
+  Auto,
+  None
+};
+
+/// Emit the compute→transfer barrier that must precede a buffer copy
+/// consuming data written by a compute shader. Pair with
+/// endBufferCopyBarrier(). No-op on backends that handle the transition
+/// implicitly (D3D11, Metal).
+SCORE_PLUGIN_GFX_EXPORT
+void beginBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb);
+
+/// Emit the transfer→compute barrier after a batch of buffer copies so
+/// downstream compute/graphics reads observe the writes.
+SCORE_PLUGIN_GFX_EXPORT
+void endBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb);
+
 SCORE_PLUGIN_GFX_EXPORT
 void copyBuffer(
     QRhi& rhi, QRhiCommandBuffer& cb,
     QRhiBuffer* src, QRhiBuffer* dst, int size,
-    int srcOffset = 0, int dstOffset = 0);
+    int srcOffset = 0, int dstOffset = 0,
+    BufferCopyBarrier barrier = BufferCopyBarrier::Auto);
 
 // Metal-specific implementation (defined in RhiBufferCopyMetal.mm)
 void copyBufferMetal(
     QRhi& rhi, QRhiCommandBuffer& cb,
     QRhiBuffer* src, QRhiBuffer* dst, int size,
     int srcOffset = 0, int dstOffset = 0);
+
+/// One {src_offset, dst_offset, size} byte range of a copyBufferRegions() batch.
+struct BufferCopyRegion
+{
+  int src_offset{};
+  int dst_offset{};
+  int size{};
+};
+
+/**
+ * @brief Region-based GPU buffer copy for strided / gather patterns.
+ *
+ * One src buffer → one dst buffer, with @p count distinct {src_offset,
+ * dst_offset, size} regions. With BufferCopyBarrier::Auto, emits ONE
+ * pre-barrier and ONE post-barrier for the whole batch on backends that
+ * need them (Vulkan, D3D12), then issues the minimum native work:
+ *   - Vulkan : single vkCmdCopyBuffer call with `count` regions
+ *   - OpenGL : N glCopyBufferSubData (bindings reused)
+ *   - D3D12  : N CopyBufferRegion inside one state-transition pair
+ *   - D3D11  : N CopySubresourceRegion
+ *   - Metal  : N copyFromBuffer within one MTLBlitCommandEncoder
+ *
+ * Replaces N copyBuffer() calls (each with its own barrier pair) for
+ * strided source layouts, e.g. std430 vec3 padded to vec4. Must be called
+ * inside beginExternal()/endExternal() like copyBuffer().
+ */
+SCORE_PLUGIN_GFX_EXPORT
+void copyBufferRegions(
+    QRhi& rhi, QRhiCommandBuffer& cb,
+    QRhiBuffer* src, QRhiBuffer* dst,
+    const BufferCopyRegion* regions, int count,
+    BufferCopyBarrier barrier = BufferCopyBarrier::Auto);
+
+// Metal-specific implementation
+void copyBufferRegionsMetal(
+    QRhi& rhi, QRhiCommandBuffer& cb,
+    QRhiBuffer* src, QRhiBuffer* dst,
+    const BufferCopyRegion* regions, int count);
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.cpp
new file mode 100644
index 0000000000..f31c806137
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.cpp
@@ -0,0 +1,250 @@
+#include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneFilterNode.hpp>
+
+#include <ossia/network/value/value_conversion.hpp>
+
+#include <algorithm>
+
+namespace score::gfx
+{
+
+namespace
+{
+
+struct SceneFilterVisitor
+{
+  int mode{};
+
+  // Returns true if this payload should be kept in the output tree. When
+  // returning true, `out` holds the payload to emit: the (possibly rewritten)
+  // scene_node for node payloads, or a copy of `in` for everything else.
+  bool filter_payload(
+      const ossia::scene_payload& in, ossia::scene_payload& out) const
+  {
+    if(auto* n = ossia::get_if<ossia::scene_node_ptr>(&in))
+    {
+      ossia::scene_node_ptr rewritten = rewrite_node(*n);
+      if(!rewritten)
+        return false;
+      out = rewritten;
+      return true;
+    }
+    // Non-node payloads: pass-through (lights, cameras, materials, meshes,
+    // transforms). Hierarchy filtering only drops scene_nodes; payloads
+    // carried as direct siblings of a kept node follow their parent.
+    out = in;
+    return true;
+  }
+
+  ossia::scene_node_ptr rewrite_node(const ossia::scene_node_ptr& src) const
+  {
+    if(!src)
+      return nullptr;
+
+    // Mode 1: drop invisible subtrees outright.
+    if(mode == 1 && !src->visible)
+      return nullptr;
+
+    // Recurse into children.
+    if(!src->has_children())
+    {
+      // Leaf node — keep as-is if it passed the visibility check above.
+      return src;
+    }
+
+    auto newChildren = std::make_shared<std::vector<ossia::scene_payload>>();
+    newChildren->reserve(src->children->size());
+    for(const auto& child : *src->children)
+    {
+      ossia::scene_payload out;
+      if(filter_payload(child, out))
+        newChildren->push_back(std::move(out));
+    }
+
+    // If nothing survived under this node, drop the node itself.
+    if(newChildren->empty())
+      return nullptr;
+
+    // Share-copy: if children were unchanged identity-wise, reuse src.
+    if(newChildren->size() == src->children->size())
+    {
+      bool identical = true;
+      for(std::size_t i = 0; i < newChildren->size(); ++i)
+      {
+        const auto& a = (*newChildren)[i];
+        const auto& b = (*src->children)[i];
+        if(a.index() != b.index())
+        {
+          identical = false;
+          break;
+        }
+        // scene_payload is a variant of shared_ptr-to-component types
+        // (plus scene_transform). For shared_ptr alternatives, identity
+        // is the correct check: a freshly-rewritten subtree returns a
+        // different shared_ptr than the original, while pass-through
+        // payloads keep the same pointer. scene_transform is always
+        // pass-through in filter_payload so equality of the variant
+        // index is sufficient — no transform value is mutated here.
+        const bool same = ossia::visit(
+            [&]<typename T>(const T& av) -> bool {
+              const auto* bv = ossia::get_if<T>(&b);
+              if(!bv)
+                return false;
+              if constexpr(requires { av.get() == bv->get(); })
+                return av.get() == bv->get();
+              else
+                return true; // scene_transform: pass-through, treat as same
+            },
+            a);
+        if(!same)
+        {
+          identical = false;
+          break;
+        }
+      }
+      if(identical)
+        return src;
+    }
+
+    auto copy = std::make_shared<ossia::scene_node>(*src);
+    copy->children = std::move(newChildren);
+    return copy;
+  }
+
+  ossia::scene_spec rewrite(const ossia::scene_spec& in) const
+  {
+    ossia::scene_spec out;
+    if(!in.state)
+      return out;
+
+    // Mode 0: pass-through, no copy needed.
+    if(mode == 0)
+      return in;
+
+    auto newState = std::make_shared<ossia::scene_state>(*in.state);
+    auto newRoots
+        = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+    if(in.state->roots)
+    {
+      newRoots->reserve(in.state->roots->size());
+      for(const auto& r : *in.state->roots)
+      {
+        if(auto rw = rewrite_node(r))
+          newRoots->push_back(std::move(rw));
+      }
+    }
+    newState->roots = std::move(newRoots);
+    newState->version++;
+    newState->dirty_index++;
+
+    out.state = std::move(newState);
+    out.delta = in.delta;
+    return out;
+  }
+};
+
+}
+
+struct RenderedSceneFilterNode final : NodeRenderer
+{
+  const SceneFilterNode& m_node;
+  ossia::scene_spec m_outputScene;
+  const ossia::scene_state* m_cachedInputState{};
+  int64_t m_cachedInputVersion{-1};
+  int m_cachedMode{-1};
+
+  RenderedSceneFilterNode(const SceneFilterNode& n)
+      : NodeRenderer{n}
+      , m_node{n}
+  {
+  }
+
+  void init(RenderList&, QRhiResourceUpdateBatch&) override { m_initialized = true; }
+  void release(RenderList&) override
+  {
+    m_outputScene = {};
+    m_cachedInputState = nullptr;
+    m_cachedInputVersion = -1;
+    m_cachedMode = -1;
+    m_initialized = false;
+  }
+
+  void update(RenderList&, QRhiResourceUpdateBatch&, Edge*) override
+  {
+    const auto* inState = this->scene.state.get();
+    const int64_t inVersion = this->scene.state ? this->scene.state->version : -1;
+
+    bool rebuild = !m_outputScene.state
+                   || inState != m_cachedInputState
+                   || inVersion != m_cachedInputVersion
+                   || m_node.m_mode != m_cachedMode
+                   || this->sceneChanged;
+    if(!rebuild)
+      return;
+
+    SceneFilterVisitor vis{m_node.m_mode};
+    m_outputScene = vis.rewrite(this->scene);
+    m_cachedInputState = inState;
+    m_cachedInputVersion = inVersion;
+    m_cachedMode = m_node.m_mode;
+    this->sceneChanged = false;
+  }
+
+  void runInitialPasses(
+      RenderList& renderer, QRhiCommandBuffer&, QRhiResourceUpdateBatch*&,
+      Edge& edge) override
+  {
+    if(!m_outputScene.state)
+      return;
+    auto* sink = edge.sink;
+    if(!sink || !sink->node)
+      return;
+    auto rn_it = sink->node->renderedNodes.find(&renderer);
+    if(rn_it == sink->node->renderedNodes.end())
+      return;
+    auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink);
+    if(it == sink->node->input.end())
+      return;
+    int port_idx = (int)(it - sink->node->input.begin());
+    rn_it->second->process(port_idx, m_outputScene, edge.source);
+  }
+
+  void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { }
+
+  // Data-only renderer — no per-edge GPU pass state to release.
+  void removeOutputPass(RenderList&, Edge&) override { }
+};
+
+SceneFilterNode::SceneFilterNode()
+{
+  // In 0: scene to filter. In 1: filter mode (int storage, initially 0).
+  auto* scene_in = new Port{this, {}, Types::Scene, {}};
+  auto* mode_in = new Port{this, new int{0}, Types::Int, {}};
+  input.push_back(scene_in);
+  input.push_back(mode_in);
+  output.push_back(new Port{this, {}, Types::Scene, {}}); // Out 0: filtered scene
+}
+
+SceneFilterNode::~SceneFilterNode() = default;
+
+void SceneFilterNode::process(int32_t port, const ossia::value& v)
+{
+  // Only port 1 (the filter mode) is handled here; every other port is
+  // forwarded unchanged to the ProcessNode implementation.
+  if(port != 1)
+  {
+    ProcessNode::process(port, v);
+    return;
+  }
+
+  m_mode = ossia::convert<int>(v);
+  materialChange();
+}
+
+NodeRenderer* SceneFilterNode::createRenderer(RenderList&) const noexcept
+{
+  return new RenderedSceneFilterNode{*this};
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.hpp
new file mode 100644
index 0000000000..c1402e0e4a
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.hpp
@@ -0,0 +1,40 @@
+#pragma once
+#include <Gfx/Graph/Node.hpp>
+
+namespace score::gfx
+{
+
+/**
+ * @brief Tree-level filter on a scene_spec.
+ *
+ * Walks the incoming scene hierarchy and rebuilds it with only the
+ * subtrees matching the predicate. Runs on the render thread but does
+ * exclusively CPU work — no GPU allocation; shared_ptr reuse keeps cost
+ * minimal when the scene is unchanged.
+ *
+ * Inputs:
+ *   - Port 0: Scene (Types::Scene)
+ *   - Port 1: Mode (Types::Int):
+ *        0 = pass-through (no filtering)
+ *        1 = keep only scene_nodes with visible == true
+ *        2 = keep only subtrees whose node name contains the substring set
+ *            in the "Name" control (future-wired; string port missing in the
+ *            renderer, so no name or visibility filter is applied for now)
+ *
+ * Outputs:
+ *   - Port 0: Scene (Types::Scene)
+ */
+class SCORE_PLUGIN_GFX_EXPORT SceneFilterNode : public ProcessNode
+{
+public:
+  SceneFilterNode();
+  ~SceneFilterNode() override;
+
+  score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override;
+
+  void process(int32_t port, const ossia::value& v) override;
+
+  int m_mode{0};
+};
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.cpp
new file mode 100644
index 0000000000..ce68981455
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.cpp
@@ -0,0 +1,1012 @@
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <ossia/detail/hash_map.hpp>
+#include <ossia/detail/ptr_set.hpp>
+
+#include <QDebug>
+#include <QQuaternion>
+
+#include <array>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+
+namespace score::gfx
+{
+
+static QMatrix4x4 toQMatrix(const ossia::transform3d& t)
+{
+  // ossia::transform3d::matrix holds column-major data; the
+  // (values, cols, rows) constructor is used with a full 4x4 extent so the
+  // data is read in that same layout.
+  constexpr int kCols = 4;
+  constexpr int kRows = 4;
+  QMatrix4x4 converted(t.matrix, kCols, kRows);
+  return converted;
+}
+
+static QMatrix4x4 toQMatrix(const ossia::scene_transform& t)
+{
+  // QQuaternion takes the scalar part first, so rotation[3] leads and
+  // rotation[0..2] follow as the vector part.
+  const QQuaternion orientation(
+      t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2]);
+
+  // Compose translate, then rotate, then scale onto a default-constructed
+  // matrix.
+  QMatrix4x4 composed;
+  composed.translate(t.translation[0], t.translation[1], t.translation[2]);
+  composed.rotate(orientation);
+  composed.scale(t.scale[0], t.scale[1], t.scale[2]);
+  return composed;
+}
+
+// packLight removed in task 28c. The Light producer owns a RawLight
+// arena slot and writes RawLightData directly in its own update() hook
+// (see Threedim/Light.cpp); the preprocessor no longer CPU-composes
+// world-space light bytes. Consumer shaders compose direction / position
+// on the fly from world_transforms[RawLight.transform_slot].
+
+// ---- mesh_primitive → ossia::geometry ------------------------------------
+//
+// Builds a transient `ossia::geometry` on the heap that wraps a
+// `mesh_primitive`'s buffers and attribute layout. The downstream
+// preprocessor copies those handles into its own output, so the converted
+// geometry only needs to survive the current flatten pass.
+// CPU-backed `buffer_data` flows through as `cpu_buffer` (the rendering
+// layer handles the upload); GPU handles flow through as `gpu_buffer`.
+
+static decltype(ossia::geometry::attribute::format)
+toGeomAttrFormat(ossia::vertex_format f) noexcept
+{
+  // Maps a scene-level vertex_format onto the geometry attribute format
+  // enumerator. Formats without an explicit case fall back to float3.
+  using Src = ossia::vertex_format;
+  using Attr = ossia::geometry::attribute;
+  switch(f)
+  {
+    // 32-bit float
+    case Src::float1:
+      return Attr::float1;
+    case Src::float2:
+      return Attr::float2;
+    case Src::float3:
+      return Attr::float3;
+    case Src::float4:
+      return Attr::float4;
+
+    // 16-bit float
+    case Src::half1:
+      return Attr::half1;
+    case Src::half2:
+      return Attr::half2;
+    case Src::half3:
+      return Attr::half3;
+    case Src::half4:
+      return Attr::half4;
+
+    // normalized unsigned bytes
+    case Src::unorm8x1:
+      return Attr::unormbyte1;
+    case Src::unorm8x2:
+      return Attr::unormbyte2;
+    case Src::unorm8x4:
+      return Attr::unormbyte4;
+
+    // 16-bit integers
+    case Src::uint16x1:
+      return Attr::ushort1;
+    case Src::uint16x2:
+      return Attr::ushort2;
+    case Src::uint16x4:
+      return Attr::ushort4;
+    case Src::sint16x1:
+      return Attr::sshort1;
+    case Src::sint16x2:
+      return Attr::sshort2;
+    case Src::sint16x4:
+      return Attr::sshort4;
+
+    // 32-bit integers
+    case Src::uint32x1:
+      return Attr::uint1;
+    case Src::uint32x2:
+      return Attr::uint2;
+    case Src::uint32x3:
+      return Attr::uint3;
+    case Src::uint32x4:
+      return Attr::uint4;
+    case Src::sint32x1:
+      return Attr::sint1;
+    case Src::sint32x2:
+      return Attr::sint2;
+    case Src::sint32x3:
+      return Attr::sint3;
+    case Src::sint32x4:
+      return Attr::sint4;
+
+    default:
+      return Attr::float3;
+  }
+}
+
+static auto toGeomTopology(ossia::primitive_topology t) noexcept
+{
+  using Src = ossia::primitive_topology;
+  using Topology = decltype(ossia::geometry::topology);
+
+  // Start from the fallback (triangle list) and override it for every
+  // topology that has an explicit mapping.
+  Topology mapped = Topology::triangles;
+  switch(t)
+  {
+    case Src::points:
+      mapped = Topology::points;
+      break;
+    case Src::lines:
+      mapped = Topology::lines;
+      break;
+    case Src::line_strip:
+      mapped = Topology::line_strip;
+      break;
+    case Src::triangles:
+      mapped = Topology::triangles;
+      break;
+    case Src::triangle_strip:
+      mapped = Topology::triangle_strip;
+      break;
+    case Src::triangle_fan:
+      mapped = Topology::triangle_fan;
+      break;
+    default:
+      break;
+  }
+  return mapped;
+}
+
+// Appends exactly one entry to `g.buffers` for the given buffer_resource.
+//
+//  - CPU-backed `buffer_data` becomes a `cpu_buffer` sharing the same
+//    allocation (no copy), marked dirty.
+//  - A `gpu_buffer_handle` becomes a `gpu_buffer` carrying the native
+//    handle, marked dirty.
+//  - Any other state of the resource variant becomes an empty, non-dirty
+//    `gpu_buffer` placeholder.
+//
+// The "exactly one entry" guarantee matters: callers address buffers by
+// position (one slot per source buffer, index buffer last), so silently
+// appending nothing would shift every later buffer index and leave a
+// previously computed index-buffer slot out of range.
+static void appendBufferResource(
+    ossia::geometry& g, const ossia::buffer_resource& br)
+{
+  if(auto* cpu = ossia::get_if<ossia::buffer_data>(&br.resource))
+  {
+    ossia::geometry::cpu_buffer cb;
+    // buffer_data::data is shared_ptr<const void>; geometry::cpu_buffer::raw_data
+    // is shared_ptr<void>. The contents are immutable in practice, but the types
+    // differ — const_pointer_cast reuses the control block without a copy.
+    cb.raw_data = std::const_pointer_cast<void>(cpu->data);
+    cb.byte_size = cpu->byte_size;
+    g.buffers.push_back(ossia::geometry::buffer{.data = cb, .dirty = true});
+  }
+  else if(auto* gpu = ossia::get_if<ossia::gpu_buffer_handle>(&br.resource))
+  {
+    ossia::geometry::gpu_buffer gb;
+    gb.handle = gpu->native_handle;
+    gb.byte_size = gpu->byte_size;
+    g.buffers.push_back(ossia::geometry::buffer{.data = gb, .dirty = true});
+  }
+  else
+  {
+    // Neither alternative is held: keep the slot so positional buffer
+    // indices stay aligned with the source primitive.
+    g.buffers.push_back(ossia::geometry::buffer{
+        .data = ossia::geometry::gpu_buffer{}, .dirty = false});
+  }
+}
+
+std::shared_ptr<ossia::geometry>
+primitiveToGeometry(const ossia::mesh_primitive& prim)
+{
+  auto geom = std::make_shared<ossia::geometry>();
+  const bool indexed = static_cast<bool>(prim.index_buffer);
+
+  // ---- Buffers -----------------------------------------------------------
+  // One slot per entry of prim.vertex_buffers (a null entry gets an empty
+  // placeholder so positions stay aligned), then the index buffer if any.
+  geom->buffers.reserve(prim.vertex_buffers.size() + (indexed ? 1 : 0));
+  for(const auto& vertex_buffer : prim.vertex_buffers)
+  {
+    if(!vertex_buffer)
+    {
+      geom->buffers.push_back(ossia::geometry::buffer{
+          .data = ossia::geometry::gpu_buffer{}, .dirty = false});
+      continue;
+    }
+    appendBufferResource(*geom, *vertex_buffer);
+  }
+
+  int index_slot = -1;
+  if(indexed)
+  {
+    index_slot = (int)geom->buffers.size();
+    appendBufferResource(*geom, *prim.index_buffer);
+  }
+
+  // ---- Binding table -----------------------------------------------------
+  // A binding is identified by the full (buffer_index, byte_stride, rate)
+  // tuple. Keying on buffer_index alone would be wrong for primitives that
+  // pack several planar blocks with different strides into one buffer
+  // (e.g. pos(12)/uv(8)/color(16) all in buffer 0): every attribute would
+  // then be read with the first block's stride. Layouts with one buffer per
+  // attribute are unaffected by the wider key.
+  struct Slot
+  {
+    uint32_t buffer_index{};
+    uint32_t stride{};
+    bool per_instance{};
+  };
+  std::vector<Slot> slots;
+
+  const auto isPerInstance = [](const ossia::vertex_attribute& a) {
+    return a.rate == ossia::vertex_attribute::input_rate::per_instance;
+  };
+  // Position of the slot matching the attribute's tuple, or -1 if absent.
+  const auto slotOf = [&](const ossia::vertex_attribute& a) -> int {
+    const uint32_t wanted_buffer = a.buffer_index;
+    const uint32_t wanted_stride = a.byte_stride;
+    const bool wanted_rate = isPerInstance(a);
+    int position = 0;
+    for(const auto& s : slots)
+    {
+      if(s.buffer_index == wanted_buffer && s.stride == wanted_stride
+         && s.per_instance == wanted_rate)
+        return position;
+      ++position;
+    }
+    return -1;
+  };
+
+  for(const auto& a : prim.attributes)
+  {
+    if(slotOf(a) >= 0)
+      continue;
+    Slot fresh;
+    fresh.buffer_index = a.buffer_index;
+    fresh.stride = a.byte_stride;
+    fresh.per_instance = isPerInstance(a);
+    slots.push_back(fresh);
+  }
+
+  // ---- Bindings + inputs (one of each per slot, same order) --------------
+  geom->bindings.reserve(slots.size());
+  geom->input.reserve(slots.size());
+  for(const auto& s : slots)
+  {
+    ossia::geometry::binding gb{};
+    gb.byte_stride = s.stride;
+    if(s.per_instance)
+      gb.classification = ossia::geometry::binding::per_instance;
+    else
+      gb.classification = ossia::geometry::binding::per_vertex;
+    gb.step_rate = 1;
+    geom->bindings.push_back(gb);
+
+    // `input` resolves to an ossia-level type in this scope, hence the
+    // elaborated `struct` tag to name the member type.
+    struct ossia::geometry::input entry{};
+    entry.buffer = (int)s.buffer_index;
+    entry.byte_offset = 0;
+    geom->input.push_back(entry);
+  }
+
+  // ---- Attributes: buffer_index is replaced by the binding position ------
+  geom->attributes.reserve(prim.attributes.size());
+  for(const auto& a : prim.attributes)
+  {
+    ossia::geometry::attribute converted{};
+    converted.binding = slotOf(a);
+    converted.location = 0; // resolved by the renderer's semantic remap
+    converted.format = toGeomAttrFormat(a.format);
+    converted.byte_offset = a.byte_offset;
+    converted.semantic = a.semantic;
+    geom->attributes.push_back(converted);
+  }
+
+  // ---- Counts, topology, fixed raster state ------------------------------
+  geom->vertices = (int)prim.vertex_count;
+  geom->indices = (int)prim.index_count;
+  geom->instances = 1;
+  geom->topology = toGeomTopology(prim.topology);
+  geom->cull_mode = ossia::geometry::none;
+  geom->front_face = ossia::geometry::counter_clockwise;
+
+  // ---- Index buffer reference (-1 when the primitive is not indexed) -----
+  if(index_slot < 0)
+  {
+    geom->index.buffer = -1;
+  }
+  else
+  {
+    geom->index.buffer = index_slot;
+    geom->index.byte_offset = 0;
+    if(prim.index_type == ossia::index_format::uint16)
+      geom->index.format = decltype(geom->index)::uint16;
+    else
+      geom->index.format = decltype(geom->index)::uint32;
+  }
+
+  // ---- Bounds: three floats each for min and max -------------------------
+  std::memcpy(geom->bounds.min, prim.bounds.min, 3 * sizeof(float));
+  std::memcpy(geom->bounds.max, prim.bounds.max, 3 * sizeof(float));
+
+  return geom;
+}
+
+// Pack the CPU-side material_component into the 64-byte GPU-layout struct.
+// Only factor fields are packed here; `textureRefs[]` are deliberately left
+// at their default tex_ref_none() sentinel. ScenePreprocessorNode runs
+// `rebuildChannel(ch)` for each of the four channels (BaseColor /
+// MetalRough / Normal / Emissive) after the scene walk, which in turn
+// calls `patchMaterialRefsFromCache(ch, fs)` (ScenePreprocessorNode.cpp:1944)
+// to fill `fs.materials[i].textureRefs[ch]` with the assigned texture-array
+// layer index per material per channel. Consumer shaders sample the
+// per-channel arrays via `mat.textureRefs.x / .y / .z / .w` against
+// `baseColorArray` / `metalRoughArray` / `normalArray` / `emissiveArray`.
+MaterialGPU packMaterial(const ossia::material_component& mc)
+{
+  MaterialGPU gpu;
+
+  // ---- Scalar / vector factors -------------------------------------------
+  std::memcpy(gpu.baseColor, mc.base_color_factor, sizeof(float) * 4);
+
+  auto& mrou = gpu.metallicRoughnessOcclusionUnlit;
+  mrou[0] = mc.metallic_factor;
+  mrou[1] = mc.roughness_factor;
+  mrou[2] = mc.occlusion_strength;
+  mrou[3] = mc.unlit ? 1.f : 0.f;
+
+  // xyz = emissive colour factor, w = emissive strength.
+  for(int i = 0; i < 3; ++i)
+    gpu.emissive_strength[i] = mc.emissive_factor[i];
+  gpu.emissive_strength[3] = mc.emissive_strength;
+
+  // ---- Feature mask --------------------------------------------------------
+  // One bit per active texture / BRDF lobe / option, derived from the
+  // CPU-side fields of the material.
+  uint32_t features = 0;
+  using namespace material_feature;
+  const auto enable = [&features](bool condition, uint32_t bit) {
+    if(condition)
+      features |= bit;
+  };
+
+  // Textures.
+  enable(mc.base_color_texture.valid(), has_base_color_texture);
+  enable(mc.metallic_roughness_texture.valid(), has_metal_rough_texture);
+  enable(mc.normal_texture.valid(), has_normal_texture);
+  enable(mc.emissive_texture.valid(), has_emissive_texture);
+  // The occlusion flag is raised whenever an occlusion texture exists at
+  // all, so that the shader samples it through its own texture ref rather
+  // than falling back to the metal-rough red channel, whose content is not
+  // reliable when no occlusion texture was authored.
+  enable(mc.occlusion_texture.valid(), has_separate_occlusion);
+
+  // Shading / blending options.
+  enable(mc.unlit, unlit);
+  enable(mc.alpha != ossia::alpha_mode::opaque_, alpha_non_opaque);
+  enable(mc.alpha == ossia::alpha_mode::mask, alpha_mask);
+  enable(mc.alpha == ossia::alpha_mode::blend, alpha_blend);
+  enable(mc.double_sided, double_sided);
+
+  // Caster opt-outs use "disabled" bits so that the common case
+  // (caster == true) contributes nothing to the mask.
+  enable(!mc.shadow_caster, shadow_caster_disabled);
+  enable(!mc.reflection_caster, reflection_caster_disabled);
+
+  // Per-channel texcoord set, two bits apart starting at bit 20. Any set
+  // index above 1 is clamped to 1.
+  const auto texcoordBits = [](uint32_t set_idx, uint32_t shift) -> uint32_t {
+    const uint32_t clamped = (set_idx > 1u) ? 1u : set_idx;
+    return clamped << shift;
+  };
+  features |= texcoordBits(mc.base_color_texture.texcoord_set, 20);
+  features |= texcoordBits(mc.metallic_roughness_texture.texcoord_set, 22);
+  features |= texcoordBits(mc.normal_texture.texcoord_set, 24);
+  features |= texcoordBits(mc.emissive_texture.texcoord_set, 26);
+  features |= texcoordBits(mc.occlusion_texture.texcoord_set, 28);
+
+  // Extension lobes: a lobe counts as active when its driving factor
+  // departs from the "off" value tested below.
+  const bool sheen_active = mc.sheen.color_factor[0] > 0.f
+                            || mc.sheen.color_factor[1] > 0.f
+                            || mc.sheen.color_factor[2] > 0.f;
+  const bool specular_active = mc.specular.factor != 1.f
+                               || mc.specular.color_factor[0] != 1.f
+                               || mc.specular.color_factor[1] != 1.f
+                               || mc.specular.color_factor[2] != 1.f;
+  enable(mc.clearcoat.factor > 0.f, has_clearcoat);
+  enable(sheen_active, has_sheen);
+  enable(mc.transmission.factor > 0.f, has_transmission);
+  enable(mc.volume.thickness_factor > 0.f, has_volume);
+  enable(specular_active, has_specular);
+  enable(mc.iridescence.factor > 0.f, has_iridescence);
+  enable(mc.anisotropy.strength != 0.f, has_anisotropy);
+  enable(mc.diffuse_transmission.factor > 0.f, has_diffuse_transmission);
+
+  gpu.feature_mask = features;
+
+  // Hit group is always written as 0 here.
+  gpu.hit_group_id = 0u;
+
+  // Copied verbatim from the material.
+  gpu.alpha_cutoff = mc.alpha_cutoff;
+
+  // Texture refs (including the occlusion one) are not written by this
+  // function.
+  return gpu;
+}
+
+// Pack the OpenPBR / KHR extension fields from `material_component` into
+// MaterialExtensionsGPU (272 B). Field order matches the struct's
+// declaration — if you reorder there, reorder here.
+//
+// `textureRefs[]` is left at the default tex_ref_none() sentinels here.
+// The encoded refs are written by ScenePreprocessor::patchMaterialRefs
+// FromCache in lockstep with the base-channel refs: the
+// `kExtTextureSlots` table in ScenePreprocessorNode.cpp routes each
+// MaterialExtensionsGPU::textureRefs[slot] through one of the existing
+// 5 channel pools (BaseColor / MetalRough / Normal) based on format
+// expectation. No separate ext-channel pool / sampler set — the same
+// bucket samplers serve both the main 5 channels and every glTF
+// KHR_materials_* extension texture.
+MaterialExtensionsGPU packMaterialExtensions(const ossia::material_component& mc)
+{
+  // Value-initialised: every field not assigned below keeps the struct's
+  // own default.
+  MaterialExtensionsGPU ext{};
+
+  // ---- Coat (from clearcoat) ----------------------------------------------
+  ext.coat[0] = mc.clearcoat.factor;
+  ext.coat[1] = mc.clearcoat.roughness_factor;
+  ext.coat[2] = 1.5f; // coat IOR: fixed constant, not read from the material
+  ext.coat[3] = 0.f;  // coat darkening: fixed constant
+
+  // ---- Fuzz (from sheen): rgb = colour factor, a = roughness ---------------
+  for(int i = 0; i < 3; ++i)
+    ext.fuzz_color[i] = mc.sheen.color_factor[i];
+  ext.fuzz_color[3] = mc.sheen.roughness_factor;
+
+  // ---- Transmission + volume -----------------------------------------------
+  // A non-finite attenuation distance is stored as depth 0.
+  const auto attenuation_distance = mc.volume.attenuation_distance;
+  ext.transmission[0] = mc.transmission.factor;
+  if(std::isfinite(attenuation_distance))
+    ext.transmission[1] = attenuation_distance;
+  else
+    ext.transmission[1] = 0.f;
+  ext.transmission[2] = 0.f;  // dispersion scale: fixed constant
+  ext.transmission[3] = 20.f; // dispersion Abbe number: fixed constant
+
+  for(int i = 0; i < 3; ++i)
+    ext.transmission_color[i] = mc.volume.attenuation_color[i];
+  ext.transmission_color[3] = 0.f; // scatter anisotropy: fixed constant
+  // transmission_scatter is left at its default.
+
+  // ---- Specular: x = weight, yzw = colour factor ----------------------------
+  ext.specular_weight_color[0] = mc.specular.factor;
+  for(int i = 0; i < 3; ++i)
+    ext.specular_weight_color[1 + i] = mc.specular.color_factor[i];
+
+  // x = IOR, y = anisotropy strength, zw = cos/sin of the anisotropy
+  // rotation, precomputed here so the angle does not have to be evaluated
+  // downstream.
+  const auto rotation = mc.anisotropy.rotation;
+  ext.specular_ior_anisotropy[0] = mc.ior;
+  ext.specular_ior_anisotropy[1] = mc.anisotropy.strength;
+  ext.specular_ior_anisotropy[2] = std::cos(rotation);
+  ext.specular_ior_anisotropy[3] = std::sin(rotation);
+
+  // ---- Thin film (from iridescence) -----------------------------------------
+  // The min/max thickness pair is reduced to its midpoint.
+  ext.thin_film[0] = mc.iridescence.factor;
+  ext.thin_film[1]
+      = (mc.iridescence.thickness_min + mc.iridescence.thickness_max) * 0.5f;
+  ext.thin_film[2] = mc.iridescence.ior;
+
+  // ---- Diffuse transmission: x = factor, yzw = colour factor ----------------
+  ext.diffuse_transmission[0] = mc.diffuse_transmission.factor;
+  for(int i = 0; i < 3; ++i)
+    ext.diffuse_transmission[1 + i] = mc.diffuse_transmission.color_factor[i];
+
+  // Subsurface fields are not written; they keep their defaults.
+
+  // ---- Flags -----------------------------------------------------------------
+  // flags[1] is raised when the material's property map contains a
+  // "thin_walled" key; only the key's presence is tested, not its value.
+  const auto thin_walled = mc.properties.find("thin_walled");
+  if(thin_walled != mc.properties.end())
+    ext.flags[1] = 1.f;
+
+  return ext;
+}
+
+// Dedup key combining a payload identity pointer with the accumulated
+// world transform on the walk path that reached it. Plain pointer dedup
+// (threedim#1) collapses every instance of a shared prototype into one:
+// when an upstream SceneDuplicator references a single prototype
+// scene_node_ptr under N distinct transforms, the pointer-only `seenNodes`
+// set lets only the first through and silently drops the other N-1
+// instances. Keying by (pointer, world-matrix) instead keeps genuinely
+// distinct instances (same prototype, different transform) apart while
+// still deduping true DAG re-references reached through an identical
+// transform path (bit-identical accumulated matrix → same key). Mesh GPU
+// vertex uploads are deduped separately downstream by DrawCall::stable_id,
+// so emitting N draws here still uploads the prototype's bytes once.
+// Hash-set key: identity pointer plus the 16 floats of the accumulated
+// world matrix.
+//
+// Equality is defined on bit patterns rather than on float `==`:
+//  - -0.0f and +0.0f are treated as the same value (both collapse to the
+//    +0.0f pattern), matching the normalisation done by InstanceKeyHash;
+//  - every other value, NaN included, compares equal only to the identical
+//    bit pattern.
+// For ordinary finite values this gives the same answer as float `==`.
+// The difference is NaN: with float `==` a key whose matrix contains a NaN
+// is not equal to itself, which breaks the equivalence-relation requirement
+// of hashed containers and makes such a key impossible to find again after
+// insertion.
+struct InstanceKey
+{
+  const void* ptr{};
+  std::array<float, 16> world{};
+
+  // Bit pattern used for comparison; both zeros map to the +0.0f pattern.
+  static std::uint32_t comparableBits(float f) noexcept
+  {
+    const float v = (f == 0.f) ? 0.f : f;
+    std::uint32_t bits;
+    std::memcpy(&bits, &v, sizeof(bits));
+    return bits;
+  }
+
+  bool operator==(const InstanceKey& o) const noexcept
+  {
+    if(ptr != o.ptr)
+      return false;
+    for(std::size_t i = 0; i < world.size(); ++i)
+    {
+      if(comparableBits(world[i]) != comparableBits(o.world[i]))
+        return false;
+    }
+    return true;
+  }
+};
+
+struct InstanceKeyHash
+{
+  // Intentionally carries no is_avalanching marker: the pointer hash may be
+  // the identity function and the mix below is not guaranteed to be well
+  // distributed, so the container is left to apply its own final
+  // avalanche step.
+  std::size_t operator()(const InstanceKey& k) const noexcept
+  {
+    std::size_t seed = std::hash<const void*>{}(k.ptr);
+    for(std::size_t i = 0; i < k.world.size(); ++i)
+    {
+      // Fold -0.0f into +0.0f so that both zeros contribute the same bits.
+      float component = k.world[i];
+      if(component == 0.f)
+        component = 0.f;
+
+      std::uint32_t raw;
+      std::memcpy(&raw, &component, sizeof(raw));
+
+      // Combine step: golden-ratio constant plus shifted copies of the
+      // running seed.
+      const std::size_t mixed = std::size_t(raw) + 0x9e3779b97f4a7c15ULL
+                                + (seed << 6) + (seed >> 2);
+      seed ^= mixed;
+    }
+    return seed;
+  }
+};
+
+static InstanceKey makeInstanceKey(const void* p, const QMatrix4x4& m)
+{
+  // Copies the matrix's 16 floats byte-for-byte from constData() into the
+  // key, alongside the identity pointer.
+  InstanceKey key;
+  key.ptr = p;
+  const float* matrix_values = m.constData();
+  std::memcpy(key.world.data(), matrix_values, key.world.size() * sizeof(float));
+  return key;
+}
+
+// Visitor that walks the scene_payload tree and collects draw calls, lights, cameras.
+struct FlattenVisitor
+{
+  FlatScene& out;
+  QMatrix4x4 parentWorld;
+  ossia::scene_node_id currentNodeId{};
+  // KHR_materials_variants: set from scene_state::active_variant_index
+  // at flatten-start. -1 = use each primitive's default material.
+  int32_t activeVariant{-1};
+
+  // Most recently encountered producer-authored scene_transform slot on
+  // the current walk path. 0xFFFFFFFF = none yet. Stamped on each
+  // DrawCall so PerDrawGPU.transform_slot can point at the corresponding
+  // world_transforms / world_transforms_prev entry for motion vectors.
+  std::uint32_t currentTransformSlot{0xFFFFFFFFu};
+
+  // Identity-based dedup for shared payload pointers reachable through
+  // multiple tree paths. The visitor's contract is "one entry per unique
+  // payload object" — repeating the same shared_ptr (e.g. a single
+  // primitive_cloud_component_ptr referenced by four distinct scene_node
+  // children, or a mesh_component shared across LOD levels) should
+  // contribute one bucket / draw call, not N. merge_scenes / SceneGroup
+  // already dedup roots, so this only triggers on actually-shared
+  // sub-tree references (the cases the upstream layers can't see).
+  // Nodes / meshes / clouds dedup by (pointer, accumulated world transform)
+  // so distinct instances of a shared prototype (SceneDuplicator) survive
+  // — see InstanceKey above. Lights / cameras / scene_data / instances keep
+  // plain pointer dedup: they aren't multiplied by the duplicator path here.
+  ossia::hash_set<InstanceKey, InstanceKeyHash> seenNodes;
+  ossia::hash_set<InstanceKey, InstanceKeyHash> seenClouds;
+  // Secondary dedup key for clouds: the raw_data pointer. FormatOverride
+  // clones the primitive_cloud_component to rewrite format_id but keeps
+  // the underlying raw_data (~1 GB for a 4M-splat scan) shared via
+  // shared_ptr — two distinct components pointing at the same raw_data
+  // are still one upload's worth of GPU bytes. Dedup by raw_data when
+  // present, fall back to component pointer when raw_data is null.
+  // Still combined with the world transform so a cloud reused under two
+  // duplicator transforms renders twice.
+  ossia::hash_set<InstanceKey, InstanceKeyHash> seenCloudRawData;
+  ossia::hash_set<InstanceKey, InstanceKeyHash> seenMeshes;
+  ossia::ptr_set<const ossia::light_component*> seenLights;
+  ossia::ptr_set<const ossia::camera_component*> seenCameras;
+  ossia::ptr_set<const ossia::scene_data*> seenSceneData;
+  ossia::ptr_set<const ossia::instance_component*> seenInstances;
+
+  void visitPayload(const ossia::scene_payload& payload)
+  {
+    // Dispatches on the alternative held by the payload. Each branch returns
+    // once it has handled its type; alternatives without a branch are
+    // ignored.
+
+    // -- Child node: recurse, once per (node, accumulated transform). ------
+    if(auto* node = ossia::get_if<ossia::scene_node_ptr>(&payload))
+    {
+      if(!*node)
+        return;
+      const bool first_visit
+          = seenNodes.insert(makeInstanceKey(node->get(), parentWorld)).second;
+      if(first_visit)
+        visitNode(**node);
+      return;
+    }
+
+    // -- Mesh: same (pointer, transform) keying as nodes. -------------------
+    if(auto* mesh = ossia::get_if<ossia::mesh_component_ptr>(&payload))
+    {
+      if(!*mesh)
+        return;
+      const bool first_visit
+          = seenMeshes.insert(makeInstanceKey(mesh->get(), parentWorld)).second;
+      if(first_visit)
+        visitMesh(**mesh);
+      return;
+    }
+
+    // -- Light: deduplicated by pointer only. -------------------------------
+    if(auto* light = ossia::get_if<ossia::light_component_ptr>(&payload))
+    {
+      if(*light && seenLights.insert(light->get()).second)
+      {
+        // Record the light's slot index, or the 0xFFFFFFFF sentinel when
+        // its raw_slot is empty (size == 0).
+        const auto& slot = (*light)->raw_slot;
+        out.lightArenaSlots.push_back(
+            slot.size != 0 ? slot.internal_index : 0xFFFFFFFFu);
+      }
+      return;
+    }
+
+    // -- Camera: deduplicated by pointer only. ------------------------------
+    if(auto* camera = ossia::get_if<ossia::camera_component_ptr>(&payload))
+    {
+      if(*camera && seenCameras.insert(camera->get()).second)
+      {
+        // The entry captures the world matrix and the enclosing node id at
+        // the point the camera is encountered.
+        FlatScene::CameraEntry entry;
+        entry.component = *camera;
+        entry.worldTransform = parentWorld;
+        entry.node_id = currentNodeId;
+        out.cameras.push_back(std::move(entry));
+      }
+      return;
+    }
+
+    // -- Bare transform: affects every sibling visited after it. ------------
+    if(auto* xform = ossia::get_if<ossia::scene_transform>(&payload))
+    {
+      parentWorld = parentWorld * toQMatrix(*xform);
+
+      // Only transforms with a non-empty raw_slot are emitted; the others
+      // still contribute to the accumulated matrix above.
+      if(xform->raw_slot.size != 0)
+      {
+        const auto slot_index = xform->raw_slot.internal_index;
+        out.worldTransforms.push_back(WorldTransformEmit{parentWorld, slot_index});
+        // Becomes the slot stamped on draws / clouds encountered from here
+        // on, until visitNode restores the previous value.
+        currentTransformSlot = slot_index;
+      }
+      return;
+    }
+
+    // -- Generic scene data: collected as-is, once per pointer. -------------
+    if(auto* data = ossia::get_if<ossia::scene_data_ptr>(&payload))
+    {
+      if(*data && seenSceneData.insert(data->get()).second)
+        out.scene_data.push_back(*data);
+      return;
+    }
+
+    // -- Instanced mesh: collected with the current world matrix. -----------
+    if(auto* inst = ossia::get_if<ossia::instance_component_ptr>(&payload))
+    {
+      if(*inst && seenInstances.insert(inst->get()).second)
+        out.instances.push_back({*inst, parentWorld});
+      return;
+    }
+
+    // -- Primitive cloud. ----------------------------------------------------
+    if(auto* cloud = ossia::get_if<ossia::primitive_cloud_component_ptr>(&payload))
+    {
+      if(!*cloud)
+        return;
+
+      // Dedup identity is the raw_data pointer when there is one, so that
+      // distinct components sharing the same raw_data count once; the
+      // component pointer is used only when raw_data is null. Either way
+      // the accumulated transform is part of the key.
+      const ossia::buffer_resource* raw = (*cloud)->raw_data.get();
+      bool inserted = false;
+      if(raw)
+        inserted = seenCloudRawData.insert(makeInstanceKey(raw, parentWorld)).second;
+      else
+        inserted = seenClouds.insert(makeInstanceKey(cloud->get(), parentWorld)).second;
+      if(!inserted)
+        return;
+
+      FlatScene::PrimitiveCloudDraw draw;
+      draw.cloud = *cloud;
+      draw.worldTransform = parentWorld;
+      draw.transform_slot = currentTransformSlot;
+      out.primitive_clouds.push_back(std::move(draw));
+      return;
+    }
+
+    // Any remaining payload alternative is transported but not handled here.
+  }
+
+  void visitNode(const ossia::scene_node& node)
+  {
+    // An inactive node contributes nothing at all (its children are not
+    // walked), and a node without children has nothing to walk. A node
+    // carries no transform itself: transforms arrive as scene_transform
+    // children and apply to the siblings that follow them.
+    if(!node.active || !node.has_children())
+      return;
+
+    // Snapshot the walk state that children may modify, so that nothing set
+    // inside this node's scope leaks to whatever is visited after it.
+    const QMatrix4x4 outerWorld = parentWorld;
+    const auto outerNodeId = currentNodeId;
+    const auto outerTransformSlot = currentTransformSlot;
+
+    // Payloads visited below are attributed to this node.
+    currentNodeId = node.id;
+    for(const auto& payload : *node.children)
+      visitPayload(payload);
+
+    // Restore the enclosing scope's state.
+    parentWorld = outerWorld;
+    currentNodeId = outerNodeId;
+    currentTransformSlot = outerTransformSlot;
+  }
+
+  void visitMesh(const ossia::mesh_component& mc)
+  {
+    // Primitive path: every non-empty mesh_primitive is wrapped in a
+    // transient ossia::geometry owned by its DrawCall, so downstream code
+    // handles it exactly like legacy geometry.
+    for(const auto& primitive : mc.primitives)
+    {
+      const bool drawable
+          = !primitive.vertex_buffers.empty() && primitive.vertex_count != 0;
+      if(!drawable)
+        continue;
+      DrawCall draw;
+      draw.owned_mesh = primitiveToGeometry(primitive);
+      draw.mesh = draw.owned_mesh.get();
+      draw.worldTransform = parentWorld;
+      draw.transform_slot = currentTransformSlot;
+      draw.skin = mc.skin;
+      draw.local_bounds = primitive.bounds;
+
+      // Identity: a non-zero producer-stamped stable_id wins; otherwise
+      // fall back to the primitive's address (legacy behaviour).
+      if(primitive.stable_id != 0)
+        draw.stable_id = primitive.stable_id;
+      else
+        draw.stable_id = reinterpret_cast<uint64_t>(&primitive);
+
+      // Material: the primitive's own pointer, unless the active
+      // KHR_materials_variants index is in range and maps to a non-null
+      // entry. flattenScene turns these pointers into indices afterwards.
+      const bool variantInRange
+          = activeVariant >= 0
+            && (std::size_t)activeVariant < primitive.material_variants.size();
+      if(variantInRange && primitive.material_variants[activeVariant])
+        draw.material = primitive.material_variants[activeVariant];
+      else
+        draw.material = primitive.material;
+
+      out.draws.push_back(std::move(draw));
+    }
+
+    // Legacy path: loaders that still fill mesh_component::legacy_geometry
+    // get one draw per stored geometry, referenced in place.
+    const auto& legacy = mc.legacy_geometry;
+    if(!legacy.meshes)
+      return;
+    const bool hasPrimitives = !mc.primitives.empty();
+    for(auto& geometry : legacy.meshes->meshes)
+    {
+      DrawCall draw;
+      draw.mesh = &geometry;
+      draw.geometry_ref = legacy;
+      // Legacy geometry carries no stable_id field: use its address.
+      draw.stable_id = reinterpret_cast<uint64_t>(&geometry);
+      draw.worldTransform = parentWorld;
+      draw.transform_slot = currentTransformSlot;
+      draw.skin = mc.skin;
+      // Material is borrowed from the first primitive when one exists;
+      // otherwise the draw keeps its default-constructed material.
+      if(hasPrimitives)
+        draw.material = mc.primitives[0].material;
+      // Bounds come from the mesh_component (primitive bounds may be
+      // absent on this path); empty bounds mean "never cull" downstream.
+      draw.local_bounds = mc.bounds;
+      out.draws.push_back(std::move(draw));
+    }
+  }
+
+};
+
+// Flattens `scene` into `out`: packs materials and skins, walks the node tree
+// into draws/cameras, then resolves indices and the legacy camera mirror.
+void flattenScene(const ossia::scene_spec& scene, FlatScene& out, float aspectRatio)
+{
+  out.clear();
+
+  if(!scene.state || scene.state->empty())
+    return;
+
+  // Pack materials — base + extensions in lockstep. Both vectors grow
+  // together so `material_extensions[i]` always corresponds to
+  // `materials[i]`. Missing extension data (no KHR_* extension on a
+  // given glTF material) lands as the default-constructed struct,
+  // which is the OpenPBR spec default (all lobe weights = 0, IORs at
+  // 1.5, etc.) — consumer shaders can blindly read it and get
+  // identity behaviour where the file didn't opt in.
+  if(scene.state->materials)
+  {
+    for(auto& mat : *scene.state->materials)
+    {
+      if(mat)
+      {
+        out.materials.push_back(packMaterial(*mat));
+        out.material_extensions.push_back(packMaterialExtensions(*mat));
+      }
+      else
+      {
+        out.materials.push_back(MaterialGPU{});
+        out.material_extensions.push_back(MaterialExtensionsGPU{});
+      }
+    }
+  }
+
+  // Pack skeletons: forward kinematics through joint hierarchy, then
+  // joint_matrix[i] = world_joint[i] × inverse_bind_matrix[i]. Matches the
+  // glTF skinning convention; consumer shaders multiply vertex position by
+  // Σ(w_j × joint_matrix[j]).
+  if(scene.state->skeletons)
+  {
+    auto jointLocal = [](const ossia::skeleton_joint& j) {
+      QMatrix4x4 m;
+      m.translate(j.translation[0], j.translation[1], j.translation[2]);
+      m.rotate(QQuaternion(j.rotation[3], j.rotation[0], j.rotation[1], j.rotation[2]));
+      m.scale(j.scale[0], j.scale[1], j.scale[2]);
+      return m;
+    };
+
+    out.skins.reserve(scene.state->skeletons->size());
+    for(const auto& sk : *scene.state->skeletons)
+    {
+      SkeletonGPU sg;
+      if(!sk)
+      {
+        out.skins.push_back(std::move(sg));
+        continue;
+      }
+
+      // Multi-pass forward kinematics: resolve any joint whose parent has
+      // already been resolved, looping until all are done. The glTF 2.0
+      // spec does NOT guarantee topological ordering of skin.joints, so
+      // we cannot assume parent_index < i. No fixed pass cap: a reversed
+      // chain needs one pass per level. DFS-ordered skins take one pass.
+      const std::size_t N = sk->joints.size();
+      std::vector<QMatrix4x4> world(N);
+      std::vector<bool> resolved(N, false);
+      sg.joint_matrices.resize(N);
+      std::size_t resolvedCount = 0;
+      while(resolvedCount < N)
+      {
+        bool changed = false;
+        for(std::size_t i = 0; i < N; ++i)
+        {
+          if(resolved[i])
+            continue;
+          const auto& j = sk->joints[i];
+          // Root joint or invalid parent index: resolve immediately.
+          if(j.parent_index < 0 || j.parent_index >= (int32_t)N)
+          {
+            world[i] = jointLocal(j);
+            resolved[i] = true;
+            ++resolvedCount;
+            changed = true;
+            continue;
+          }
+          // Otherwise, parent must be resolved first.
+          if(!resolved[(std::size_t)j.parent_index])
+            continue;
+          world[i] = world[j.parent_index] * jointLocal(j);
+          resolved[i] = true;
+          ++resolvedCount;
+          changed = true;
+        }
+        // Each surviving pass resolves >= 1 joint, so N passes is the bound.
+        if(!changed)
+          break; // cycle or orphan: bail out instead of spinning
+      }
+      if(resolvedCount < N)
+      {
+        qWarning() << "SceneGPUState: skeleton FK did not converge —"
+                   << (N - resolvedCount) << "joint(s) unresolved (cycle or"
+                   << "orphan parent). Falling back to local matrices.";
+        for(std::size_t i = 0; i < N; ++i)
+        {
+          if(!resolved[i])
+            world[i] = jointLocal(sk->joints[i]);
+        }
+      }
+      // Stamp joint_matrices = world × inverse_bind_matrix once FK is done.
+      for(std::size_t i = 0; i < N; ++i)
+      {
+        const QMatrix4x4 ibm
+            = QMatrix4x4(sk->joints[i].inverse_bind_matrix, 4, 4);
+        sg.joint_matrices[i] = world[i] * ibm;
+      }
+      out.skins.push_back(std::move(sg));
+    }
+  }
+
+  // Walk the node tree. mesh_primitive / mesh_component now carry
+  // direct shared_ptr references to their material and skin, so no
+  // per-root index-bias bookkeeping is required.
+  QMatrix4x4 identity;
+  FlattenVisitor vis{out, identity};
+  // KHR_materials_variants: seed the visitor from scene_state. When
+  // no variants are declared (typical) this stays at -1 and the
+  // per-draw override branch compiles to a cheap null-check.
+  vis.activeVariant = scene.state->active_variant_index;
+  const auto& roots = *scene.state->roots;
+  for(std::size_t ri = 0; ri < roots.size(); ++ri)
+  {
+    // Same dedup contract as visitPayload's scene_node_ptr branch:
+    // skip roots whose (pointer, world transform) was already walked.
+    // merge_scenes / SceneGroup are expected to dedup before this point,
+    // but a scene_state assembled by hand could still place the same root
+    // in `roots[]` more than once. Roots are walked at the visitor's
+    // current world (identity here), matching the key visitPayload uses.
+    if(!roots[ri]
+       || !vis.seenNodes.insert(makeInstanceKey(roots[ri].get(), vis.parentWorld))
+               .second)
+      continue;
+    vis.visitNode(*roots[ri]);
+  }
+
+  // Resolve DrawCall::materialIndex / ::skinIndex from the direct
+  // shared_ptr references stamped on each draw. materialIndex is the
+  // position of dc.material inside scene.state->materials (packed
+  // above into out.materials in the same order), so the shaders can
+  // continue to SSBO-index into scene_materials[draw.material_index].
+  if(scene.state->materials && !scene.state->materials->empty())
+  {
+    ossia::hash_map<const ossia::material_component*, int> mat_index;
+    mat_index.reserve(scene.state->materials->size());
+    for(std::size_t i = 0; i < scene.state->materials->size(); ++i)
+    {
+      const auto& m = (*scene.state->materials)[i];
+      if(m)
+        mat_index[m.get()] = (int)i;
+    }
+    for(auto& dc : out.draws)
+    {
+      if(!dc.material)
+        continue;
+      auto it = mat_index.find(dc.material.get());
+      dc.materialIndex = (it != mat_index.end()) ? it->second : -1;
+    }
+  }
+  if(scene.state->skeletons && !scene.state->skeletons->empty())
+  {
+    ossia::hash_map<const ossia::skeleton_component*, int> skin_index;
+    skin_index.reserve(scene.state->skeletons->size());
+    for(std::size_t i = 0; i < scene.state->skeletons->size(); ++i)
+    {
+      const auto& s = (*scene.state->skeletons)[i];
+      if(s)
+        skin_index[s.get()] = (int)i;
+    }
+    for(auto& dc : out.draws)
+    {
+      if(!dc.skin)
+        continue;
+      auto it = skin_index.find(dc.skin.get());
+      dc.skinIndex = (it != skin_index.end()) ? it->second : -1;
+    }
+  }
+
+  // Also surface any cameras registered at scene_state level (producers
+  // that don't want to embed a camera node can publish via `cameras` only).
+  // Dedup against the set the tree walk already collected: a camera that
+  // appears both as a tree payload (with worldTransform) AND in
+  // scene_state.cameras would otherwise be entered twice — once with
+  // its real placement, once at identity — and the active-camera resolver
+  // would pick the wrong one half the time.
+  if(scene.state->cameras)
+  {
+    for(const auto& cam : *scene.state->cameras)
+    {
+      if(!cam || !vis.seenCameras.insert(cam.get()).second)
+        continue;
+      FlatScene::CameraEntry e;
+      e.component = cam;
+      // No world transform context at this level — identity placement.
+      e.worldTransform = QMatrix4x4{};
+      out.cameras.push_back(std::move(e));
+    }
+  }
+
+  // Resolve active camera: match scene_state.active_camera_id against the
+  // collected camera entries; fall back to the first camera if the id is
+  // unset or not found.
+  if(!out.cameras.empty())
+  {
+    out.activeCameraIndex = 0;
+    if(scene.state->active_camera_id.value != 0)
+    {
+      for(std::size_t i = 0; i < out.cameras.size(); ++i)
+      {
+        if(out.cameras[i].node_id == scene.state->active_camera_id)
+        {
+          out.activeCameraIndex = (int)i;
+          break;
+        }
+      }
+    }
+  }
+
+  // Populate legacy single-camera mirror fields so consumers that haven't
+  // migrated to `cameras[activeCameraIndex]` keep working.
+  if(out.activeCameraIndex >= 0)
+  {
+    const auto& e = out.cameras[(std::size_t)out.activeCameraIndex];
+    const auto& cam = *e.component;
+    out.cameraPosition = e.worldTransform.column(3).toVector3D();
+    out.viewMatrix = e.worldTransform.inverted();
+    out.cameraFov = cam.yfov * (180.f / float(M_PI));
+    out.cameraNear = cam.znear;
+    out.cameraFar = cam.zfar;
+    out.projectionMatrix.setToIdentity();
+    out.projectionMatrix.perspective(
+        out.cameraFov, aspectRatio, out.cameraNear, out.cameraFar);
+    out.hasCamera = true;
+  }
+  else
+  {
+    out.cameraPosition = QVector3D(0.f, 0.f, 3.f);
+    out.viewMatrix.setToIdentity();
+    out.viewMatrix.lookAt(
+        out.cameraPosition, QVector3D(0.f, 0.f, 0.f), QVector3D(0.f, 1.f, 0.f));
+    out.projectionMatrix.setToIdentity();
+    out.projectionMatrix.perspective(60.f, aspectRatio, 0.1f, 1000.f);
+    out.cameraFov = 60.f;
+    out.cameraNear = 0.1f;
+    out.cameraFar = 1000.f;
+    out.hasCamera = false;
+  }
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.hpp
new file mode 100644
index 0000000000..792df38622
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.hpp
@@ -0,0 +1,656 @@
+#pragma once
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <QMatrix4x4>
+
+#include <cstdint>
+#include <vector>
+
+namespace score::gfx
+{
+
+// GPU-friendly structures for packing scene data into UBOs/SSBOs.
+// All matrices are column-major (OpenGL/Vulkan convention).
+//
+// The structs split into two families:
+//
+//   Raw*   — written by source nodes (Camera, Light, Transform3D,
+//            EnvironmentLoader) into their own GpuResourceRegistry arena
+//            slot at their operator()() time. View-independent — no
+//            aspect-ratio math, no scene-graph composition applied.
+//
+//   <Cooked> (CameraUBOData / MaterialGPU / PerDrawGPU /
+//            WorldTransformMat4) — produced by ScenePreprocessor from
+//            Raw* arenas + render-target state + scene-topology chain.
+//            These are what consumer shaders bind.
+//
+// Materials and env are scene-composition-independent so Raw == Cooked
+// for them — no separate RawMaterial / RawEnv structs below, MaterialGPU
+// and EnvParamsUBO are used directly from source nodes.
+
+#pragma pack(push, 1)
+
+// LightGPU removed in task 28c. Consumer shaders read RawLightData
+// directly from the RawLight arena and compose world-space direction
+// via world_transforms[transform_slot].
+
+// Scene-level UBO: camera + global scene data.
+struct SceneUBO
+{
+  float view[16]{}; // 16 floats (one 4x4 matrix)
+  float projection[16]{}; // 16 floats (one 4x4 matrix)
+  float viewProjection[16]{}; // 16 floats (one 4x4 matrix)
+  float cameraPosition[4]{}; // xyz = position, w = padding
+  float time{};
+  int32_t lightCount{};
+  int32_t materialCount{};
+  float padding0{}; // with the three scalars above, fills one 16-byte row
+  float ambientColor[4]{0.03f, 0.03f, 0.03f, 1.f};
+};
+
+// Per-mesh UBO: model transform for the current draw call.
+struct MeshUBO
+{
+  float model[16]{}; // 16 floats (one 4x4 matrix)
+  float modelViewProjection[16]{}; // 16 floats (one 4x4 matrix)
+  float normalMatrix[12]{}; // mat3 in std140 = 3 × vec4 (48 bytes)
+  int32_t materialIndex{};
+  float padding[3]{}; // with materialIndex, fills one 16-byte row
+};
+
+// Packed 32-bit texture reference stored in MaterialGPU::textureRefs[].
+// Layout (MSB → LSB), as encoded by tex_ref_static() below:
+//   bits 31..30 : source (0 = NONE, 1 = STATIC pool, 2 = DYNAMIC pool)
+//   bits 29..23 : bucket index (0..127) within the selected pool
+//   bits 22.. 0 : layer index (0..8M) within the bucket's texture array
+//
+// 0xFFFFFFFF is the "no texture" sentinel — shader should fall back to
+// the constant baseColor factor, metallic_factor, etc.
+//
+// Step 1 of the texture rollout uses only source=STATIC, bucket=0, so the
+// low 23 bits hold the layer index directly. Bucketing + dynamic pools will
+// slot into this same encoding without a material layout change.
+inline constexpr uint32_t tex_ref_none() { return 0xFFFFFFFFu; }
+inline constexpr uint32_t tex_ref_static(uint32_t bucket, uint32_t layer)
+{
+  // Bit budget: source (2) | bucket (7) | layer (23) = 32 bits.
+  //
+  // Seven bucket bits leave room for 128 buckets, well above the runtime
+  // cap (kMaxBuckets = 16, see GpuResourceRegistry.hpp); raising that cap
+  // means bigger shader sampler arrays but the same encoding. The 23-bit
+  // layer field addresses 8M layers.
+  //
+  // Shaders undo this with `(ref >> 23) & 0x7Fu` (bucket) and
+  // `ref & 0x007FFFFFu` (layer); see classic_pbr_full.frag et al.
+  const uint32_t source_bits = 1u << 30; // source = 1 (STATIC)
+  return source_bits | ((bucket & 0x7Fu) << 23) | (layer & 0x007FFFFFu);
+}
+// Dynamic texture reference: source = 2, slot index (0..kMaxDynamicSlots-1)
+// in the low 24 bits. That mask also covers bit 23 (the lowest bucket bit of
+// the static layout), so the bucket field only reads 0 for slots < 2^23.
+// Consumer shaders branch on the source bits and sample one of a small fixed
+// set of direct sampler2D uniforms named `<channel>Dyn0`, `<channel>Dyn1`,
+// etc. — no array layer, the upstream texture handle is forwarded as-is. For
+// large runtime textures (8K video, HDR outputs) that don't fit the array path.
+inline constexpr uint32_t tex_ref_dynamic(uint32_t slot)
+{
+  return 0x80000000u | (slot & 0x00FFFFFFu); // 0x80000000 = source 2 << 30
+}
+
+// Per-material data for the material SSBO. 80 bytes (5 × vec4).
+//
+// VJ context → few materials, each potentially heavy (full OpenPBR
+// extension set + feature-mask-driven SER sorting). 16 B of runtime
+// metadata is a rounding error on a few-dozen materials and leaves
+// headroom for future fields (animation ID, LOD hint, shader
+// permutation hash) without another ABI break.
+struct MaterialGPU
+{
+  float baseColor[4]{1.f, 1.f, 1.f, 1.f}; // bytes 0..15
+  // bytes 16..31: x = metallic, y = roughness, z = occlusion, w = unlit flag
+  float metallicRoughnessOcclusionUnlit[4]{0.f, 0.5f, 1.f, 0.f};
+  // bytes 32..47: xyz = emissive, w = emissive strength
+  float emissive_strength[4]{0.f, 0.f, 0.f, 1.f};
+  // bytes 48..63: packed texture refs, [0] = base color, [1..3] reserved for
+  // MR, normal, emissive. See tex_ref_* helpers; default = tex_ref_none().
+  uint32_t textureRefs[4]{
+      0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu};
+
+  // --- Runtime metadata (16 B, bytes 64..79) --------------------------
+  // Producer-derived bitmask of "which BRDF lobes / features are active"
+  // for this material. Used as:
+  //   - Coherence key for NVIDIA Shader Execution Reordering
+  //     (`reorderThread(feature_mask)` before closest-hit shading) so
+  //     threads in the same warp converge on the same shading path.
+  //   - Shader-side specialization in the main closest-hit / fragment
+  //     body: `if(fm & HAS_TRANSMISSION) { ... }`.
+  // Bit layout:
+  //   bit 0  : has_base_color_texture
+  //   bit 1  : has_metal_rough_texture
+  //   bit 2  : has_normal_texture
+  //   bit 3  : has_emissive_texture
+  //   bit 4  : unlit
+  //   bit 5  : alpha_non_opaque (mask OR blend)
+  //   bit 6  : has_clearcoat  (KHR_materials_clearcoat)
+  //   bit 7  : has_sheen      (KHR_materials_sheen)
+  //   bit 8  : has_transmission (KHR_materials_transmission)
+  //   bit 9  : has_volume     (KHR_materials_volume)
+  //   bit 10 : has_specular   (KHR_materials_specular)
+  //   bit 11 : has_iridescence (KHR_materials_iridescence)
+  //   bit 12 : has_anisotropy
+  //   bit 13 : has_diffuse_transmission
+  //   bit 14 : has_subsurface
+  //   bit 15 : thin_walled
+  //   bit 16 : alpha_mask                  (glTF alphaMode = MASK)
+  //   bit 17 : alpha_blend                 (glTF alphaMode = BLEND)
+  //   bit 18 : double_sided                (glTF doubleSided)
+  //   bit 19 : has_separate_occlusion      (occlusion ≠ MR source)
+  //   bits 20-21 : BC       texcoord_set (0 or 1, glTF TEXCOORD_0/1)
+  //   bits 22-23 : MR       texcoord_set
+  //   bits 24-25 : Normal   texcoord_set
+  //   bits 26-27 : Emissive texcoord_set
+  //   bits 28-29 : Occlusion texcoord_set
+  //   bit 30 : shadow_caster_disabled      (material.shadow_caster == false)
+  //   bit 31 : reflection_caster_disabled  (material.reflection_caster == false)
+  uint32_t feature_mask{0u};
+
+  // Shader binding table hit-group index for ray tracing pipelines.
+  // Producers with a pre-computed hit-group mapping stamp this at
+  // material-authoring time; 0 means "default lit material" and is the
+  // safe fallback for renderers that haven't computed the mapping yet.
+  uint32_t hit_group_id{0u};
+
+  // 5th texture channel (occlusion). glTF separates occlusionTexture
+  // from metallicRoughnessTexture; conventionally both are sometimes
+  // packed into the same image (occlusion in R, roughness in G,
+  // metallic in B). When they're distinct sources, this slot points
+  // at the occlusion array layer; when they're the same, this stays
+  // at tex_ref_none() and the shader uses MR.r * occlusion_factor.
+  uint32_t occlusion_textureRef{0xFFFFFFFFu};
+
+  // glTF alphaMode = MASK cutoff. Shader does `if(alpha < cutoff)
+  // discard;` when the `alpha_mask` feature_mask bit is set.
+  // Default 0.5 matches the glTF spec default.
+  float alpha_cutoff{0.5f};
+};
+static_assert(sizeof(MaterialGPU) == 80, "MaterialGPU layout must match shader");
+
+// Feature-mask bit flags. Producers OR these together to derive the
+// per-material feature_mask; hit-group shaders branch on them to
+// select the relevant BRDF lobe code path.
+namespace material_feature
+{
+inline constexpr uint32_t has_base_color_texture   = 1u << 0;
+inline constexpr uint32_t has_metal_rough_texture  = 1u << 1;
+inline constexpr uint32_t has_normal_texture       = 1u << 2;
+inline constexpr uint32_t has_emissive_texture     = 1u << 3;
+inline constexpr uint32_t unlit                    = 1u << 4;
+inline constexpr uint32_t alpha_non_opaque         = 1u << 5; // mask OR blend
+inline constexpr uint32_t has_clearcoat            = 1u << 6; // KHR_materials_clearcoat
+inline constexpr uint32_t has_sheen                = 1u << 7; // KHR_materials_sheen
+inline constexpr uint32_t has_transmission         = 1u << 8; // KHR_materials_transmission
+inline constexpr uint32_t has_volume               = 1u << 9; // KHR_materials_volume
+inline constexpr uint32_t has_specular             = 1u << 10; // KHR_materials_specular
+inline constexpr uint32_t has_iridescence          = 1u << 11; // KHR_materials_iridescence
+inline constexpr uint32_t has_anisotropy           = 1u << 12;
+inline constexpr uint32_t has_diffuse_transmission = 1u << 13;
+inline constexpr uint32_t has_subsurface           = 1u << 14;
+inline constexpr uint32_t thin_walled              = 1u << 15;
+// glTF alpha mode (parsed from material.alphaMode). MASK → shader
+// discards fragments with alpha < alpha_cutoff. BLEND → shader emits
+// translucent alpha (caller handles depth/sort separately).
+inline constexpr uint32_t alpha_mask               = 1u << 16;
+inline constexpr uint32_t alpha_blend              = 1u << 17;
+// glTF doubleSided. When set, shader flips the surface normal for
+// back-facing fragments (so lighting works on both sides). When unset
+// AND the pipeline cull mode is `none` (MDI default), shader discards
+// back-facing fragments to mimic single-sided culling.
+inline constexpr uint32_t double_sided             = 1u << 18;
+// Separate occlusion texture present (independent from MR texture).
+// Shader samples mat.occlusion_textureRef instead of using mr.r.
+inline constexpr uint32_t has_separate_occlusion   = 1u << 19;
+// Scene-filter opt-outs at bits 30/31 (bits 20-29 hold the per-texture
+// texcoord_set fields and have no named flag here). "Disabled"
+// semantics: default 0 = participates in the pass. CSF filter shaders
+// test these to drop draws from auxiliary passes (shadow-map, reflection capture).
+inline constexpr uint32_t shadow_caster_disabled     = 1u << 30;
+inline constexpr uint32_t reflection_caster_disabled = 1u << 31;
+}
+
+// Per-material EXTENSION data — parallel SSBO, indexed by the same
+// `material_index` as MaterialGPU. Shaders that only need the 80-byte
+// base material (classic_pbr / classic_pbr_textured / …) ignore this.
+// OpenPBR-grade shaders declare `scene_materials_ext` and read the
+// full lobe set.
+//
+// Layout is std430-friendly: every member starts on a 16-byte boundary
+// (vec4 / uvec4 alignment rule). Field names track OpenPBR_
+// ResolvedInputs / glTF KHR extension names so translation on the shader
+// side is a 1:1 copy.
+//
+// Texture refs (`textureRefs[16]`) are encoded with the same
+// `tex_ref_static / tex_ref_dynamic / tex_ref_none` helpers as
+// `MaterialGPU.textureRefs` — shaders branch on the top bits and either
+// sample the corresponding per-channel texture array (static) or a
+// direct sampler2D slot (dynamic). Slot ordering is documented below;
+// the indices MUST match what `packMaterialExtensions` writes and what
+// the consumer shader's Material_Ext struct reads.
+struct MaterialExtensionsGPU
+{
+  // --- Coat / clearcoat (KHR_materials_clearcoat) ---------------------
+  // x = coat_weight, y = coat_roughness, z = coat_ior, w = coat_darkening
+  float coat[4]{0.f, 0.f, 1.5f, 0.f};
+  // x = roughness_anisotropy, y = rotation_cos, z = rotation_sin, w = _pad
+  float coat_anisotropy[4]{0.f, 1.f, 0.f, 0.f};
+
+  // --- Fuzz / sheen (KHR_materials_sheen) -----------------------------
+  // xyz = color, w = roughness
+  float fuzz_color[4]{0.f, 0.f, 0.f, 0.f};
+
+  // --- Transmission + volume (KHR_materials_transmission + _volume) ---
+  // x = transmission_weight, y = transmission_depth,
+  // z = dispersion_scale,    w = dispersion_abbe_number
+  float transmission[4]{0.f, 0.f, 0.f, 20.f};
+  // xyz = transmission_color, w = scatter_anisotropy
+  float transmission_color[4]{1.f, 1.f, 1.f, 0.f};
+  // xyz = transmission_scatter (vec3), w = _pad
+  float transmission_scatter[4]{0.f, 0.f, 0.f, 0.f};
+
+  // --- Specular (KHR_materials_specular) + base specular anisotropy ---
+  // x = specular_weight, yzw = specular_color
+  float specular_weight_color[4]{1.f, 1.f, 1.f, 1.f};
+  // x = specular_ior,   y = roughness_anisotropy,
+  // z = rotation_cos,   w = rotation_sin
+  float specular_ior_anisotropy[4]{1.5f, 0.f, 1.f, 0.f};
+
+  // --- Thin-film iridescence (KHR_materials_iridescence) --------------
+  // x = thin_film_weight (iridescence factor),
+  // y = thin_film_thickness (glTF average of min/max),
+  // z = thin_film_ior, w = _pad
+  float thin_film[4]{0.f, 400.f, 1.3f, 0.f};
+
+  // --- Diffuse transmission (KHR_materials_diffuse_transmission) ------
+  // x = factor, yzw = color
+  float diffuse_transmission[4]{0.f, 1.f, 1.f, 1.f};
+
+  // --- Subsurface (OpenPBR subsurface; not present in stock glTF) -----
+  // x = weight, yzw = color
+  float subsurface_weight_color[4]{0.f, 0.8f, 0.8f, 0.8f};
+  // x = radius, yzw = radius_scale
+  float subsurface_radius_scale[4]{1.f, 1.f, 0.5f, 0.25f};
+
+  // --- Misc scalars + flags -------------------------------------------
+  // x = base_diffuse_roughness (OpenPBR Oren-Nayar knob),
+  // y = thin_walled (bool-as-float 0/1),
+  // z = _pad, w = _pad
+  float flags[4]{0.f, 0.f, 0.f, 0.f};
+
+  // --- Texture refs (64 B; struct total = 13 × 16 + 64 = 272 B) -------
+  // Slot layout:
+  //   0  = coat factor
+  //   1  = coat roughness
+  //   2  = coat normal
+  //   3  = fuzz color (sheen)
+  //   4  = fuzz roughness
+  //   5  = transmission
+  //   6  = specular factor
+  //   7  = specular color
+  //   8  = iridescence (thin-film)
+  //   9  = iridescence thickness
+  //   10 = anisotropy
+  //   11 = diffuse transmission
+  //   12 = diffuse transmission color
+  //   13 = subsurface factor
+  //   14 = subsurface color
+  //   15 = reserved
+  uint32_t textureRefs[16]{
+      0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu,
+      0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu,
+      0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu,
+      0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu};
+};
+
+// ─── Raw layouts (source-owned arena slots) ────────────────────────────
+//
+// Written by source halp nodes directly into their GpuResourceRegistry
+// arena slot at their own operator()() time. ScenePreprocessor reads
+// these, applies aspect-ratio / scene-graph composition, and writes the
+// cooked equivalents (CameraUBOData / world-transform mat4 / …) that
+// consumer shaders bind. RawLightData is read by shaders directly.
+
+// Camera parameters before matrix composition. No aspect ratio, no
+// view / projection matrices — the preprocessor builds those per render
+// target.
+struct RawCameraData
+{
+  float eye[4]{0.f, 0.f, 3.f, 0.f};      // xyz = world-space eye, w = pad
+  float target[4]{0.f, 0.f, 0.f, 0.f};   // xyz = look-at target,  w = pad
+  float up[4]{0.f, 1.f, 0.f, 0.f};       // xyz = up,              w = pad
+  float yfov{60.f * 3.14159265f / 180.f}; // vertical FOV, radians (default 60°)
+  float znear{0.1f};
+  float zfar{1000.f};
+  uint32_t projection{0};                // 0 = perspective, 1 = orthographic
+}; // 3 × 16 B + 4 × 4 B = 64 B under the enclosing pack(1)
+
+// Light parameters in local frame. The final world-space direction
+// depends on the node's world transform (composed by the preprocessor
+// from its scene-node parent chain); this struct stores only what the
+// node itself knows.
+struct RawLightData
+{
+  float color[4]{1.f, 1.f, 1.f, 1.f};          // xyz = color, w = intensity
+  float local_direction[4]{0.f, 0.f, -1.f, 0.f}; // xyz = dir (local),
+                                                  // w = type enum:
+                                                  //   0 = directional
+                                                  //   1 = point
+                                                  //   2 = spot
+                                                  // (area / dome modes
+                                                  // collapse to point /
+                                                  // directional; dome
+                                                  // lights are served by
+                                                  // the scene-global env
+                                                  // path, see EnvParamsUBO.)
+  float range_cone[4]{                          // x = range (point/spot;
+      0.f, 1.f, 0.7071f, 0.005f};                //     0 = infinite)
+                                                  // y = inner cone cos
+                                                  // z = outer cone cos
+                                                  // w = shadow depth bias
+  // Shadow gate — consumer shadow-receiving shaders (classic_pbr_shadowed,
+  // etc.) MUST multiply the computed shadow term by `shadow_enabled != 0`
+  // so lights with shadow casting disabled fall through to unoccluded
+  // lighting. Per-light, per-frame opt-out; separate from the
+  // per-material shadow_caster_disabled bit (which controls whether a
+  // draw participates in the depth-only cast pass).
+  uint32_t shadow_enabled{0};                    // default 0 = shadows off
+  uint32_t decay_mode{2};                        // 0=const 1=lin 2=quad 3=cubic
+  // RawTransform arena slot index for this light's scene_transform.
+  // Consumer shader reads world_transforms.data[transform_slot] to
+  // get the world matrix, composes world-space direction / position
+  // from local_direction on the fly. Replaces the preprocessor's
+  // CPU-side packLight world composition (task 28b).
+  uint32_t transform_slot{0};
+  // Receiver-plane / slope-scaled bias for shadow sampling. The UI
+  // already exposes this via Light::inputs.shadow_normal_bias; the
+  // slot was previously dead padding. PCF shaders add
+  // `normal_bias * (1 - max(dot(N, Ldir), 0))` to the receiver depth
+  // before the comparison to kill shadow acne on grazing surfaces.
+  float normal_bias{0.01f};
+};
+static_assert(sizeof(RawLightData) == 64, "RawLightData must stay 64 B");
+
+// Local TRS for a scene_transform. Stamped by the producer and uploaded
+// into a RawTransform arena slot. Hierarchy resolution (parent-chain
+// composition) stays on the CPU side inside ScenePreprocessor's
+// FlattenVisitor — the 2026-standard pattern across UE5 / Bevy /
+// Unity DOTS / Godot: scene hierarchy is too small-N for GPU-side
+// wavefront evaluation to win. The composed world matrix for each
+// transform ends up in the WorldTransform arena at the same offset
+// that the RawTransform slot occupies.
+struct RawLocalTransform
+{
+  float translation[4]{0.f, 0.f, 0.f, 0.f}; // xyz + pad
+  float rotation[4]{0.f, 0.f, 0.f, 1.f};    // quaternion xyzw (default = identity)
+  float scale[4]{1.f, 1.f, 1.f, 0.f};       // xyz + pad (default = unit scale)
+  float _pad[4]{};                           // std430 alignment; total = 64 B
+};
+
+// Environment parameters (ambient, fog, exposure, gamma). Already
+// view-independent — this is both Raw (source-written) and Cooked
+// (shader-bound) in one struct. Published here so EnvironmentLoader
+// can write its own slot bytes matching what ScenePreprocessor expects
+// on the other end.
+struct EnvParamsUBO
+{
+  float ambient[4]{0.03f, 0.03f, 0.03f, 1.f};        // xyz = color, w = intensity
+  float fog_color_density[4]{0.8f, 0.8f, 0.8f, 0.f}; // xyz = color, w = density
+  float fog_range[4]{10.f, 100.f, 0.f, 0.f};         // x = start, y = end,
+                                                      // z = mode, w = enabled (0/1)
+  float exposure_gamma[4]{1.f, 2.2f, 0.f, 0.f};      // x = exposure (linear),
+                                                      // y = gamma, zw = pad
+}; // 4 × 16 B = 64 B
+
+// World-space mat4 emitted by ScenePreprocessor's FlattenVisitor from
+// the scene_node tree (CPU walk with parent-chain accumulation). One
+// entry per producer-authored scene_transform, laid out at the same
+// byte offset as the producer's RawTransform slot so shaders can
+// address either side by `scene_transform::raw_slot.offset`.
+struct WorldTransformMat4
+{
+  float m[16]{1.f, 0.f, 0.f, 0.f,   // 16 floats = 64 B; defaults to the identity matrix
+              0.f, 1.f, 0.f, 0.f,
+              0.f, 0.f, 1.f, 0.f,
+              0.f, 0.f, 0.f, 1.f};
+};
+
+// Shadow cascades UBO — scene-wide, published by ScenePreprocessor as
+// the `shadow_cascades` aux on the output geometry. Shading shaders
+// (classic_pbr_shadowed) read this to pick the right cascade per
+// fragment and sample the depth-array texture. The depth-only pass
+// (shadow_cascades.vert / .frag) also reads light_view_proj from this
+// UBO to transform vertices into cascade clip-space; its per-invocation
+// `cascade_index` lives in a separate `shadow_draw_cfg` UBO so the
+// two use-cases don't fight for the same binding.
+//
+// std140 layout, 560 B total. Fields mirror
+// `ossia::shadow_cascades_info` in geometry_port.hpp:
+//   light_view_proj[8]           — world → cascade clip-space per cascade
+//   cascade_split_distances[8]   — view-space far-plane Z for cascades 0..7;
+//                                  entry k is the far plane of cascade k.
+//                                  Slots >= cascade_count read as 0.
+//   cascade_count                — how many cascade entries are live (0..8)
+struct ShadowCascadesUBO
+{
+  float light_view_proj[8][16]{};      // 8 mat4, 64 B each = 512 B
+  // 8 split distances symmetric with light_view_proj[8].
+  // std140: two consecutive vec4 rows (32 B total). NOTE(review): presumably vec4[2] shader-side (std140 float[8] has a 16 B stride) — confirm.
+  float cascade_split_distances[8]{};
+  uint32_t cascade_count{0};           // live cascade entries (0..8)
+  uint32_t _pad0{};                    // _pad0.._pad2 round the tail up to 16 B
+  uint32_t _pad1{};
+  uint32_t _pad2{};
+};
+static_assert(sizeof(ShadowCascadesUBO) == 560,
+              "ShadowCascadesUBO size = mat4[8] (512) + float[8] (32) + 4×uint (16) = 560 B");
+
+#pragma pack(pop)
+
+// CPU-side flattened scene representation.
+struct DrawCall
+{
+  // Points at either a mesh from geometry_ref (legacy_geometry path) OR at
+  // owned_mesh (mesh_primitive path). `mesh` is always non-null for a valid
+  // draw; one of geometry_ref or owned_mesh keeps the target alive.
+  const ossia::geometry* mesh{};                // Non-owning; null until a target is assigned.
+  ossia::geometry_spec geometry_ref;            // Legacy path: keeps source alive.
+  std::shared_ptr<ossia::geometry> owned_mesh;  // Primitive path: built from mesh_primitive.
+
+  // Stable cross-frame identity of the source mesh primitive. Unlike
+  // `mesh`, which for the primitive path points into a freshly-allocated
+  // ossia::geometry wrapper (different pointer every flatten call), this
+  // is the source mesh_primitive's stable_id (or the raw pointer bits as
+  // a fallback when the primitive was emitted by a legacy producer that
+  // hasn't stamped a stable_id yet). Used by ScenePreprocessor to detect
+  // "mesh list unchanged vs last frame" and skip vertex/index re-uploads.
+  uint64_t stable_id{};
+
+  QMatrix4x4 worldTransform;
+
+  // Direct shared_ptr to the material — null means "no material / use
+  // the renderer's default factors". Carries the material's gpu_slot_ref
+  // for GPU-side lookup without any scene-wide index array.
+  ossia::material_component_ptr material;
+
+  // Direct shared_ptr to the skin — null means "no skinning". When
+  // present, the ScenePreprocessor attaches a `joint_matrices` auxiliary
+  // buffer to this draw's output geometry; a downstream skinning compute
+  // pass (or user shader) deforms positions/normals using
+  // joints0/weights0 vertex attributes.
+  ossia::skeleton_component_ptr skin;
+
+  // Index into FlatScene::materials after the flatten pass has
+  // deduplicated the material pointers into its flat materials array.
+  // -1 means "material was null / default factors only". Set by
+  // flattenScene after collecting all draws.
+  int materialIndex{-1};
+
+  // Index into FlatScene::skins after dedup. -1 = no skinning.
+  int skinIndex{-1};
+
+  // Local-space AABB of the source mesh_primitive. Copied by the
+  // FlattenVisitor from mesh_primitive::bounds. Empty (inverted) if the
+  // source didn't compute bounds — downstream per_draw_bounds emitter
+  // writes an infinite AABB in that case so GPU culling shaders never
+  // cull the draw.
+  ossia::aabb local_bounds{};
+
+  // RawTransform arena slot of the nearest producer-authored
+  // scene_transform on this draw's walk path (0xFFFFFFFF = none). Stamped
+  // into PerDrawGPU.transform_slot so shaders can look up
+  // world_transforms_prev[slot] for motion vectors / TAA / reprojection.
+  std::uint32_t transform_slot{0xFFFFFFFFu};
+};
+
+// Per-skeleton packed joint matrices: joint_matrix[i] = world_joint × inverse_bind.
+// One std::vector<QMatrix4x4> per skeleton index (parallel to scene_state.skeletons).
+struct SkeletonGPU
+{
+  std::vector<QMatrix4x4> joint_matrices;  // element i = world_joint × inverse_bind for joint i
+};
+
+// World-matrix emission: one entry per producer-authored
+// scene_transform seen during the walk. The preprocessor's private
+// world-transforms SSBO (m_worldTransformsBuffer) is laid out as a
+// packed array indexed by the scene_transform's `raw_slot.internal_index`
+// (the RawTransform arena slot index). Consumer shaders read
+// `world_transforms.data[transform_slot]` for any light / particle /
+// compute pass that needs to transform a local-space quantity into
+// world space for a specific slot-addressable transform.
+//
+// Multi-preprocessor correctness: each preprocessor owns its own
+// m_worldTransformsBuffer, so two preprocessors with different filtered
+// views of the same source scene legitimately compute different world
+// matrices for the same scene_transform without stomping each other.
+struct WorldTransformEmit
+{
+  QMatrix4x4 world;  // world matrix to emit (QMatrix4x4 default-constructs to identity)
+  uint32_t transform_slot{0xFFFFFFFFu};  // RawTransform arena slot index; 0xFFFFFFFF = none (same sentinel as DrawCall)
+};
+
+struct FlatScene
+{
+  std::vector<DrawCall> draws;
+  // RawLight arena slot index per light the walk encountered.
+  // 0xFFFFFFFF for producer-less lights (filtered out when building
+  // scene_light_indices, the shader-facing compact indices list).
+  std::vector<uint32_t> lightArenaSlots;
+  std::vector<MaterialGPU> materials;
+  // Parallel to `materials` — same size, same indexing. Zeroed
+  // (OpenPBR spec defaults) for materials whose scene material_component
+  // doesn't set any extension fields. Consumer shaders either ignore
+  // this SSBO entirely (classic_pbr, classic_pbr_textured, …) or bind
+  // it as `scene_materials_ext` to pick up the full OpenPBR parameter
+  // set (classic_pbr_openpbr).
+  std::vector<MaterialExtensionsGPU> material_extensions;
+  std::vector<SkeletonGPU> skins;  // Parallel to scene_state.skeletons.
+
+  // World matrices to upload into the WorldTransform arena, one per
+  // producer-authored scene_transform encountered in the walk whose
+  // raw_slot is valid. Sparse: the arena is indexed by offset, not
+  // by position in this vector.
+  std::vector<WorldTransformEmit> worldTransforms;
+
+  // Loader-emitted scene_data payloads, collected during the walk.
+  // ScenePreprocessor forwards each entry as an auxiliary_buffer on every output
+  // geometry (by name). Lifetime held via shared_ptr.
+  std::vector<ossia::scene_data_ptr> scene_data;
+
+  // Instance components encountered during the walk. Each entry is an
+  // (instance_component_ptr, worldTransform) pair that the ScenePreprocessor
+  // emits as a dedicated instanced DrawCall with per-instance auxiliaries.
+  struct InstanceDraw
+  {
+    ossia::instance_component_ptr instance;
+    QMatrix4x4 worldTransform;
+  };
+  std::vector<InstanceDraw> instances;
+
+  // Primitive cloud (splat / point-cloud) entries. Format-agnostic
+  // payloads whose schema is described by their CSF chain (one
+  // AUXILIARY with LAYOUT). ScenePreprocessor buckets these by
+  // `format_id` and emits one indirect-draw geometry per bucket;
+  // entries with empty format_id are bucketed individually keyed on
+  // their stable id.
+  struct PrimitiveCloudDraw
+  {
+    ossia::primitive_cloud_component_ptr cloud;
+    QMatrix4x4 worldTransform;
+    // RawTransform arena slot index, or 0xFFFFFFFFu if no producer
+    // transform was on the walk path. Mirrors PerDrawGPU.transform_slot.
+    uint32_t transform_slot{0xFFFFFFFFu};
+  };
+  std::vector<PrimitiveCloudDraw> primitive_clouds;
+
+  // Cameras collected from the scene tree. Each entry keeps its source
+  // camera_component alive, its accumulated world transform (column 3 =
+  // eye position, inverse = view matrix), and the scene_node_id of the
+  // node it was attached to so consumers can resolve `active_camera_id`.
+  struct CameraEntry
+  {
+    ossia::camera_component_ptr component;
+    QMatrix4x4 worldTransform;
+    ossia::scene_node_id node_id{};
+  };
+  std::vector<CameraEntry> cameras;
+
+  // Index into `cameras` of the currently-active camera. -1 when the scene
+  // has no cameras; in that case downstream falls back to a default eye
+  // placement (see the legacy single-camera fields below, populated from
+  // this slot if valid or from a default otherwise).
+  int activeCameraIndex{-1};
+
+  // Camera (from scene or override) — legacy mirror fields. Kept populated
+  // for consumers that haven't migrated to `cameras[activeCameraIndex]`
+  // yet. Resolved by flattenScene() after the tree walk:
+  //   - cameras empty   → sensible default (eye at (0,1,3))
+  //   - cameras nonempty → copied from cameras[activeCameraIndex]
+  QMatrix4x4 viewMatrix;
+  QMatrix4x4 projectionMatrix;
+  QVector3D cameraPosition;
+  float cameraFov{60.f};
+  float cameraNear{0.1f};
+  float cameraFar{1000.f};
+
+  bool hasCamera{false};
+
+  void clear()  // Empties every collection and resets the two camera flags.
+  {
+    draws.clear();
+    lightArenaSlots.clear();
+    materials.clear();
+    material_extensions.clear();
+    skins.clear();
+    scene_data.clear();
+    instances.clear();
+    primitive_clouds.clear();
+    cameras.clear();
+    worldTransforms.clear();
+    activeCameraIndex = -1;
+    hasCamera = false;  // The legacy view/projection/position/fov/near/far fields are left as-is.
+  }
+};
+
+// Flatten a scene_spec into a FlatScene for GPU consumption.
+void flattenScene(
+    const ossia::scene_spec& scene,
+    FlatScene& out,
+    float aspectRatio);
+
+// Build a transient ossia::geometry that wraps a mesh_primitive's buffers
+// and attributes. The result is heap-allocated and owned by shared_ptr so
+// callers can keep it alive beyond the flatten pass (see DrawCall::owned_mesh).
+std::shared_ptr<ossia::geometry>
+primitiveToGeometry(const ossia::mesh_primitive& prim);
+
+MaterialGPU packMaterial(const ossia::material_component& mc);
+MaterialExtensionsGPU packMaterialExtensions(const ossia::material_component& mc);
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.cpp
new file mode 100644
index 0000000000..6217a96d82
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.cpp
@@ -0,0 +1,5559 @@
+#include "Gfx/Graph/GpuResourceRegistry.hpp"
+
+#include <Gfx/AssetTable.hpp>
+#include <Gfx/Graph/CameraMath.hpp>
+#include <Gfx/Graph/CustomMesh.hpp>
+#include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RhiClearBuffer.hpp>
+#include <Gfx/Graph/RhiComputeBarrier.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+#include <Gfx/Graph/ScenePreprocessorNode.hpp>
+#include <Gfx/Graph/TextureLoader.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+#include <ossia/detail/flat_map.hpp>
+#include <ossia/detail/hash.hpp>
+#include <ossia/detail/hash_map.hpp>
+#include <ossia/network/value/value_conversion.hpp>
+
+#include <QByteArray>
+#include <QImage>
+#include <QQuaternion>
+
+#include <algorithm>
+#include <array>
+#include <chrono>
+#include <cstring>
+#include <limits>
+
+namespace score::gfx
+{
+
+namespace
+{
+
+// std430 layout matching the `per_draw` AUXILIARY block declared in the
+// preset rasterizer shaders. Lays down model + normal matrices, the
+// material index, and a 32-bit tag hash (rapidhash of material.tag,
+// truncated to 32 bits — same primitive that produces filter_tag and
+// content_hash everywhere else in the pipeline) for downstream
+// per-pass filtering.
+//
+// `transform_slot` indexes into the `world_transforms` /
+// `world_transforms_prev` SSBOs — motion-vector / TAA / reprojection
+// shaders do `world_transforms_prev.data[pd.transform_slot]` to recover
+// the previous-frame world matrix of this draw. 0xFFFFFFFF = no
+// producer-authored transform on the walk path (draw anchored to the
+// identity or a loader-interior transform); shaders must treat this as
+// "motion = zero" / "no prev data".
+//
+// `skeleton_offset` is the offset (in joint-matrix units) where this
+// draw's skeleton begins inside a consolidated joint_matrices buffer.
+// 0xFFFFFFFF = unskinned draw. Today joint_matrices is bound per-draw
+// and the offset is functionally always 0 for skinned draws, but we
+// stamp the correct concat-offset here so a future consolidation that
+// switches to a single arena-style joint_matrices SSBO does not need a
+// PerDrawGPU layout change.
+struct PerDrawGPU
+{
+  float model[16]{};    // model matrix (mat4)
+  float normal[16]{};   // mat3 padded as mat4 to keep std430 alignment trivial
+  uint32_t material_index{};
+  uint32_t tag_hash{};  // rapidhash of material.tag, truncated to 32 bits
+  uint32_t transform_slot{0xFFFFFFFFu};   // 0xFFFFFFFF = no producer-authored transform
+  uint32_t skeleton_offset{0xFFFFFFFFu};  // 0xFFFFFFFF = unskinned draw
+};
+static_assert(sizeof(PerDrawGPU) == 144, "PerDrawGPU layout must match shader");  // 2 mat4 (128 B) + 4 uint32 (16 B)
+
+// Local-space AABB per draw. Emitted as the `per_draw_bounds` auxiliary
+// SSBO (sidecar to `per_draws`, same indexing by drawID / gl_BaseInstance).
+// Consumer shaders transform to world space via Arvo's algorithm against
+// PerDrawGPU.model and test against the camera's frustum planes for
+// GPU frustum / HiZ occlusion culling.
+//
+// Sentinel convention: when the source mesh didn't compute bounds, we
+// emit an "infinite" AABB (min = -FLT_MAX, max = +FLT_MAX) so culling
+// shaders leave the draw alone rather than degenerating to a point at
+// the origin.
+struct PerDrawBoundsGPU
+{
+  float aabb_min[4]{};  // xyz = local-space min, w = unused (padding, zero-initialized)
+  float aabb_max[4]{};  // xyz = local-space max, w = unused (padding, zero-initialized)
+};
+static_assert(sizeof(PerDrawBoundsGPU) == 32,
+              "PerDrawBoundsGPU layout must match shader (2 × vec4)");
+
+// Pack an ossia::aabb into PerDrawBoundsGPU. Empty (inverted) input means
+// the source mesh didn't compute bounds — emit a ±FLT_MAX "infinite" box
+// so culling shaders never cull the draw. This keeps sources that can't
+// easily supply bounds (GPU-resident procedural meshes like PBRMesh)
+// rendering correctly through a cull pass.
+inline PerDrawBoundsGPU packBounds(const ossia::aabb& b) noexcept
+{
+  PerDrawBoundsGPU g{};  // value-initialized: both w components stay 0 on every path
+  if(b.empty())
+  {
+    constexpr float kPos = std::numeric_limits<float>::max();
+    constexpr float kNeg = -std::numeric_limits<float>::max();  // most negative finite float, not min()
+    g.aabb_min[0] = kNeg; g.aabb_min[1] = kNeg; g.aabb_min[2] = kNeg;
+    g.aabb_max[0] = kPos; g.aabb_max[1] = kPos; g.aabb_max[2] = kPos;
+  }
+  else
+  {
+    g.aabb_min[0] = b.min[0]; g.aabb_min[1] = b.min[1]; g.aabb_min[2] = b.min[2];  // copy xyz verbatim
+    g.aabb_max[0] = b.max[0]; g.aabb_max[1] = b.max[1]; g.aabb_max[2] = b.max[2];
+  }
+  return g;
+}
+
+// MaterialGPU is 80 B = 5 × vec4 in the shader (baseColor, MR-occlusion-unlit,
+// emissive_strength, textureRefs — presumably two vec4 rows; TODO confirm).
+// Layout drift here silently corrupts every textured draw — keep the size check.
+static_assert(sizeof(MaterialGPU) == 80, "MaterialGPU layout must match shader");
+
+// Per-material per-channel UV transforms (KHR_texture_transform).
+// 5 channels × (offset.xy + scale.xy) + rotations packed in 2 vec4
+// = 7 vec4 = 112 B. Channels match MaterialChannel enum: 0=BC, 1=MR,
+// 2=Normal, 3=Em, 4=Occlusion. Identity transform: offset=(0,0),
+// scale=(1,1), rotation=0 — the default-constructed value, which
+// makes glTFs without the extension pass through `(uv) → uv` and
+// incur zero shader cost.
+struct MaterialUVTransformGPU
+{
+  float bc_offset_scale[4]{0.f, 0.f, 1.f, 1.f};      // ox, oy, sx, sy (same packing for all five rows)
+  float mr_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float normal_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float em_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float occ_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float rotations0[4]{0.f, 0.f, 0.f, 0.f};           // bc, mr, nrm, em (radians)
+  float rotations1[4]{0.f, 0.f, 0.f, 0.f};           // occ (radians), _pad×3
+};
+static_assert(sizeof(MaterialUVTransformGPU) == 112,
+              "MaterialUVTransformGPU layout must match shader (7 × vec4)");
+
+// Material texture channels. Each channel has its own QRhiTextureArray with
+// the appropriate pixel format (sRGB vs linear) and dedup map. Index into
+// MaterialGPU::textureRefs[].
+enum MaterialChannel : int
+{
+  ChannelBaseColor = 0,  // sRGB (see channelFlags)
+  ChannelMetalRough = 1,  // linear
+  ChannelNormal = 2,  // linear
+  ChannelEmissive = 3,  // sRGB (see channelFlags)
+  ChannelOcclusion = 4,  // Separate glTF occlusionTexture (when distinct from MR).
+  ChannelCount = 5  // number of channels; not itself a valid channel
+};
+
+// Whole texture_ref for a given channel, or nullptr for out-of-range.
+// Used by both the static path (reads .source) and the dynamic path
+// (reads .texture.native_handle).
+inline const ossia::texture_ref*
+channelRef(MaterialChannel ch, const ossia::material_component& m) noexcept
+{
+  switch(ch)  // returned pointers alias members of `m`
+  {
+    case ChannelBaseColor:  return &m.base_color_texture;
+    case ChannelMetalRough: return &m.metallic_roughness_texture;
+    case ChannelNormal:     return &m.normal_texture;
+    case ChannelEmissive:   return &m.emissive_texture;
+    case ChannelOcclusion:  return &m.occlusion_texture;
+    default:                return nullptr;  // ChannelCount or any other out-of-range value
+  }
+}
+
+// Shader-visible name for each channel — matches the INPUT entries consuming
+// shaders declare (sampler2DArray baseColorArray; etc). Names follow the
+// existing classic_pbr_textured convention (camelCase) so the aux-texture
+// auto-resolve path slots in without shader edits.
+inline const char* channelName(MaterialChannel ch) noexcept
+{
+  switch(ch)  // every branch returns a string literal
+  {
+    case ChannelBaseColor:  return "baseColorArray";
+    case ChannelMetalRough: return "metalRoughArray";
+    case ChannelNormal:     return "normalArray";
+    case ChannelEmissive:   return "emissiveArray";
+    case ChannelOcclusion:  return "occlusionArray";
+    default:                return "";  // out-of-range channel: empty name, never null
+  }
+}
+
+// Dynamic-slot aux-texture name base. The full name is
+// `<base><slot_index>` (e.g., "baseColorDyn0"), matching the uniform
+// names consumer shaders declare for the dynamic branch.
+inline const char* channelDynBaseName(MaterialChannel ch) noexcept
+{
+  switch(ch)  // returns only the base; the slot index is not appended here
+  {
+    case ChannelBaseColor:  return "baseColorDyn";
+    case ChannelMetalRough: return "metalRoughDyn";
+    case ChannelNormal:     return "normalDyn";
+    case ChannelEmissive:   return "emissiveDyn";
+    case ChannelOcclusion:  return "occlusionDyn";
+    default:                return "";  // out-of-range channel: empty base, never null
+  }
+}
+
+// The authoritative kMaxDynamicSlots constant lives on
+// GpuResourceRegistry::kMaxDynamicSlots (header). There is deliberately no
+// local duplicate here (an earlier one drifted out of sync); the registry
+// value is what actually gates the dynamic-slot cap (see resolveDynamicSlot
+// in GpuResourceRegistry.cpp).
+
+// sRGB channels (base color, emissive) get hardware sRGB→linear on sample.
+// Metallic-roughness, normal and occlusion are data, not color — stay linear.
+inline QRhiTexture::Flags channelFlags(MaterialChannel ch) noexcept
+{
+  switch(ch)
+  {
+    case ChannelBaseColor:
+    case ChannelEmissive:
+      return QRhiTexture::sRGB;
+    default:  // MetalRough, Normal, Occlusion and any out-of-range value
+      return {};
+  }
+}
+
+// =============================================================================
+// Ext-texture slot routing (KHR_materials_*)
+// =============================================================================
+//
+// Each MaterialExtensionsGPU::textureRefs[slot] is fed by an ext texture from
+// material_component, registered into one of three of the 5 existing channel
+// pools (BaseColor / MetalRough / Normal). Pool choice = format expectation:
+//   ChannelBaseColor  → sRGB color textures (sheen color, specular color,
+//                       diffuse-transmission color, subsurface color)
+//   ChannelMetalRough → linear scalar/factor textures (clearcoat factor +
+//                       roughness, sheen roughness, transmission, specular
+//                       factor, iridescence, diffuse-transmission factor,
+//                       subsurface factor)
+//   ChannelNormal     → tangent-space data (clearcoat normal, anisotropy
+//                       direction)
+//
+// Slot numbering matches MaterialExtensionsGPU::textureRefs[] documented in
+// SceneGPUState.hpp — they MUST stay in sync; this table is the loader-side
+// counterpart of the shader-side switch (see classic_pbr_openpbr.frag).
+//
+// Slots 13/14 (subsurface factor / color) and 15 (reserved) are intentionally
+// absent from this table: stock glTF has no SSS extension and material_
+// component carries no source texture_ref to drive them. Future loaders
+// growing `material_component::subsurface` fields can extend the table
+// here — the rebuild + patch walkers iterate kExtTextureSlots without
+// hard-coded slot count, so a single new entry is all it takes.
+struct ExtTextureSlot
+{
+  int slot;                 // 0..15 in MaterialExtensionsGPU::textureRefs
+  MaterialChannel channel;  // which existing pool this texture lands in
+  // Plain function pointer, so this struct can sit in a constexpr table.
+  // It returns a reference into `m`'s ext struct; the caller does its
+  // `valid()` / `source.get()` test on the resulting texture_ref. Returning
+  // by reference avoids dangling on temporaries it would otherwise construct.
+  const ossia::texture_ref& (*accessor)(const ossia::material_component& m);
+};
+
+inline constexpr ExtTextureSlot kExtTextureSlots[] = {  // unary `+` turns each captureless lambda into a function pointer
+    // KHR_materials_clearcoat — slots 0..2.
+    { 0,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.clearcoat.texture; } },
+    { 1,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.clearcoat.roughness_texture; } },
+    { 2,  ChannelNormal,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.clearcoat.normal_texture; } },
+
+    // KHR_materials_sheen — slots 3..4.
+    { 3,  ChannelBaseColor,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.sheen.color_texture; } },
+    { 4,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.sheen.roughness_texture; } },
+
+    // KHR_materials_transmission — slot 5.
+    { 5,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.transmission.texture; } },
+
+    // KHR_materials_specular — slots 6..7.
+    { 6,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.specular.texture; } },
+    { 7,  ChannelBaseColor,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.specular.color_texture; } },
+
+    // KHR_materials_iridescence — slots 8..9.
+    { 8,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.iridescence.texture; } },
+    { 9,  ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.iridescence.thickness_texture; } },
+
+    // KHR_materials_anisotropy — slot 10.
+    { 10, ChannelNormal,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.anisotropy.texture; } },
+
+    // KHR_materials_diffuse_transmission — slots 11..12.
+    { 11, ChannelMetalRough,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.diffuse_transmission.texture; } },
+    { 12, ChannelBaseColor,
+      +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+          return m.diffuse_transmission.color_texture; } },
+};  // 13 entries covering slots 0..12
+
+QMatrix4x4 transformToMatrix(const ossia::scene_transform& t)
+{  // Builds T * R * S: each QMatrix4x4 call below post-multiplies the running matrix.
+  QMatrix4x4 mat;  // default-constructed QMatrix4x4 is the identity
+  mat.translate(t.translation[0], t.translation[1], t.translation[2]);
+  mat.rotate(QQuaternion(t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2]));  // QQuaternion takes (scalar, x, y, z)
+  mat.scale(t.scale[0], t.scale[1], t.scale[2]);
+  return mat;
+}
+
+// writeMat4 comes from Gfx/Graph/CameraMath.hpp (included above) — same
+// signature, column-major memcpy. Keeping a local copy would create an
+// ambiguous overload at every call site.
+
+}
+
+struct RenderedScenePreprocessorNode final : NodeRenderer
+{
+  // Texture arrays now live in GpuResourceRegistry and are destroyed
+  // by RenderList::release → registry.destroy(). Nothing to clean up
+  // here — the destructor is defaulted.
+
+  const ScenePreprocessorNode& m_node;
+
+  // Output owned GPU buffers (one set per flatten cycle). Sized to scene needs.
+  // scene_light_indices SSBO: compact list of RawLight arena slot
+  // indices for the current scene's live lights. Shader iterates
+  // 0..scene_counts.light_count and reads
+  // scene_lights.entries[scene_light_indices.data[i]] (task 28b phase 3).
+  QRhiBuffer* m_lightIndicesBuffer{};
+  int64_t m_lightIndicesCap{};
+  std::vector<uint32_t> m_cachedLightIndices;
+  // scene_materials is now served by the Material arena directly
+  // (registry.buffer(Arena::Material)) — no preprocessor-owned mirror.
+  // MaterialExtensions stays preprocessor-owned pending its own arena
+  // migration (larger struct, less pressure to move).
+  QRhiBuffer* m_materialsExtBuffer{};  // MaterialExtensionsGPU[]
+  // KHR_texture_transform: per-material per-channel UV offset/scale/
+  // rotation. Parallel to scene_materials, indexed by material_index.
+  // Identity for materials without the extension (zero shader cost).
+  QRhiBuffer* m_materialUVTransformsBuffer{};
+  int64_t m_materialUVTransformsCap{};
+  std::vector<MaterialUVTransformGPU> m_cachedMaterialUVTransforms;
+
+  // One QRhiBuffer per forwarded scene_data entry — allocated when the
+  // scene_data carries CPU-side `buffer_data`, borrowed from the upstream
+  // when it already holds a `gpu_buffer_handle`. Parallel to fs.scene_data.
+  struct SceneDataBinding
+  {
+    QRhiBuffer* buffer{};
+    std::string name;
+    int64_t byte_size{};
+    bool owned{false};
+  };
+  std::vector<SceneDataBinding> m_sceneDataBuffers;
+
+  // One per skeleton in scene_state.skeletons, holding the packed
+  // joint_matrices (mat4[N]). Grow-only; skinned draws attach one of these
+  // as a `joint_matrices` auxiliary.
+  struct SkinBinding
+  {
+    QRhiBuffer* buffer{};
+    int64_t capacity{};
+    int64_t byte_size{};
+  };
+  std::vector<SkinBinding> m_skinBuffers;
+
+  // std140-packed counts UBO: shaders read `scene_counts.light_count`,
+  // `.material_count`, `.draw_count` instead of `scene_lights.entries
+  // .length()`, so the SSBOs can keep their growth-only capacity without
+  // forcing shaders to iterate ghost tail entries. Uploaded on every
+  // change (partial uploads to scene_lights etc. may leave dead tail
+  // slots when counts shrink, and we want the shader to ignore them).
+  struct SceneCountsUBO
+  {
+    uint32_t light_count{};
+    uint32_t material_count{};
+    uint32_t draw_count{};
+    uint32_t _pad0{};
+  };
+  static_assert(sizeof(SceneCountsUBO) == 16, "scene_counts UBO layout");
+  QRhiBuffer* m_sceneCountsBuffer{};
+  SceneCountsUBO m_cachedSceneCounts{~0u, ~0u, ~0u, 0u};
+
+  // `shadow_cascades` aux UBO — light_view_proj[8] + split distances +
+  // cascade_count. Populated from `scene.state->shadow_cascades` (authored
+  // upstream by ShadowCascadeSetup). Diff-uploaded against the cached
+  // snapshot; unchanged frames cost zero bytes. Emitted to downstream as
+  // an `auxiliary_buffer` named "shadow_cascades" — classic_pbr_shadowed
+  // reads it to PCF-sample the right cascade; shadow_cascades.vert reads
+  // its `light_view_proj` array to transform vertices into cascade
+  // clip-space (its per-invocation `cascade_index` lives in a separate
+  // `shadow_draw_cfg` UBO that the depth-pass pipeline binds locally).
+  QRhiBuffer* m_shadowCascadesBuffer{};
+  ShadowCascadesUBO m_cachedShadowCascades{};
+  bool m_shadowCascadesSeeded{false};
+
+  // Per-camera std140 UBO array. Size = max(1, ncameras) * sizeof(CameraUBOData).
+  // First entry is always the active camera (resolved by flattenScene from
+  // scene_state.active_camera_id). When the scene has no cameras we publish
+  // a single default entry so the shader never sees a null binding.
+  // Bound as the `camera` aux buffer on Geometry Out — try_bind_from_geometry
+  // in the shader consumer resolves it by port name.
+  QRhiBuffer* m_camerasBuffer{};
+  int64_t m_camerasCap{};
+  std::vector<CameraUBOData> m_cachedCameras;
+
+  // One-frame history for motion-vector reprojection. Bound as the aux UBO
+  // `camera_prev`; consumer post-process shaders reconstruct world position
+  // from current depth + current camera, then reproject through this.
+  // On the first frame (no history) we seed prev = current so MV = 0.
+  // Filled each frame from m_cachedCameras BEFORE m_camerasBuffer is
+  // overwritten — same "GPU snapshot of last frame" semantics as
+  // m_worldTransformsPrevBuffer, just on a Dynamic UBO via CPU shadow
+  // upload instead of copyBuffer (which Dynamic UBOs don't support).
+  QRhiBuffer* m_camerasPrevBuffer{};
+
+  // Per-frame guard for packAndUploadCameras. update() is invoked once
+  // per outgoing edge by RenderList::renderInternal — for a
+  // ScenePreprocessor with N consumers, that's N calls per frame. The
+  // camera-prev semantic ("upload m_cachedCameras BEFORE overwriting
+  // it with fresh") only holds on the first call; on the second call,
+  // m_cachedCameras has already been replaced by fresh, so re-running
+  // would clobber camera_prev with current camera content.
+  // Keep packAndUploadCameras idempotent within a frame by tracking
+  // the last frame index we ran on (RenderList::frame, incremented at
+  // the end of each renderInternal). -1 = not yet run.
+  int64_t m_lastCameraUploadFrame{-1};
+
+  // Per-preprocessor world-transforms SSBO. One WorldTransformMat4 per
+  // producer-authored scene_transform seen during the walk, laid out in
+  // walk order. Not a shared registry arena — different preprocessors
+  // consuming different filtered views of the same source scene
+  // legitimately compute different world matrices for the same
+  // scene_transform, so each keeps its own buffer. Consumer shaders
+  // bind `world_transforms` by aux name and index via
+  // `per_draws[draw_id].transform_slot`.
+  QRhiBuffer* m_worldTransformsBuffer{};
+  int64_t m_worldTransformsCap{0};
+
+  // Previous-frame snapshot of m_worldTransformsBuffer. Bound as the
+  // `world_transforms_prev` aux buffer on Geometry Out; consumer
+  // shaders read it alongside `world_transforms` for motion-vector /
+  // TAA / reprojection passes. Maintained by a deferred-write scheme:
+  // update() stashes this frame's per-slot WorldTransformMat4 writes
+  // into m_pendingWorldXformWrites WITHOUT touching the resource-
+  // update batch. runInitialPasses then (a) issues a single GPU-side
+  // copyBuffer(current → prev) on the command buffer — at this point
+  // current still holds frame-N-1 data because the deferred writes
+  // haven't been applied yet — then (b) drains the pending list into
+  // the next resource-update batch (`res`), which RenderList submits
+  // AFTER runInitialPasses returns. Net: prev captures frame N-1's
+  // state, current then receives frame N's writes; consumer render
+  // passes downstream see the correct (prev, current) pair.
+  // Same Static + StorageBuffer constraint as the current buffer
+  // (QRhi forbids Dynamic + StorageBuffer).
+  QRhiBuffer* m_worldTransformsPrevBuffer{};
+
+  // Per-slot world-transform writes deferred from update() to
+  // runInitialPasses so that the prev-snapshot copy captures frame
+  // N-1 data before frame N's writes overwrite current. Drained once
+  // per frame, gated by m_lastSnapshotFrame.
+  std::vector<std::pair<uint32_t, WorldTransformMat4>>
+      m_pendingWorldXformWrites;
+  // Single-fire-per-frame guard for the prev-snapshot + pending-writes
+  // drain. runInitialPasses is invoked once per outgoing edge, so without
+  // a gate the snapshot would queue N copies and the pending-writes drain
+  // would double-upload. We compare against renderer.frame (the monotonic
+  // per-renderer frame counter that the camera path also uses, see the
+  // packAndUploadCameras / camera prev-snapshot sites). NB: the previous
+  // QRhiCommandBuffer-pointer discriminator was broken — every QRhi
+  // backend (Vulkan/D3D11/D3D12/Metal/GL) returns the address of a single
+  // by-value cbWrapper member from QRhiSwapChain::currentFrameCommandBuffer,
+  // so the pointer is constant across frames and the gate fired exactly
+  // once per swapchain lifetime, freezing world_transforms / _prev at
+  // their frame-0 contents (motion vectors / TAA / reprojection broken).
+  // Cleared on teardown (see release()).
+  int64_t m_lastSnapshotFrame{-1};
+
+  // Single-fire-per-frame guard for issuePendingGpuCopies (threedim#13).
+  // runInitialPasses fires once per outgoing edge; without a gate a node
+  // feeding K consumers issues K identical copy batches per frame (the
+  // destination MDI buffers are shared, so one batch already serves every
+  // consumer). Kept separate from m_lastSnapshotFrame because the snapshot
+  // block only sets that token when the world-transforms buffer exists —
+  // a dedicated token gates the copies unconditionally. Cleared on teardown.
+  int64_t m_lastGpuCopiesFrame{-1};
+
+  // Environment params UBO: preprocessor-owned Env arena slot. Each
+  // EnvironmentLoader / CubemapLoader contributes disjoint fields (via
+  // `params_set` bits on scene_environment); merge_scenes composes them
+  // field-by-field into this->scene.state->environment. The preprocessor
+  // packs the MERGED CPU-side env into m_envSlot here so consumers
+  // reading `env` see the composed result, not any one producer's
+  // contribution. The per-producer Env slots owned by EnvironmentLoader
+  // etc. remain valid but are no longer the binding target — they're
+  // just a CPU-side marker that the producer is participating.
+  GpuResourceRegistry::Slot m_envSlot{};
+  uint32_t m_env_aux_offset{0};
+  // Cache the last uploaded EnvParamsUBO bytes so we can skip re-upload
+  // when the merged environment content doesn't change frame-to-frame.
+  EnvParamsUBO m_lastEnvUpload{};
+  bool m_envSlotSeeded{false};
+
+  // ─── MDI state (Plan 09 S4) ─────────────────────────────────────────
+  // Post-migration, the vertex/index streams live in the registry's
+  // MeshArenaManager. Only per_draws + indirect_draw_cmds remain
+  // preprocessor-owned — they're small, scene-wide SSBOs tied to a
+  // specific preprocessor's filtered view of the scene and not
+  // shareable across preprocessors.
+  struct MDIState
+  {
+    // Preprocessor-owned GPU buffers.
+    QRhiBuffer* per_draws = nullptr;
+    QRhiBuffer* indirect_draw_cmds = nullptr;
+    // Bounds SSBO running in parallel with per_draws: it shares the same
+    // draw indexing (baseInstance / gl_BaseInstance). GPU culling shaders
+    // read it to move local-space AABBs into world space and test them
+    // against the camera frustum.
+    QRhiBuffer* per_draw_bounds = nullptr;
+
+    // Capacity trackers paired with the three buffers above.
+    int64_t perDrawsCap = 0;
+    int64_t indirectCap = 0;
+    int64_t perDrawBoundsCap = 0;
+
+    // Scene-wide totals.
+    uint32_t totalVertices = 0;
+    uint32_t totalIndices = 0;
+    uint32_t drawCount = 0;
+  };
+  MDIState m_mdi;
+
+  // ─── Primitive cloud (splat) bucket resources ───────────────────────
+  // One entry per bucket_key (hash(format_id) — or stable_id when
+  // format_id is empty so each unformatted cloud gets its own bucket).
+  // Each bucket carries:
+  //   - raw_splats: concatenation of all clouds' raw_data in the bucket
+  //   - cloud_meta: CloudMetaGPU[] (model matrix + slot indices)
+  //   - cloud_id_lookup: uint per primitive -> cloud_meta index
+  //   - indirect: a single IndirectCmd {6, total_primitives, 0, 0, 0}
+  //
+  // Buffers are persistent (growBuf-managed) so downstream SRBs see
+  // pointer-stable handles across frames. A bucket whose key disappears
+  // from the next flatten gets dropBuf'd in releaseStaleClouds().
+  //
+  // CloudMetaGPU mirrors PerDrawGPU's pattern (model[16] +
+  // transform_slot) so a CSF chain that wants per-cloud TRS reads it
+  // exactly the same way mesh shaders read per_draws[gl_DrawID].
+  //
+  // bounds_min / bounds_max are the per-cloud world-space AABB —
+  // populated by walking the 8 corners of `cloud->bounds` through
+  // `worldTransform`. Splat-format CSFs use these to do a per-cloud
+  // frustum-cull pre-pass so off-screen clouds skip all per-primitive
+  // work (a big win when scenes carry many bucketed clouds).
+  // Per-cloud metadata record. The number trailing each field is the
+  // running END offset in bytes after that field; the static_assert
+  // below pins the total size to 128 bytes.
+  struct CloudMetaGPU
+  {
+    float model[16];                 // 64
+    float bounds_min[4];             // 80   xyz + pad
+    float bounds_max[4];             // 96   xyz + pad
+    uint32_t primitive_offset;       // 100
+    uint32_t primitive_count;        // 104
+    uint32_t transform_slot;         // 108
+    uint32_t format_param_index;     // 112
+    uint32_t _pad[4];                // 128 — 16-byte align
+  };
+  // Compile-time guard: any field change that alters the size trips here.
+  static_assert(sizeof(CloudMetaGPU) == 128, "CloudMetaGPU std430 layout");
+
+  struct PrimitiveCloudBucketBuffers
+  {
+    // Each bucket buffer is immediately followed by its capacity tracker.
+    QRhiBuffer* raw_splats = nullptr;
+    int64_t rawSplatsCap = 0;
+    QRhiBuffer* cloud_meta = nullptr;
+    int64_t cloudMetaCap = 0;
+    QRhiBuffer* cloud_id_lookup = nullptr;
+    int64_t cloudIdLookupCap = 0;
+    QRhiBuffer* indirect = nullptr;
+    int64_t indirectCap = 0;
+
+    // Cached from cloud->row_stride.
+    uint32_t row_stride = 0;
+    // Used for stale-bucket eviction.
+    uint64_t last_seen_frame = 0;
+
+    // Per-frame content fingerprint. It is computed, per cloud in bucket
+    // order, over:
+    //   raw_data identity + content_hash + primitive_count
+    //   + worldTransform bytes + transform_slot
+    // which is everything the bucket's GPU buffers depend on. If the
+    // freshly computed value equals the stored one, raw_splats /
+    // cloud_meta / cloud_id_lookup / indirect are already correct from
+    // the previous frame, so the per-frame CPU concat and
+    // uploadStaticBuffer work can be skipped wholesale. A value of 0
+    // means "never uploaded; force the first frame's upload regardless".
+    // This is the Phase-1 delta-update step toward the persistent arena
+    // design (see .claude/PRIMITIVE-CLOUD-ARENA-DESIGN.md).
+    uint64_t content_fingerprint = 0;
+  };
+  ossia::flat_map<uint32_t, PrimitiveCloudBucketBuffers> m_primitiveCloudBuckets;
+  uint64_t m_primitiveCloudFrame{0};
+
+  // ─── Unified-MDI per-instance concat buffers ────────────────────────
+  // Three parallel arrays sized to K = (Σ regular_cmd_count + Σ
+  // instance_group_count). One slot per (cmd, instance) pair, contiguous
+  // within a cmd. Each indirect cmd sets `firstInstance = its first
+  // slot`, so per-instance VERTEX_INPUTs (translation / color / draw_id)
+  // step at the right offset on both indirect and CPU-fallback paths
+  // (firstInstance is honoured uniformly by every QRhi backend).
+  //
+  // - m_instTranslations: vec4-padded translation per slot (xyz used,
+  //   w pad). Identity (0,0,0) for regular-mesh slots; actual
+  //   per-particle position for instance-group slots (GPU-copied from
+  //   the Instancer's source buffer with format-aware offsets).
+  // - m_instColors: vec4 per slot. Identity (1,1,1,1) for regular-mesh
+  //   slots; actual per-instance broadcast colour for groups.
+  // - m_instDrawIds: uint per slot. Carries the cmd-index of the owning
+  //   draw — replaces gl_DrawID (broken on CPU-fallback) and
+  //   gl_BaseInstance (no longer = drawID once instanceCount > 1).
+  QRhiBuffer* m_instTranslations{};
+  QRhiBuffer* m_instColors{};
+  QRhiBuffer* m_instDrawIds{};
+  int64_t m_instTranslationsCap{};
+  int64_t m_instColorsCap{};
+  int64_t m_instDrawIdsCap{};
+  uint32_t m_instSlotsUsed{};
+
+  // CPU mirror of the draw_ids stream so we can diff-upload + cheaply
+  // pre-fill identity values for regular cmds. Translations / colors
+  // are GPU-resident sources for instance groups (no CPU mirror —
+  // copies are GPU→GPU); we pre-fill identity for regular slots
+  // straight into the GPU buffer via uploadStaticBuffer.
+  std::vector<uint32_t> m_cachedInstDrawIds;
+
+  // Prototype stable-id fallback map. Some producers (notably
+  // Threedim::Primitive going through halp::geometry → legacy_geometry)
+  // don't stamp a non-zero `mesh_primitive::stable_id` on their output.
+  // Without a stable id, the slab arena allocates a fresh slab per
+  // frame and the OffsetAllocator fragments until exhaustion. We cover
+  // this by minting a stable id keyed on the prototype's
+  // mesh_component pointer (which IS stable across frames as long as
+  // the producer re-emits the same shared_ptr). GC pass at the end of
+  // update() evicts entries whose pointer no longer appears in fs.
+  ossia::hash_map<const ossia::mesh_component*, uint64_t> m_protoStableIds;
+
+  // Pending GPU→GPU copy ops collected during update()'s accumulator loop
+  // and executed in runInitialPasses (the only place ScenePreprocessor has a
+  // live command buffer). Each op corresponds to one attribute of one
+  // draw whose source buffer is GPU-resident; the CPU accumulator was
+  // zero-filled in its place so all offsets stay consistent with the
+  // tight MDI-layout contract. Cleared after being issued.
+  // Identifies the vertex attribute a PendingGpuCopy refers to. When the
+  // copy has no explicit destination buffer, this value names the
+  // mesh-stream slot that is resolved via mdiBufferFor().
+  enum class MdiAttr : uint8_t
+  {
+    Positions,
+    Normals,
+    Texcoords,
+    Tangents
+  };
+  struct PendingGpuCopy
+  {
+    // Source buffer of the copy.
+    QRhiBuffer* src = nullptr;
+    // Explicit destination. When null, `attr` names a mesh-stream slot
+    // that is resolved via mdiBufferFor().
+    QRhiBuffer* dst = nullptr;
+    int src_offset = 0;
+    int dst_offset = 0;
+    // Bytes if tight-copy, else element_size.
+    int size = 0;
+    int vertex_count = 0;
+    // 0 or element_size → tight; any other value → strided.
+    int src_stride = 0;
+    // BytesPerVertex for this attribute.
+    int element_size = 0;
+    MdiAttr attr = MdiAttr{};
+  };
+  std::vector<PendingGpuCopy> m_pendingGpuCopies;
+
+  // Capacity (in bytes) of the materials-extension scene buffer — tracked
+  // for grow-only reallocation.
+  int64_t m_materialsExtCap{};
+
+  // Per-channel material texture arrays are now owned by
+  // GpuResourceRegistry and shared across all preprocessors in the same
+  // RenderList. Sharing is safe because texture-source / layer
+  // assignments are driven by asset identity (pointer to
+  // texture_source), which is view-independent — every preprocessor
+  // computes the same mapping. Shared arrays also let producers
+  // (PBRMesh, MaterialOverride, loaders) author their own textureRefs
+  // at update() time via the registry's resolve APIs without a
+  // preprocessor-local dedup step.
+  //
+  // We stash the registry pointer at init() instead of going through
+  // renderer.registry() at every call site — access is on the hot
+  // rebuild path. Cleared on release(); m_lastRegistry below remembers
+  // the previous pointer so the next init() can detect "same registry
+  // as before release" and skip the cache wipe.
+  GpuResourceRegistry* m_registry{};
+
+  // Persist-across-rebuild contract: snapshot of m_registry at
+  // release() time. Survives the release()/init() cycle so init() can
+  // compare against the new RL's registry: equal → skip wipe (relink
+  // graph, viewport resize when the renderer object is reused), unequal
+  // → wipe (first init / OutputNode-replaced QRhi). Never read in the
+  // hot path; only inspected from init().
+  GpuResourceRegistry* m_lastRegistry{};
+
+  // Convenience typedef + helper to localise the enum translation.
+  using TexChannel = GpuResourceRegistry::TextureChannel;
+  static TexChannel toTexChannel(MaterialChannel ch) noexcept
+  {
+    // Plain enum-to-enum conversion; kept in one place so call sites
+    // never spell the cast themselves.
+    const auto mapped = static_cast<TexChannel>(ch);
+    return mapped;
+  }
+  auto& texChannel(MaterialChannel ch) noexcept
+  {
+    // Dereferences m_registry unconditionally: callers must only use
+    // this while a registry is attached.
+    const TexChannel mapped = toTexChannel(ch);
+    return m_registry->textureChannel(mapped);
+  }
+  const auto& texChannel(MaterialChannel ch) const noexcept
+  {
+    // Read-only counterpart of the accessor above; same precondition
+    // (m_registry must be non-null).
+    const TexChannel mapped = toTexChannel(ch);
+    return m_registry->textureChannel(mapped);
+  }
+
+  // Uniform layer size — matching across channels keeps the samplers
+  // interchangeable in shaders and simplifies sampler state.
+  static constexpr int kChannelLayerSize
+      = GpuResourceRegistry::kTextureLayerSize;
+
+  // Content-based fingerprint of the materials list we last decoded. A
+  // vector of raw material_component pointers (shared_ptr-element
+  // identity). Stable across multi-producer scene merges: merge_scenes
+  // concatenates material_component_ptr elements without deep-copying,
+  // so the element pointers themselves don't change from frame to frame
+  // even though the enclosing `shared_ptr<vector<...>>` does (the
+  // _contributors > 1 branch in merge_scenes allocates a new vector
+  // every merge). Comparing by content identity instead of the outer
+  // pointer keeps the texture cache warm across multi-glTF scenes —
+  // critical because re-decoding every JPEG and re-uploading every
+  // 1024² layer every frame is the ~100ms/frame penalty we're fixing.
+  std::vector<uint64_t> m_cachedMaterialsFingerprint;
+
+  // -- Granular invalidation state ------------------------------------------
+  //
+  // We keep CPU mirrors of what's currently on the GPU for each small SSBO,
+  // plus a fingerprint of the concatenated mesh list. Each frame we:
+  //  * compare the fingerprint — if meshes unchanged, skip vertex/index
+  //    upload entirely and keep m_outputSpec.meshes as the same shared_ptr
+  //    (so downstream sees stable geometry_spec and doesn't rebuild any
+  //    pipeline/SRB).
+  //  * diff the mirror arrays against the freshly packed data and only
+  //    uploadStaticBuffer(offset, size, …) for the contiguous ranges that
+  //    actually changed. Moving a light thus costs one 64-byte partial
+  //    upload; moving an object costs one PerDrawGPU (144 bytes).
+  //
+  // Memory cost: ~sizeof(T) × count on CPU (tens of KB for typical scenes).
+  //
+  // `m_cachedMeshFingerprint` stores `DrawCall::stable_id` per draw — the
+  // address of the source mesh_primitive inside the stable mesh_component
+  // shared_ptr (or the legacy ossia::geometry entry inside a mesh_list).
+  // NOT `DrawCall::mesh`, because that points at a transient
+  // primitiveToGeometry() wrapper that's freshly allocated on every
+  // flattenScene() call and therefore changes every frame.
+  std::vector<uint64_t> m_cachedMeshFingerprint;
+  // Fingerprint of the primitive_cloud set (threedim#2). The fast path
+  // (`meshesUnchanged`) skips rebuildPrimitiveClouds entirely — clouds are
+  // NOT covered by m_cachedMeshFingerprint — so without this a cloud added
+  // / removed / moved while the mesh fingerprint is unchanged would render
+  // nothing / leave stale geometry / keep a stale CloudMetaGPU.model. Mixing
+  // the cloud set into the fast-path gate forces the full rebuild branch
+  // (which re-runs rebuildMDI + rebuildPrimitiveClouds) on any cloud change.
+  // Covers the same fields rebuildPrimitiveClouds' internal per-bucket
+  // fingerprint depends on (raw_data identity/content version, primitive
+  // count, transform), plus the bucket key so add/remove is detected.
+  uint64_t m_cachedCloudFingerprint{};
+  // m_cachedMaterials is gone — scene_materials is the registry's
+  // Material arena, not a preprocessor CPU mirror. Producers + the
+  // loader-material upload pass write directly into arena slots.
+  std::vector<MaterialExtensionsGPU> m_cachedMaterialExt;
+  std::vector<PerDrawGPU> m_cachedPerDraws;
+  // Mirror of the per_draw_bounds SSBO for diff-upload on the fast-path
+  // (transforms/materials change but topology doesn't → tiny range
+  // upload instead of full rewrite). Grow-only; same indexing as
+  // m_cachedPerDraws.
+  std::vector<PerDrawBoundsGPU> m_cachedPerDrawBounds;
+
+  // Arena slots allocated by this preprocessor for loader materials
+  // (materials entering scene_state.materials with raw_slot.size == 0,
+  // i.e. not authored by a live producer like PBRMesh). The preprocessor
+  // acts as a producer-on-behalf-of-loader for these: allocates one
+  // Material arena slot per loader material, writes MaterialGPU bytes,
+  // frees at release. Producer-authored materials already have their
+  // own slots — those stay out of this map.
+  ossia::hash_map<
+      const ossia::material_component*, GpuResourceRegistry::Slot>
+      m_loaderMaterialSlots;
+
+  // Remembered accumulator sizes from the last full rebuildMDI. Used to
+  // pre-reserve the temporary std::vector capacity so we don't pay for
+  // repeated realloc + memmove when the scene grew or stays the same
+  // size. Grow-only; never shrinks (negligible memory, big perf win for
+  // scenes with many verts).
+  // Plan 09 S4: vertex/index stream byte-sizes no longer tracked
+  // here — the arena's OffsetAllocator owns sizing. `m_lastDrawCount`
+  // stays, used to pre-reserve acc.perDraws / acc.indirectCmds.
+  std::size_t m_lastDrawCount{};
+
+  // Diff two CPU mirrors and partial-upload only the contiguous ranges
+  // where fresh != cached. Also grows / shrinks the cached mirror to match
+  // fresh's size. Returns true if at least one range was uploaded.
+  //
+  // When fresh.size() > cached.size() the new tail slots are appended +
+  // uploaded. When fresh.size() < cached.size() the tail is zero-filled on
+  // the GPU so stale content can't contribute (e.g. old lights with
+  // intensity=1 still emitting after the scene shrank).
+  template <typename T>
+  static bool diffUpload(
+      QRhiResourceUpdateBatch& res, QRhiBuffer* buf, std::vector<T>& cached,
+      const std::vector<T>& fresh)
+  {
+    // No destination buffer: nothing is uploaded and the mirror is left
+    // untouched.
+    if(buf == nullptr)
+      return false;
+
+    // Queues an upload of `count` elements starting at element index
+    // `first`, sourcing the bytes from `data`.
+    const auto uploadSlots
+        = [&res, buf](std::size_t first, std::size_t count, const T* data) {
+      res.uploadStaticBuffer(
+          buf, quint32(first * sizeof(T)), quint32(count * sizeof(T)),
+          reinterpret_cast<const char*>(data));
+    };
+    // Byte-wise comparison of one mirror slot against its fresh value.
+    const auto differs = [&cached, &fresh](std::size_t slot) {
+      return std::memcmp(&cached[slot], &fresh[slot], sizeof(T)) != 0;
+    };
+
+    bool uploaded = false;
+    const std::size_t oldCount = cached.size();
+    const std::size_t newCount = fresh.size();
+    const std::size_t overlap = std::min(oldCount, newCount);
+
+    // Pass 1: walk the range both vectors share; each maximal run of
+    // differing slots becomes exactly one upload.
+    std::size_t slot = 0;
+    while(slot < overlap)
+    {
+      if(!differs(slot))
+      {
+        ++slot;
+        continue;
+      }
+      const std::size_t runBegin = slot;
+      do
+      {
+        // Bring the mirror up to date as the run is scanned.
+        cached[slot] = fresh[slot];
+        ++slot;
+      } while(slot < overlap && differs(slot));
+      uploadSlots(runBegin, slot - runBegin, &fresh[runBegin]);
+      uploaded = true;
+    }
+
+    // Pass 2: reconcile the size difference.
+    if(newCount > oldCount)
+    {
+      // Grown: append the new tail to the mirror and upload it.
+      cached.insert(cached.end(), fresh.begin() + oldCount, fresh.end());
+      uploadSlots(oldCount, newCount - oldCount, &fresh[oldCount]);
+      uploaded = true;
+    }
+    else if(newCount < oldCount)
+    {
+      // Shrunk: overwrite the now-stale GPU tail with value-initialised
+      // elements so shaders iterating the buffer's capacity don't see
+      // ghost entries, then trim the mirror.
+      const std::vector<T> blank(oldCount - newCount);
+      uploadSlots(newCount, blank.size(), blank.data());
+      cached.resize(newCount);
+      uploaded = true;
+    }
+    return uploaded;
+  }
+
+  // Last-published geometry_spec; kept alive so downstream shared_ptr equality
+  // sees stable identity across frames when the scene is unchanged.
+  ossia::geometry_spec m_outputSpec;
+
+  // Cache: identity of last input scene (raw scene_state* pointer + version).
+  const ossia::scene_state* m_cachedSceneState{};
+  int64_t m_cachedVersion{-1};
+
+  // Constructs the renderer for node `n`: forwards `n` to the
+  // NodeRenderer base and stores it in m_node. No other setup happens
+  // here — the body is empty.
+  RenderedScenePreprocessorNode(const ScenePreprocessorNode& n)
+      : NodeRenderer{n}
+      , m_node{n}
+  {
+  }
+
+  // The incremental-reconciliation path (Graph::incrementalEdgeUpdate)
+  // creates fresh renderers and calls `initState()` on them, NOT `init()`.
+  // Our preprocessor has no per-edge state — everything lives at the
+  // init() level — so both entry points run the same setup. Without
+  // this delegation a preprocessor created via the incremental path
+  // never has `m_registry` set, every `rebuildChannel` call early-outs,
+  // and consumer shaders see empty texture arrays (the exact
+  // "textures gone on second play" failure mode observed on stop/start).
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    // Pure delegation: this entry point runs exactly the same setup as
+    // init().
+    this->init(renderer, res);
+  }
+
+  void releaseState(RenderList& renderer) override
+  {
+    // Pure delegation: teardown is identical to release().
+    this->release(renderer);
+  }
+
+  // Reset every per-RenderList / per-registry cache field to empty.
+  // Frees registry-allocated slots (loader-material, env) when
+  // `freeRegistryResources` is true — pass true from release() (we
+  // still hold a valid m_registry) and false from init() (the prior
+  // m_registry, if any, may already be torn down: we cannot legally
+  // free against it; just drop the bookkeeping so arenaSlotForMaterial
+  // and the env publish path don't reuse stale slot indices on the
+  // fresh registry).
+  //
+  // QRhiBuffer-backed fields (m_materialsExtBuffer, m_lightIndicesBuffer,
+  // m_camerasBuffer, m_mdi.*, m_inst*, m_skinBuffers, m_sceneDataBuffers,
+  // m_sceneCountsBuffer, m_shadowCascadesBuffer, m_worldTransforms*Buffer)
+  // and their paired *Cap counters are NOT touched here — they go
+  // through dropBuf / renderer.releaseBuffer in release() because they
+  // need the renderer's release plumbing.
+  // Parameters:
+  //   freeRegistryResources  when true AND m_registry is non-null, the
+  //                          loader-material slots, the env slot and the
+  //                          minted mesh slabs are handed back to the
+  //                          registry before the bookkeeping is dropped.
+  //   current_frame          forwarded to releaseMeshSlab() for each
+  //                          minted stable id; defaults to 0 and is only
+  //                          read on the freeing path.
+  // The frees must run before the clears below: they iterate the very
+  // maps that are emptied afterwards.
+  void clearAllCaches(bool freeRegistryResources, uint32_t current_frame = 0u)
+  {
+    if(freeRegistryResources && m_registry)
+    {
+      for(auto& [mat, slot] : m_loaderMaterialSlots)
+        if(slot.valid())
+          m_registry->free(slot);
+      if(m_envSlot.valid())
+        m_registry->free(m_envSlot);
+      // MeshSlab leak fix: every (mc, id) pair in m_protoStableIds is a
+      // stable_id WE minted (see resolvePrototypeStableId). The
+      // matching slab is in the registry's m_meshSlabs cache. Clearing
+      // m_protoStableIds without releasing the slabs leaves them as
+      // orphans: the next renderer instance mints DIFFERENT IDs (mints
+      // are globally unique), so its acquireMeshSlab calls miss the
+      // cache and allocate fresh slabs. sweepMeshSlabs ages out the
+      // orphans after `grace=2` frames -- but rapid drag-resize
+      // triggers another rebuild before grace elapses, so slabs
+      // accumulate (used grew 70074 → 420444 in 6 resizes for the
+      // user's repro). Release explicitly here so the next-frame
+      // sweep can immediately reclaim. Routes through grace queue so
+      // any in-flight CB still referencing the slab is safe.
+      for(auto& [mc, id] : m_protoStableIds)
+        if(id != 0)
+          m_registry->releaseMeshSlab(id, current_frame);
+    }
+    // Registry-slot bookkeeping: dropped unconditionally, whether or not
+    // the slots were freed above.
+    m_loaderMaterialSlots.clear();
+    m_envSlot = {};
+    m_envSlotSeeded = false;
+    m_protoStableIds.clear();
+
+    // CPU-side mirrors, fingerprints and last-upload trackers: reset to
+    // their "nothing cached yet" values.
+    m_cachedSceneState = nullptr;
+    m_cachedVersion = -1;
+    m_cachedMaterialsFingerprint.clear();
+    m_cachedMeshFingerprint.clear();
+    m_cachedCloudFingerprint = 0;
+    m_cachedMaterialExt.clear();
+    m_cachedPerDraws.clear();
+    m_cachedPerDrawBounds.clear();
+    m_cachedShadowCascades = {};
+    m_shadowCascadesSeeded = false;
+    m_cachedSceneCounts = {~0u, ~0u, ~0u, 0u};
+    m_cachedMaterialUVTransforms.clear();
+    m_cachedCameras.clear();
+    m_lastCameraUploadFrame = -1;
+    m_cachedInstDrawIds.clear();
+    m_cachedLightIndices.clear();
+    m_lastEnvUpload = {};
+    m_outputSpec = {};
+    m_lastDrawCount = 0;
+  }
+
+  // Attaches this preprocessor to `renderer`'s GpuResourceRegistry.
+  //
+  // Steps, in order:
+  //  1. If the registry differs from the one remembered in
+  //     m_lastRegistry, drop every per-registry cache (without freeing
+  //     against the old registry).
+  //  2. Record the registry in m_registry / m_lastRegistry.
+  //  3. Allocate the Env arena slot if we do not hold a valid one.
+  //  4. If the BaseColor channel has no primary array yet, create a
+  //     1-layer array and queue an all-white upload for layer 0 on `res`.
+  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    m_initialized = true;
+
+    // Persist-across-rebuild contract: if the OutputNode-owned registry
+    // is the SAME pointer we held in the previous init() / release()
+    // cycle, every slot index (m_loaderMaterialSlots, m_envSlot, ...)
+    // and the texture-array channels are still alive — re-allocating
+    // them on a viewport resize / relink would re-upload ~100 MiB of
+    // decoded textures and pay the 50–500 ms rebuild burst this whole
+    // refactor exists to avoid.
+    //
+    // Skip the cache wipe in that case. The fingerprint / per-draw /
+    // cascade caches will naturally match the unchanged scene state on
+    // the first post-rebuild frame, short-circuiting the REBUILD branch
+    // (see the needsRebuild gate in update()) and rebuildChannel's
+    // sameMaterialsContent fast path → no texture re-upload.
+    //
+    // NOTE(review): release() now calls clearAllCaches(true, ...) on
+    // every teardown, so after a release() → init() cycle the caches are
+    // already empty and the "skip" only preserves state when init() runs
+    // WITHOUT a preceding release(). The paragraph above looks partly
+    // stale — confirm and trim.
+    //
+    // The pre-release pointer is stashed in m_lastRegistry; m_registry
+    // itself is null between release() and init() (so that any stray
+    // post-release rebuildChannel call hits its guarded early-out
+    // instead of dereferencing a stale pointer). m_lastRegistry == null
+    // means "first ever init on this renderer" → wipe (no-op since
+    // there's nothing to wipe). m_lastRegistry != new_registry means
+    // the OutputNode tore its registry down and built a fresh one
+    // (setSwapchainFormat / QRhi-replacement) → wipe (any slot indices
+    // we held are stale).
+    auto* new_registry = &renderer.registry();
+    const bool registry_changed = (m_lastRegistry != new_registry);
+    if(registry_changed)
+    {
+      // Drop every per-registry cache before swapping m_registry. If a
+      // previous RenderList left state behind (incremental edge rebuild
+      // without an intervening release()), m_loaderMaterialSlots /
+      // m_envSlot / m_protoStableIds carry slot indices that the new
+      // registry never allocated — arenaSlotForMaterial would silently
+      // return them and every mesh would wear the wrong material. The
+      // fingerprint / per-draw / cascade caches likewise gate dirty
+      // detection against the prior scene state. We can't legally free
+      // against the old registry (it may already be torn down), so we
+      // pass freeRegistryResources=false: just drop the bookkeeping.
+      clearAllCaches(/*freeRegistryResources=*/false);
+    }
+    // else: registry survived (resize fast path / relinkGraph reuse).
+    // Keep m_loaderMaterialSlots / m_envSlot / fingerprints / per-draw
+    // caches — they all reference live state in the persistent registry.
+    m_registry = new_registry;
+    m_lastRegistry = new_registry;
+
+    // Claim our own Env arena slot for the merged environment upload
+    // (task #26). Each preprocessor owns a slot — needed because two
+    // preprocessors can receive different filtered views of the same
+    // source scene and must not stomp each other's merged env.
+    if(!m_envSlot.valid())
+    {
+      m_envSlot = m_registry->allocate(
+          GpuResourceRegistry::Arena::Env, sizeof(EnvParamsUBO));
+      m_envSlotSeeded = false;
+    }
+
+    // Pre-allocate a 1-layer BaseColor array with a white fallback so
+    // downstream consumers (classic_pbr_textured) building their samplers
+    // in their own init() get a real texture pointer via textureForOutput,
+    // not nullptr. update() will reallocate with the right layer count
+    // once the scene is flattened. First preprocessor to run init() does
+    // this; subsequent preprocessors see the array already allocated and
+    // skip (shared registry state).
+    auto& rhi = *renderer.state.rhi;
+    auto& bc = texChannel(ChannelBaseColor);
+    if(!bc.primaryArray())
+    {
+      auto& b = bc.ensurePrimary(
+          QRhiTexture::RGBA8,
+          QSize(kChannelLayerSize, kChannelLayerSize));
+      b.array = rhi.newTextureArray(
+          b.format, 1, b.pixelSize, 1,
+          GpuResourceRegistry::textureChannelFlags(toTexChannel(ChannelBaseColor)));
+      if(b.array)
+      {
+        b.array->setName("GpuResourceRegistry::base_color_array (init fallback)");
+        if(!b.array->create())
+        {
+          // Creation failed: destroy the half-built object so the
+          // "no array" branch below runs.
+          delete b.array;
+          b.array = nullptr;
+        }
+      }
+      if(b.array)
+      {
+        b.layers = 1;
+        // Fill a full-size RGBA8888 image with white directly. This
+        // yields the same all-0xFF pixels as smooth-scaling a 1×1 white
+        // image, but skips the resample pass and keeps the image in the
+        // format it was declared with.
+        QImage w(
+            kChannelLayerSize, kChannelLayerSize, QImage::Format_RGBA8888);
+        w.fill(Qt::white);
+        QRhiTextureSubresourceUploadDescription sub(w);
+        QRhiTextureUploadEntry entry(0, 0, sub);
+        res.uploadTexture(
+            b.array, QRhiTextureUploadDescription({entry}));
+      }
+      else
+      {
+        // Allocation failed — drop the empty bucket so primaryArray()
+        // stays null and callers hit the "no array" fallback path.
+        bc.buckets.clear();
+      }
+    }
+  }
+
+  void release(RenderList& renderer) override
+  {
+    // QRhiBuffer invariant: go through RenderList::releaseBuffer so any
+    // buffer still referenced by a downstream mesh's MeshBuffers skips
+    // deleteLater (the mesh iteration at RenderList::release will
+    // destroy it via `delete b.handle`). Bypassing releaseBuffer with
+    // `deleteLater` directly is what caused the "rare segfault on exit"
+    // — the same pointer ending up in the final `delete b.handle` pass.
+    auto dropBuf = [&](QRhiBuffer*& b) {
+      if(b) { renderer.releaseBuffer(b); b = nullptr; }
+    };
+    dropBuf(m_lightIndicesBuffer);
+    // m_materialsBuffer + m_lightsBuffer removed — scene_materials and
+    // scene_lights bind the registry arenas directly.
+    dropBuf(m_materialsExtBuffer);
+    dropBuf(m_materialUVTransformsBuffer);
+    m_materialUVTransformsCap = 0;
+    for(auto& sd : m_sceneDataBuffers)
+      if(sd.owned && sd.buffer) renderer.releaseBuffer(sd.buffer);
+    m_sceneDataBuffers.clear();
+    for(auto& sk : m_skinBuffers)
+      if(sk.buffer) renderer.releaseBuffer(sk.buffer);
+    m_skinBuffers.clear();
+    // Plan 09 S4: vertex/index streams are registry-owned; only the
+    // preprocessor-owned per_draws + indirect_draw_cmds + per_draw_bounds
+    // drop here.
+    dropBuf(m_mdi.per_draws);
+    dropBuf(m_mdi.indirect_draw_cmds);
+    dropBuf(m_mdi.per_draw_bounds);
+    m_mdi = {};
+    // Per-bucket primitive cloud resources.
+    for(auto& [k, bb] : m_primitiveCloudBuckets)
+    {
+      dropBuf(bb.raw_splats);
+      dropBuf(bb.cloud_meta);
+      dropBuf(bb.cloud_id_lookup);
+      dropBuf(bb.indirect);
+    }
+    m_primitiveCloudBuckets.clear();
+    dropBuf(m_instTranslations);
+    dropBuf(m_instColors);
+    dropBuf(m_instDrawIds);
+    m_instTranslationsCap = 0;
+    m_instColorsCap = 0;
+    m_instDrawIdsCap = 0;
+    m_instSlotsUsed = 0;
+    m_lightIndicesCap = 0;
+    m_materialsExtCap = 0;
+    // Texture channel arrays are owned by GpuResourceRegistry — no
+    // per-preprocessor cleanup needed. They get destroyed when the
+    // RenderList tears down (registry.destroy()).
+    dropBuf(m_sceneCountsBuffer);
+    dropBuf(m_shadowCascadesBuffer);
+    dropBuf(m_camerasBuffer);
+    dropBuf(m_camerasPrevBuffer);
+    m_camerasCap = 0;
+    dropBuf(m_worldTransformsBuffer);
+    dropBuf(m_worldTransformsPrevBuffer);
+    m_worldTransformsCap = 0;
+    m_pendingWorldXformWrites.clear();
+    m_pendingWorldXformWrites.shrink_to_fit();
+    m_lastSnapshotFrame = -1;
+    // Symmetric clear for m_pendingGpuCopies: ops record raw QRhiBuffer*
+    // for src/dst (m_mdi.* and m_primitiveCloudBuckets buffers) which
+    // dropBuf above just released. Today release() is followed by either
+    // node teardown (no further runInitialPasses) or init() + a new
+    // rebuildMDI which clears the queue at its top, so the dangling
+    // pointers are never dereferenced — but the asymmetry is fragile
+    // against any future reordering. Defensive.
+    m_pendingGpuCopies.clear();
+    m_pendingGpuCopies.shrink_to_fit();
+    m_lastGpuCopiesFrame = -1;
+    // Env arena buffer is owned by GpuResourceRegistry — nothing to drop here.
+    // Plan 09 S4: stream byte-size trackers removed (see m_mdi comment).
+
+    // Free per-registry resources on every release(), regardless of
+    // whether the renderer will be destroyed (recreateOutputRenderList)
+    // or reused (relinkGraph). The "skip wipe on registry-pointer
+    // match" optimization the previous version of this comment
+    // referenced ONLY benefits the relinkGraph path; on resize the
+    // renderer is freshly constructed so m_loaderMaterialSlots etc.
+    // are already empty.
+    //
+    // The bug it caused: m_envSlot was leaked on every release().
+    // The Env arena has only 8 slots (GpuResourceRegistry.cpp:69), so
+    // after 8 resizes the arena exhausted, m_envSlot allocation fell
+    // back to slot 0 (or invalid), and the env aux binding pointed at
+    // slot 0's stale data — wildly wrong lighting / fog / exposure
+    // that drifts each resize as different stale data lands at slot 0.
+    // Other arenas have more headroom (Material 32K, RawTransform
+    // 16K) but they still leak; over many resizes the same drift
+    // would surface there.
+    //
+    // Trade-off: relinkGraph now pays the cost of re-allocating the
+    // env slot + per-loader-material slots + clearing the texture
+    // fingerprint (~10s of ms). Acceptable — relinkGraph is rare
+    // (user changes graph); resize is common (drag-resize fires
+    // continuously).
+    clearAllCaches(/*freeRegistryResources=*/true, (uint32_t)renderer.frame);
+
+    // Clear the registry pointer so a post-release rebuildChannel call
+    // hits its guarded early-out rather than dereferencing the
+    // pre-release pointer. m_lastRegistry stays populated for any
+    // future re-init wanting to detect "same registry as before".
+    m_lastRegistry = m_registry;
+    m_registry = nullptr;
+    m_initialized = false;
+  }
+
+  // Byte width of a single element stored in the given ossia::geometry
+  // attribute format, or 0 when the format has no fixed width known here
+  // (user_struct / unknown). Callers use it to bound CPU attribute reads:
+  // an attribute authored in a narrower format than the consumer expects
+  // (threedim#10: an unorm-byte4 color, 4 B, read as float4, 16 B) must not
+  // over-read the source buffer.
+  static int geomAttrFormatByteSize(int format) noexcept
+  {
+    using A = ossia::geometry::attribute;
+    switch(format)
+    {
+      case A::float4: case A::uint4: case A::sint4:
+        return 16;
+      case A::float3: case A::uint3: case A::sint3:
+        return 12;
+      case A::float2: case A::uint2: case A::sint2:
+      case A::half4: case A::ushort4: case A::sshort4:
+        return 8;
+      case A::half3: case A::ushort3: case A::sshort3:
+        return 6;
+      case A::float1: case A::uint1: case A::sint1: case A::unormbyte4:
+      case A::half2: case A::ushort2: case A::sshort2:
+        return 4;
+      case A::unormbyte2: case A::half1: case A::ushort1: case A::sshort1:
+        return 2;
+      case A::unormbyte1:
+        return 1;
+      default: // user_struct / unknown
+        return 0;
+    }
+  }
+
+  // Read a single vertex attribute's full range from a CPU-backed source
+  // geometry into a freshly-allocated contiguous byte buffer. Returns empty
+  // if the source uses a GPU handle, is missing, has an out-of-range base
+  // offset, or g.vertices <= 0. `BytesPerVertex` is the consumer's element size.
+  template <int BytesPerVertex>
+  static std::vector<std::byte> extractCpuAttribute(
+      const ossia::geometry& g, ossia::attribute_semantic sem)
+  {
+    // A negative count would wrap to a huge size_t in the allocation below.
+    if(g.vertices <= 0)
+      return {};
+    const auto* a = g.find(sem);
+    if(!a || a->binding < 0 || a->binding >= (int)g.input.size())
+      return {};
+    const auto& in = g.input[a->binding];
+    if(in.buffer < 0 || in.buffer >= (int)g.buffers.size())
+      return {};
+    const auto& b = g.buffers[in.buffer];
+    const auto* cpu = ossia::get_if<ossia::geometry::cpu_buffer>(&b.data);
+    if(!cpu || !cpu->raw_data)
+      return {};
+
+    const int stride = (a->binding < (int)g.bindings.size())
+        ? (int)g.bindings[a->binding].byte_stride
+        : BytesPerVertex;
+
+    // Copy at most the source element's byte size per vertex (the rest stays
+    // zero-filled): a format narrower than BytesPerVertex (e.g. unorm-byte4
+    // color, 4 B, consumed as float4, 16 B) must not pull 12 stray bytes.
+    const int srcElem = geomAttrFormatByteSize(a->format);
+    const int copyPerVertex
+        = (srcElem > 0) ? std::min(BytesPerVertex, srcElem) : BytesPerVertex;
+
+    // Bound every read against the source buffer's actual byte_size:
+    // an inconsistent producer (short buffer, wrong vertex_count) must not
+    // over-read off the end of the heap allocation (threedim#10).
+    const int64_t baseOff = (int64_t)in.byte_offset + (int64_t)a->byte_offset;
+    const int64_t srcBytes = cpu->byte_size;
+    if(baseOff < 0 || (srcBytes > 0 && baseOff >= srcBytes))
+      return {};
+
+    std::vector<std::byte> out(std::size_t(g.vertices) * BytesPerVertex);
+    const auto* raw = reinterpret_cast<const std::byte*>(cpu->raw_data.get());
+    const auto* base = raw + baseOff;
+    for(int i = 0; i < g.vertices; ++i)
+    {
+      const int64_t off = baseOff + (int64_t)i * stride;
+      // Clamp this element's copy so it never reads past byte_size.
+      int n = copyPerVertex;
+      if(srcBytes > 0)
+      {
+        const int64_t avail = srcBytes - off;
+        if(avail <= 0)
+          break; // remaining vertices stay zero-filled
+        if(avail < n)
+          n = (int)avail;
+      }
+      std::memcpy(out.data() + std::size_t(i) * BytesPerVertex,
+                  base + (int64_t)i * stride, n);
+    }
+    return out;
+  }
+
+  // GPU-resident counterpart of extractCpuAttribute: locates the QRhiBuffer
+  // backing the requested semantic when the mesh stores it in a gpu_buffer
+  // variant (e.g. upstream compute shader output). A default-constructed
+  // view (null buf) means the attribute is missing or CPU-resident.
+  struct GpuAttrView
+  {
+    QRhiBuffer* buf{};  // backing buffer, null when nothing was found
+    int src_offset{};   // input byte_offset + attribute byte_offset
+    int byte_stride{};  // binding stride, 0 when the binding is not listed
+  };
+  static GpuAttrView
+  extractGpuAttribute(const ossia::geometry& g, ossia::attribute_semantic sem)
+  {
+    const auto* attr = g.find(sem);
+    if(attr == nullptr)
+      return {};
+    const auto bindingIdx = attr->binding;
+    if(bindingIdx < 0 || bindingIdx >= (int)g.input.size())
+      return {};
+    const auto& input = g.input[bindingIdx];
+    if(input.buffer < 0 || input.buffer >= (int)g.buffers.size())
+      return {};
+    const auto& storage = g.buffers[input.buffer].data;
+    const auto* gpu = ossia::get_if<ossia::geometry::gpu_buffer>(&storage);
+    if(gpu == nullptr || !gpu->handle)
+      return {};
+
+    const bool hasBinding = bindingIdx < (int)g.bindings.size();
+    GpuAttrView view;
+    view.buf = static_cast<QRhiBuffer*>(gpu->handle);
+    view.src_offset = int(input.byte_offset + attr->byte_offset);
+    view.byte_stride = hasBinding ? (int)g.bindings[bindingIdx].byte_stride : 0;
+    return view;
+  }
+
+  // Widen a CPU-backed index buffer to uint32; empty when the mesh has no
+  // (or a negative number of) indices or they are not CPU-resident. Reads
+  // are clamped to byte_size (threedim#10): a short buffer leaves a 0 tail.
+  static std::vector<uint32_t> extractCpuIndices(const ossia::geometry& g)
+  {
+    if(g.indices <= 0 || g.index.buffer < 0
+       || g.index.buffer >= (int)g.buffers.size())
+      return {};
+    const auto& b = g.buffers[g.index.buffer];
+    const auto* cpu = ossia::get_if<ossia::geometry::cpu_buffer>(&b.data);
+    if(!cpu || !cpu->raw_data)
+      return {};
+
+    const bool is16 = (g.index.format == decltype(g.index)::uint16);
+    const int idxBytes = is16 ? 2 : 4;
+    const int64_t baseOff = (int64_t)g.index.byte_offset;
+    const int64_t srcBytes = cpu->byte_size;
+    if(baseOff < 0 || (srcBytes > 0 && baseOff >= srcBytes))
+      return {};
+    int readable = g.indices;
+    if(srcBytes > 0)
+      readable = (int)std::min<int64_t>(
+          readable, (srcBytes - baseOff) / idxBytes);
+
+    std::vector<uint32_t> out(g.indices); // tail (if clamped) stays 0
+    const auto* base = reinterpret_cast<const std::byte*>(cpu->raw_data.get())
+                       + baseOff;
+    if(is16)
+    {
+      // memcpy: byte_offset may leave the source misaligned for uint16_t.
+      for(int i = 0; i < readable; ++i)
+      {
+        uint16_t v;
+        std::memcpy(&v, base + std::size_t(i) * 2, sizeof(v));
+        out[i] = v;
+      }
+    }
+    else
+      std::memcpy(out.data(), base, std::size_t(readable) * 4);
+    return out;
+  }
+
+  // Mesh-only part of emitDraw's skip predicate (threedim#3). emitDraw
+  // drops a draw when either
+  //   (a) no usable positions exist, CPU- or GPU-sourced, or
+  //   (b) the mesh is indexed but its indices are GPU-backed
+  //       (extractCpuIndices comes back empty).
+  // Both conditions are functions of the mesh's buffers alone, which stay
+  // invariant while the mesh fingerprint matches, so the fast path can
+  // evaluate them here and keep its freshPerDraws mirror in lock-step with
+  // what emitDraw packed. emitDraw's other skips (null mesh / vertices<=0 /
+  // null registry / slab exhaustion) are handled at the fast-path call
+  // site or cannot occur once a slab is already resident.
+  static bool meshEmitsDraw(const ossia::geometry& mesh)
+  {
+    const auto kPosition = ossia::attribute_semantic::position;
+
+    // (a) GPU positions are only probed when no CPU positions were found.
+    if(extractCpuAttribute<12>(mesh, kPosition).empty()
+       && !extractGpuAttribute(mesh, kPosition).buf)
+      return false; // no positions → emitDraw skips
+
+    // (b) Non-indexed meshes never consult the index buffer.
+    if(mesh.indices <= 0)
+      return true;
+    // GPU-backed indices unsupported → emitDraw skips
+    return !extractCpuIndices(mesh).empty();
+  }
+
+  // Grow-only (re)allocation of a single QRhiBuffer.
+  //
+  //   buf / cap  : buffer handle and its tracked capacity in bytes; both are
+  //                updated in place when a new buffer is allocated.
+  //   need       : minimum capacity required by the caller, in bytes.
+  //   flags/name : usage flags and debug name given to the new buffer.
+  //
+  // Returns false (and touches nothing) when `buf` exists and `cap` already
+  // covers `need`; returns true when a buffer was (re)allocated this call.
+  //
+  // The previous handle goes through RenderList::releaseBuffer, which is the
+  // project-wide invariant for QRhiBuffer lifetime: it scans the
+  // RenderList's m_vertexBuffers for the pointer and either skips (the
+  // buffer is still referenced by a mesh, so the mesh iteration at
+  // RenderList::release cleans it up) or deleteLater's it. Calling
+  // QRhiBuffer::deleteLater directly bypasses that check and double-frees
+  // on RenderList::release any buffer also stored in a MeshBuffers entry —
+  // the "sometimes segfault on exit" crash pattern.
+  //
+  // Callers pairing the buffer with a diffUpload-managed CPU mirror MUST
+  // clear that mirror on `true`, so diffUpload re-emits the full contents
+  // into the new allocation. Otherwise diffUpload's equal-prefix
+  // short-circuit skips the prefix whenever the fresh values match the
+  // cached ones (e.g. an Instancer with one prototype emits draw_id=0 for
+  // every slot), and every capacity-boundary crossing leaves the first
+  // cached.size() entries without the caller's data. Manifests as
+  // "instances disappear at counts 4→5 / 8→9 / 16→17 / …" because the
+  // prototype's vertex shader reads a wrong draw_id and OOBs on
+  // per_draws[draw_id].
+  static bool growBuf(
+      score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
+      QRhiBuffer*& buf, int64_t& cap,
+      int64_t need, QRhiBuffer::UsageFlags flags, const char* name)
+  {
+    if(buf && cap >= need)
+      return false;
+
+    // Capacity policy. Below a 256 MB knee the capacity doubles, so frequent
+    // small grows don't thrash. Pure doubling overshoots badly for large
+    // buffers (a 1.08 GB request landed on a 2 GB allocation, which
+    // QRhi/Vulkan/D3D commonly reject around the 2³¹ byte boundary — many
+    // driver paths cap maxStorageBufferRange at 2GB-4 or use a signed-int32
+    // size internally), so past the knee the capacity becomes need + 25 %,
+    // rounded up to 16 B so std430 structures land on natural strides.
+    constexpr int64_t kKnee = 256ll * 1024 * 1024; // 256 MB
+    int64_t grown = (cap > 0) ? cap : 16;
+    for(; grown < need;)
+      grown = (grown < kKnee) ? grown * 2
+                              : ((need * 5 / 4 + 15) & ~int64_t{15});
+
+    QRhiBuffer* const previous = buf; // kept only for the trace below
+    if(buf)
+      renderer.releaseBuffer(buf);
+    buf = renderer.state.rhi->newBuffer(QRhiBuffer::Static, flags, grown);
+    buf->setName(name);
+
+    // QRhi::create() returns false on driver-level allocation failure (out
+    // of VRAM, exceeds maxBufferSize, signed-32-bit overflow in the
+    // backend). An unchecked failure would publish a zombie wrapper whose
+    // underlying VkBuffer/D3D buffer is null: uploadStaticBuffer becomes a
+    // silent no-op and the GPU sees zero-filled memory at every read — the
+    // "all splats collapse to origin" signature in the 3DGS pipeline. So
+    // surface the failure loudly.
+    const bool created = buf->create();
+    BUFTRACE() << "ScenePreprocessor::growBuf name=" << name
+               << " old=" << (void*)previous
+               << " new=" << (void*)buf
+               << " cap=" << (qint64)cap << "->" << (qint64)grown
+               << " need=" << (qint64)need
+               << " ok=" << created;
+    if(created)
+    {
+      // Zero-fill the fresh allocation. Vulkan does NOT zero-initialise
+      // new VkBuffers — the device-memory page holds whatever was there
+      // before. For sparse-uploaded SSBOs (per_draws padding past
+      // drawCount, unused world_transforms arena slots, etc.) untouched
+      // bytes would otherwise be read by shaders (especially when an
+      // indexer like PerDraw.transform_slot points at a slot the producer
+      // hasn't populated this frame) and feed garbage into the pipeline;
+      // each resize lands on a different page → wildly different visuals
+      // per resize. RhiClearBuffer pulls its zero source bytes from a
+      // thread-local pool — no per-call std::vector<char> of zeros.
+      RhiClearBuffer::clearBuffer(
+          *renderer.state.rhi, res, buf, 0, (quint32)grown);
+    }
+    else
+    {
+      qWarning() << "ScenePreprocessor::growBuf:" << name
+                 << "create() FAILED at cap=" << (qint64)grown
+                 << "(need=" << (qint64)need
+                 << "). Driver likely refused the allocation —"
+                    " too large, OOM, or hit a backend size limit."
+                    " Downstream reads will return zeros.";
+    }
+    cap = grown;
+    return true;
+  }
+
+  // Map a material_component pointer to its Material-arena slot index.
+  // Producer-authored materials carry a live raw_slot, which wins; loader
+  // materials are looked up in m_loaderMaterialSlots. Anything else (null
+  // material, no registry, no valid slot) resolves to 0 — an unused arena
+  // entry, so shaders fall back to a default-initialised MaterialGPU
+  // rather than reading undefined bytes.
+  //
+  // Task 28a arena-direct path: the result is what gets stamped into
+  // `PerDrawGPU.material_index`, NOT the scene.state->materials index. The
+  // fast-path per_draws pack (update()) and the full-rebuild pack
+  // (rebuildMDI) must both go through this helper so the two paths agree.
+  uint32_t arenaSlotForMaterial(const ossia::material_component* mat) const noexcept
+  {
+    if(mat && m_registry)
+    {
+      if(m_registry->isLive(mat->raw_slot))
+        return mat->raw_slot.internal_index;
+      const auto found = m_loaderMaterialSlots.find(mat);
+      if(found != m_loaderMaterialSlots.end() && found->second.valid())
+        return found->second.slot_index;
+    }
+    return 0u;
+  }
+
+  // Stable id for an instance prototype. A producer-stamped
+  // mesh_primitive::stable_id (producers SHOULD set one at construction;
+  // loaders and PBRMesh do) wins. Otherwise — notably Threedim::Primitive
+  // routed through halp::geometry → mesh_component::legacy_geometry, which
+  // carries no primitive list and is bridged into a synthesized primitive
+  // upstream — an id keyed on the mesh_component pointer is minted once and
+  // reused: stable across frames while the producer re-emits the same
+  // shared_ptr, which the Phase-1 identity-caching pattern enforces.
+  uint64_t resolvePrototypeStableId(
+      const ossia::mesh_component* mc,
+      const ossia::mesh_primitive& prim) noexcept
+  {
+    if(const auto authored = prim.stable_id; authored != 0)
+      return authored;
+    if(mc == nullptr)
+      return reinterpret_cast<uint64_t>(&prim); // no component to key on
+    auto [entry, fresh] = m_protoStableIds.emplace(mc, 0u);
+    if(fresh)
+      entry->second = ossia::mint_stable_id();
+    return entry->second;
+  }
+
+  // MDI rebuild: concatenate CPU-backed legacy_geometry meshes into shared
+  // vertex / index buffers + emit one output geometry with indirect draw
+  // metadata. Draws whose source is GPU-backed or uses non-standard formats
+  // are skipped with a warning (they can be rendered through per-mesh mode).
+  //
+  // Plan 09 S4 integration (Wave 1): the MeshArenaManager's slab lifecycle
+  // is exercised here — `acquireMeshSlab` + `markMeshSlabSeen` per-draw,
+  // `sweepMeshSlabs` at the end. Slabs are allocated, their offsets are
+  // available, but the concat-and-bulk-upload path below still runs
+  // unchanged: byte-identical rendering is the Wave 1 acceptance criterion.
+  //
+  // TODO (S4 full migration, follow-up): replace `uploadStaticBuffer` at
+  // offset 0 over concatenated ACC vectors with per-slab
+  // `registry.uploadMeshStream(slab, Stream, bytes, size)` calls, gated
+  // by `slab->freshly_allocated`. Output geometry's vertex/index buffer
+  // bindings switch from `m_mdi.positions` to
+  // `registry.meshStreamBuffer(MeshStream::Positions)`. indirect_draw_cmds
+  // entries take their `baseVertex` / `firstIndex` from the slab's
+  // stream offsets. GPU-to-GPU copies (m_pendingGpuCopies) point at
+  // slab offsets too. Net effect: adding one mesh uploads only that
+  // mesh's bytes; no scene-wide reconcat.
+  // Primitive-cloud branch — buckets fs.primitive_clouds by format_id
+  // and emits one indirect-draw geometry per bucket. Each bucket
+  // geometry is appended to m_outputSpec.meshes after the mesh MDI
+  // entry (if any). Per bucket emits:
+  //   - one auxiliary SSBO `raw_splats` (concatenation of cloud
+  //     raw_data buffers; same row stride across the bucket's clouds)
+  //   - one auxiliary SSBO `cloud_meta` (CloudMetaGPU[] mirroring
+  //     PerDrawGPU's model[16] + transform_slot pattern)
+  //   - one auxiliary SSBO `cloud_id_lookup` (uint per primitive ->
+  //     index into cloud_meta)
+  //   - one indirect cmd buffer {vertex_count=6, instance_count=Σ
+  //     primitive_counts, ...} so RawRaster's existing m_mesh->draw()
+  //     path picks up the draw via cb.drawIndirect or the cpu_draw
+  //     fallback.
+  //
+  // The format's first CSF stage reads `raw_splats` via AUXILIARY
+  // LAYOUT (no per-column SSBO bindings, so descriptor budget stays
+  // tight on integrated Metal — see .claude/PRIMITIVE-CLOUD-PLAN.md).
+  void rebuildPrimitiveClouds(
+      RenderList& renderer, QRhiResourceUpdateBatch& res,
+      const FlatScene& fs)
+  {
+    ++m_primitiveCloudFrame;
+    if(fs.primitive_clouds.empty())
+    {
+      // No clouds this frame — keep the existing buckets around in case
+      // the scene only briefly goes empty during a graph rebuild. Their
+      // persistent buffers are still released through releaseBuffer()
+      // when the renderer is torn down. Stale eviction only fires when
+      // the primitive_clouds list is non-empty (below).
+      return;
+    }
+
+    // Bucket the entries. flat_map<bucket_key, vector<entry index>>.
+    // bucket_key was already chosen by the visitor: hash(format_id) or
+    // stable_id when format_id is empty (each unformatted cloud
+    // becomes its own bucket).
+    struct Bucket
+    {
+      uint32_t bucket_key;
+      ossia::small_vector<const FlatScene::PrimitiveCloudDraw*, 4> draws;
+      uint64_t total_primitives{};
+      uint32_t row_stride{};
+      int64_t  raw_splats_bytes{};
+    };
+    ossia::flat_map<uint32_t, Bucket> buckets;
+
+    for(const auto& d : fs.primitive_clouds)
+    {
+      if(!d.cloud || d.cloud->primitive_count == 0)
+        continue;
+      // Bucket by format_id when set, else by cloud's address (stable
+      // pointer keyed bucket). Mirrors the visitor's intent. Hash matches
+      // the canonical filter_tag stamp (ossia::hash_string truncated to
+      // 32 bits) so a downstream FlattenedSceneFilterNode "format_id ==
+      // match_str" route lines up byte-for-byte with this bucket key.
+      uint32_t key = 0;
+      if(!d.cloud->format_id.empty())
+      {
+        key = (uint32_t)ossia::hash_string(d.cloud->format_id);
+      }
+      else
+      {
+        key = (uint32_t)((uintptr_t)d.cloud.get() & 0xffffffffu);
+      }
+
+      auto& b = buckets[key];
+      if(b.draws.empty())
+      {
+        b.bucket_key = key;
+        b.row_stride = d.cloud->row_stride;
+      }
+      else if(b.row_stride != d.cloud->row_stride)
+      {
+        // Row-stride mismatch in a same-key bucket: skip the
+        // mismatched cloud rather than corrupt the concat. Indicates
+        // a tagging error in the producer.
+        qWarning() << "ScenePreprocessor::rebuildPrimitiveClouds: "
+                      "row_stride mismatch within bucket"
+                   << QString::fromStdString(d.cloud->format_id)
+                   << " expected" << b.row_stride
+                   << "got" << d.cloud->row_stride;
+        continue;
+      }
+      b.draws.push_back(&d);
+      b.total_primitives += d.cloud->primitive_count;
+    }
+
+    // Drop buckets whose key did not appear this frame.
+    for(auto it = m_primitiveCloudBuckets.begin();
+        it != m_primitiveCloudBuckets.end();)
+    {
+      if(buckets.find(it->first) == buckets.end())
+      {
+        auto& bb = it->second;
+        if(bb.raw_splats)       renderer.releaseBuffer(bb.raw_splats);
+        if(bb.cloud_meta)       renderer.releaseBuffer(bb.cloud_meta);
+        if(bb.cloud_id_lookup)  renderer.releaseBuffer(bb.cloud_id_lookup);
+        if(bb.indirect)         renderer.releaseBuffer(bb.indirect);
+        it = m_primitiveCloudBuckets.erase(it);
+      }
+      else
+      {
+        ++it;
+      }
+    }
+
+    using UF = QRhiBuffer::UsageFlags;
+
+    // Lazily ensure m_outputSpec.meshes exists so we can append.
+    if(!m_outputSpec.meshes)
+      m_outputSpec.meshes = std::make_shared<ossia::mesh_list>();
+    if(!m_outputSpec.filters)
+      m_outputSpec.filters = std::make_shared<ossia::geometry_filter_list>();
+
+    // Cow if shared with downstream — the mesh MDI rebuilds via
+    // make_shared<mesh_list>() so the typical state is non-shared
+    // here. If a downstream reader is holding the previous list, we
+    // need a fresh one to avoid mutating it.
+    if(m_outputSpec.meshes.use_count() > 1)
+    {
+      auto fresh = std::make_shared<ossia::mesh_list>();
+      fresh->meshes = m_outputSpec.meshes->meshes;
+      fresh->dirty_index = m_outputSpec.meshes->dirty_index;
+      m_outputSpec.meshes = std::move(fresh);
+    }
+
+    auto wrapGpu = [](QRhiBuffer* b, int64_t size) {
+      ossia::geometry::gpu_buffer gb;
+      gb.handle = b;
+      gb.byte_size = size;
+      return ossia::geometry::buffer{.data = gb, .dirty = true};
+    };
+
+    bool any_emitted = false;
+    for(auto& [key, b] : buckets)
+    {
+      if(b.draws.empty() || b.total_primitives == 0 || b.row_stride == 0)
+        continue;
+
+      auto& bb = m_primitiveCloudBuckets[key];
+      bb.row_stride = b.row_stride;
+      bb.last_seen_frame = m_primitiveCloudFrame;
+
+      // ── Indirect-draw command shape (used both for size accounting
+      // upfront and for the CPU build inside the upload guard).
+      struct IndirectCmd
+      {
+        uint32_t indexOrVertexCount;
+        uint32_t instanceCount;
+        uint32_t firstIndexOrVertex;
+        int32_t  baseVertex; // for indexed draws — unused (vertex_count path)
+        uint32_t baseInstance;
+      };
+
+      // ── Upfront sizing (needed by growBuf AND by the per-bucket
+      // geometry construction further down, which references the
+      // owned buffer pointers regardless of upload/skip). raw_splats
+      // needs VertexBuffer alongside StorageBuffer because the bucket
+      // exposes the buffer through both paths: as an AUXILIARY SSBO
+      // (CSF reads the row layout via std430) AND as a per-vertex
+      // ATTRIBUTE buffer (Raw Raster's setVertexInput pulls every
+      // g.input entry — even on procedural draws — and Vulkan
+      // requires VK_BUFFER_USAGE_VERTEX_BUFFER_BIT for vertex
+      // bindings).
+      const int64_t rawBytes
+          = (int64_t)b.total_primitives * (int64_t)b.row_stride;
+      const uint32_t bucketCloudCount = (uint32_t)b.draws.size();
+      const int64_t cmBytes
+          = (int64_t)bucketCloudCount * (int64_t)sizeof(CloudMetaGPU);
+      const int64_t lookupBytes
+          = (int64_t)b.total_primitives * (int64_t)sizeof(uint32_t);
+      const int64_t icBytes = (int64_t)sizeof(IndirectCmd);
+
+      growBuf(renderer, res,bb.raw_splats, bb.rawSplatsCap, rawBytes,
+              UF(QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer),
+              "ScenePreprocessor::cloud.raw_splats");
+      growBuf(renderer, res,bb.cloud_meta, bb.cloudMetaCap, cmBytes,
+              UF(QRhiBuffer::StorageBuffer),
+              "ScenePreprocessor::cloud.cloud_meta");
+      growBuf(renderer, res,bb.cloud_id_lookup, bb.cloudIdLookupCap, lookupBytes,
+              UF(QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer),
+              "ScenePreprocessor::cloud.cloud_id_lookup");
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+      growBuf(renderer, res,bb.indirect, bb.indirectCap, icBytes,
+              UF(QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer),
+              "ScenePreprocessor::cloud.indirect");
+#else
+      growBuf(renderer, res,bb.indirect, bb.indirectCap, icBytes,
+              UF(QRhiBuffer::StorageBuffer),
+              "ScenePreprocessor::cloud.indirect");
+#endif
+
+      // ── Phase-1 delta-update fingerprint ─────────────────────────
+      // Hash everything the four GPU buffers depend on. When this
+      // matches the last frame's value, the buckets are byte-equal
+      // to what the previous frame uploaded — the per-frame CPU
+      // concat + uploadStaticBuffer ×4 is pure waste, skip it.
+      // For the user's "drop a 1 GB PLY into a static scene" case
+      // this brings raw_splats per-frame work from ~720 MB/s of GPU
+      // memcpy down to zero. The growBuf calls above already
+      // short-circuited (cap >= need), so on the steady state the
+      // entire bucket loop becomes O(draws.size()) hashing.
+      uint64_t fp = 0;
+      ossia::hash_combine(fp, (uint64_t)bucketCloudCount);
+      ossia::hash_combine(fp, (uint64_t)b.row_stride);
+      ossia::hash_combine(fp, (uint64_t)b.total_primitives);
+      for(const auto* d : b.draws)
+      {
+        const auto* raw = d->cloud->raw_data.get();
+        ossia::hash_combine(fp, (uint64_t)(uintptr_t)raw);
+        // raw_data carries an explicit content_hash for fast
+        // diff-skip when the producer can stamp one (PlyParser
+        // sets it from the storage pointer); fall back to
+        // dirty_index for producers that don't.
+        const uint64_t content_id
+            = raw ? (raw->content_hash != 0
+                         ? raw->content_hash
+                         : (uint64_t)raw->dirty_index)
+                  : 0u;
+        ossia::hash_combine(fp, content_id);
+        ossia::hash_combine(fp, (uint64_t)d->cloud->primitive_count);
+        ossia::hash_combine(fp, (uint64_t)d->transform_slot);
+        // worldTransform: 16 floats × 4 = 64 bytes column-major.
+        ossia::hash_combine(
+            fp,
+            ossia::hash_bytes(d->worldTransform.constData(), 64));
+      }
+
+      // 0 = "never uploaded yet, force the first frame's upload
+      // regardless of fingerprint matching". growBuf may also have
+      // just allocated a fresh VkBuffer (cap < need), in which case
+      // the old data is gone; the fingerprint differs from frame N-1
+      // because the size constraint changed (total_primitives or
+      // row_stride is part of fp). Either way the !unchanged branch
+      // runs and we re-upload.
+      const bool unchanged = (bb.content_fingerprint != 0)
+                             && (bb.content_fingerprint == fp)
+                             && (bb.raw_splats != nullptr);
+
+      if(!unchanged)
+      {
+        // ── raw_splats: concatenation of all clouds' raw bytes ────────
+        // Bucket-internal format_id mismatch was rejected above so all
+        // clouds in this bucket share row_stride.
+        std::vector<uint8_t> concat;
+        concat.resize((std::size_t)rawBytes);
+        uint8_t* dst = concat.data();
+        for(const auto* d : b.draws)
+        {
+          const auto& br = d->cloud->raw_data;
+          if(!br) continue;
+          const int64_t bytes
+              = (int64_t)d->cloud->primitive_count * (int64_t)b.row_stride;
+          if(auto* cpu = ossia::get_if<ossia::buffer_data>(&br->resource))
+          {
+            if(cpu->data && cpu->byte_size >= bytes)
+            {
+              std::memcpy(dst, cpu->data.get(), (std::size_t)bytes);
+            }
+            else
+            {
+              std::memset(dst, 0, (std::size_t)bytes);
+            }
+          }
+          else
+          {
+            // GPU-resident raw_data: out of scope for v1 (would need a
+            // GPU-to-GPU copy via copyBuffer). Zero-fill so the bucket
+            // is at least well-defined. See PRIMITIVE-CLOUD-ARENA-DESIGN.md
+            // for the planned Phase-2 path (slot-based GPU-resident
+            // producers writing into the per-format arena directly).
+            std::memset(dst, 0, (std::size_t)bytes);
+          }
+          dst += bytes;
+        }
+        res.uploadStaticBuffer(bb.raw_splats, 0, rawBytes, concat.data());
+
+        // ── cloud_meta + cloud_id_lookup ─────────────────────────────
+        std::vector<CloudMetaGPU> cmData;
+        cmData.resize(bucketCloudCount);
+
+        std::vector<uint32_t> lookup;
+        lookup.resize((std::size_t)b.total_primitives);
+
+        uint32_t prim_offset = 0;
+        uint32_t prim_lookup_pos = 0;
+        for(uint32_t ci = 0; ci < bucketCloudCount; ++ci)
+        {
+          const auto* d = b.draws[ci];
+          CloudMetaGPU& gm = cmData[ci];
+
+          // Composed world matrix from the FlattenVisitor walk
+          // (parentWorld). QMatrix4x4 is column-major and we want a
+          // column-major float[16] — its constData() returns column-
+          // major memory directly.
+          const float* m = d->worldTransform.constData();
+          for(int k = 0; k < 16; ++k) gm.model[k] = m[k];
+
+          // Per-cloud world-space AABB: 8-corner walk of the local
+          // bounds through worldTransform. Mirrors the bucket-bounds
+          // union loop further down, but kept per-cloud so format CSFs
+          // can frustum-cull individual clouds inside a bucket.
+          const auto& lb = d->cloud->bounds;
+          if(lb.empty())
+          {
+            // Sentinel: empty bounds -> produce an inverted AABB so
+            // any frustum test in the shader trivially marks it
+            // visible (consumers can also check for the inversion).
+            gm.bounds_min[0] = gm.bounds_min[1] = gm.bounds_min[2] = 1.f;
+            gm.bounds_max[0] = gm.bounds_max[1] = gm.bounds_max[2] = -1.f;
+          }
+          else
+          {
+            const QMatrix4x4& W = d->worldTransform;
+            float minx = std::numeric_limits<float>::infinity();
+            float miny = minx, minz = minx;
+            float maxx = -minx, maxy = -minx, maxz = -minx;
+            for(int corner = 0; corner < 8; ++corner)
+            {
+              const float x = (corner & 1) ? lb.max[0] : lb.min[0];
+              const float y = (corner & 2) ? lb.max[1] : lb.min[1];
+              const float z = (corner & 4) ? lb.max[2] : lb.min[2];
+              const QVector3D p = W.map(QVector3D(x, y, z));
+              minx = std::min(minx, p.x()); maxx = std::max(maxx, p.x());
+              miny = std::min(miny, p.y()); maxy = std::max(maxy, p.y());
+              minz = std::min(minz, p.z()); maxz = std::max(maxz, p.z());
+            }
+            gm.bounds_min[0] = minx; gm.bounds_min[1] = miny; gm.bounds_min[2] = minz;
+            gm.bounds_max[0] = maxx; gm.bounds_max[1] = maxy; gm.bounds_max[2] = maxz;
+          }
+          gm.bounds_min[3] = 0.f;
+          gm.bounds_max[3] = 0.f;
+
+          gm.primitive_offset    = prim_offset;
+          gm.primitive_count     = (uint32_t)d->cloud->primitive_count;
+          gm.transform_slot      = d->transform_slot; // 0xFFFFFFFFu = none
+          gm.format_param_index  = 0; // unused for v1
+          gm._pad[0] = gm._pad[1] = gm._pad[2] = gm._pad[3] = 0;
+
+          // Fill lookup[prim_offset..prim_offset+count] = ci
+          for(uint32_t p = 0; p < gm.primitive_count; ++p)
+            lookup[prim_lookup_pos + p] = ci;
+          prim_lookup_pos += gm.primitive_count;
+          prim_offset    += gm.primitive_count;
+        }
+
+        res.uploadStaticBuffer(
+            bb.cloud_meta, 0, cmBytes, cmData.data());
+        res.uploadStaticBuffer(
+            bb.cloud_id_lookup, 0, lookupBytes, lookup.data());
+
+        // ── indirect_draw_cmds: one cmd, vertex_count=N (one slot per
+        // primitive). The bucket geometry is a flat point cloud — the
+        // CSF stage downstream (e.g. 01_Decode for 3dgs.classic) reads
+        // `$VERTEX_COUNT_geoIn = N` and emits the instanced 6×N quad
+        // topology its draw stage expects. Format CSF chains may rewrite
+        // this cmd post-cull to shrink the active set; the unculled
+        // total is the safe default.
+        const IndirectCmd cmd{
+            /*indexOrVertexCount*/ (uint32_t)b.total_primitives,
+            /*instanceCount*/      1u,
+            /*firstIndexOrVertex*/ 0u,
+            /*baseVertex*/         0,
+            /*baseInstance*/       0u};
+        res.uploadStaticBuffer(bb.indirect, 0, icBytes, &cmd);
+
+        bb.content_fingerprint = fp;
+      }
+
+      // ── Build the bucket geometry ─────────────────────────────────
+      ossia::geometry g;
+      const int rawSplatsBufIdx     = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(bb.raw_splats, rawBytes));
+      const int cloudMetaBufIdx     = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(bb.cloud_meta, cmBytes));
+      const int cloudLookupBufIdx   = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(bb.cloud_id_lookup, lookupBytes));
+      const int indirectBufIdx      = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(bb.indirect, icBytes));
+
+      g.auxiliary.push_back({
+          .name = "raw_splats",
+          .buffer = rawSplatsBufIdx,
+          .byte_offset = 0, .byte_size = rawBytes});
+      g.auxiliary.push_back({
+          .name = "cloud_meta",
+          .buffer = cloudMetaBufIdx,
+          .byte_offset = 0, .byte_size = cmBytes});
+
+      // Expose the cloud→primitive mapping as a per-vertex ATTRIBUTE
+      // (one uint per primitive), not as AUXILIARY. The CSF binder
+      // converts ATTRIBUTES into named SSBOs accessible as
+      // `geo_cloud_id_in[idx]`, and — crucially — the presence of a
+      // read_only ATTRIBUTE on the input geometry resource is what
+      // makes the CSF node *create an input port*. Without at least
+      // one such attribute the node has no way to be wired up.
+      ossia::geometry::binding cidBinding{};
+      cidBinding.byte_stride    = 4;
+      cidBinding.classification = ossia::geometry::binding::per_vertex;
+      const int cidBindingIdx = (int)g.bindings.size();
+      g.bindings.push_back(cidBinding);
+
+      struct ossia::geometry::input cidInput{};
+      cidInput.buffer      = cloudLookupBufIdx;
+      cidInput.byte_offset = 0;
+      g.input.push_back(cidInput);
+
+      ossia::geometry::attribute cidAttr{};
+      cidAttr.binding     = cidBindingIdx;
+      cidAttr.location    = 0;
+      cidAttr.format      = ossia::geometry::attribute::uint1;
+      cidAttr.byte_offset = 0;
+      cidAttr.semantic    = ossia::attribute_semantic::custom;
+      cidAttr.name        = "cloud_id";
+      g.attributes.push_back(cidAttr);
+
+      // When the producer named a struct type for the per-row payload
+      // (e.g. PlyParser sets "Splat3DGS" for 3dgs.classic), expose
+      // raw_splats *also* as a per-vertex ATTRIBUTE of format
+      // user_struct. The CSF binder generates a `Splat3DGS
+      // geo_splat_in[]` SSBO declaration matching the consumer's
+      // `TYPES.Splat3DGS` block, so shaders read rows as
+      // `ISF_READ(geoIn, splat)[idx].field` directly. The legacy
+      // raw_splats AUXILIARY entry above stays so older presets keep
+      // working through the migration; once all bundled presets move
+      // to TYPES the AUXILIARY emit can drop.
+      const auto* rep = b.draws[0]->cloud.get();
+      if(rep && !rep->struct_type_name.empty())
+      {
+        ossia::geometry::binding splatBinding{};
+        splatBinding.byte_stride    = (uint32_t)b.row_stride;
+        splatBinding.classification = ossia::geometry::binding::per_vertex;
+        const int splatBindingIdx = (int)g.bindings.size();
+        g.bindings.push_back(splatBinding);
+
+        struct ossia::geometry::input splatInput{};
+        splatInput.buffer      = rawSplatsBufIdx;
+        splatInput.byte_offset = 0;
+        g.input.push_back(splatInput);
+
+        ossia::geometry::attribute splatAttr{};
+        splatAttr.binding           = splatBindingIdx;
+        splatAttr.location          = 1;
+        splatAttr.format            = ossia::geometry::attribute::user_struct;
+        splatAttr.element_byte_size = (uint32_t)b.row_stride;
+        splatAttr.user_type_name    = rep->struct_type_name;
+        splatAttr.byte_offset       = 0;
+        splatAttr.semantic          = ossia::attribute_semantic::custom;
+        splatAttr.name              = "splat";
+        g.attributes.push_back(splatAttr);
+      }
+
+      // Forward the camera UBO (uploaded earlier in update() before
+      // rebuildMDI) so cloud-format CSF stages can read view /
+      // projection / cameraPosition / renderSize without manual
+      // wiring. Same name ("camera") that mesh shaders use, so a
+      // single GLSL UBO declaration works for both paths.
+      if(m_camerasBuffer)
+      {
+        const int camBufIdx = (int)g.buffers.size();
+        g.buffers.push_back(
+            wrapGpu(m_camerasBuffer, (int64_t)sizeof(CameraUBOData)));
+        g.auxiliary.push_back({
+            .name = "camera",
+            .buffer = camBufIdx,
+            .byte_offset = 0,
+            .byte_size = (int64_t)sizeof(CameraUBOData)});
+      }
+      if(m_sceneCountsBuffer)
+      {
+        const int countsBufIdx = (int)g.buffers.size();
+        g.buffers.push_back(
+            wrapGpu(m_sceneCountsBuffer, (int64_t)sizeof(SceneCountsUBO)));
+        g.auxiliary.push_back({
+            .name = "scene_counts",
+            .buffer = countsBufIdx,
+            .byte_offset = 0,
+            .byte_size = (int64_t)sizeof(SceneCountsUBO)});
+      }
+
+      // Indirect draw shape: vertex_count=N points, instance_count=1.
+      // The bucket is a flat point cloud — instancing is introduced by
+      // the format's CSF preprocessor (which converts each input
+      // "vertex" into a 6-vertex×N-instance quad topology its raster
+      // stage consumes).
+      ossia::geometry::gpu_buffer ic_gpu;
+      ic_gpu.handle = bb.indirect;
+      ic_gpu.byte_size = icBytes;
+      g.indirect_count = ic_gpu;
+
+      // Mirror the IndirectCmd shape uploaded inside the !unchanged guard
+      // (or kept stable from a previous frame). Values are derived directly
+      // from b.total_primitives + the bucket's "one cmd, instance=1" shape;
+      // re-deriving here avoids hoisting `cmd` itself out of the upload
+      // guard just to read its fields.
+      g.cpu_draw_commands.push_back({
+          .index_or_vertex_count = (uint32_t)b.total_primitives,
+          .instance_count        = 1u,
+          .first_index_or_vertex = 0u,
+          .base_vertex           = 0,
+          .first_instance        = 0u});
+
+      g.vertices  = (int)b.total_primitives;
+      g.instances = 1;
+      g.topology  = ossia::geometry::points;
+      g.cull_mode = ossia::geometry::none;
+      g.front_face = ossia::geometry::counter_clockwise;
+      // Splats need alpha-blend; tag the geometry so a downstream
+      // RawRaster picks the right pipeline state. The format's actual
+      // PIPELINE_STATE in its .frag overrides this if more specific.
+      g.blend = ossia::geometry::blend_premultiplied_alpha;
+      g.depth_write = false;
+
+      // Surface format_id as filter_tag (rapidhash truncated to 32 bits)
+      // so a downstream FlattenedSceneFilterNode in "format_id ==
+      // match_str" mode can route this bucket to its format-specific
+      // shader chain. Same hash that the bucket key above uses, so the
+      // producer-side bucketing and the consumer-side filter agree
+      // byte-for-byte. Empty format_id leaves filter_tag at 0 (the
+      // "untagged" sentinel — string-match mode treats both as "no
+      // tag" and matches when match_str is also empty).
+      if(rep && !rep->format_id.empty())
+        g.filter_tag = (uint32_t)ossia::hash_string(rep->format_id);
+
+      // Bounds: union of cloud world-space AABBs.
+      ossia::aabb worldBounds{};
+      worldBounds.min[0] = worldBounds.min[1] = worldBounds.min[2] = 1.f;
+      worldBounds.max[0] = worldBounds.max[1] = worldBounds.max[2] = -1.f;
+      for(const auto* d : b.draws)
+      {
+        const auto& lb = d->cloud->bounds;
+        if(lb.empty())
+          continue;
+        // 8 corners of the local AABB transformed to world space.
+        const QMatrix4x4& W = d->worldTransform;
+        for(int corner = 0; corner < 8; ++corner)
+        {
+          const float x = (corner & 1) ? lb.max[0] : lb.min[0];
+          const float y = (corner & 2) ? lb.max[1] : lb.min[1];
+          const float z = (corner & 4) ? lb.max[2] : lb.min[2];
+          // Use QMatrix4x4::map() (inline member, no QtGui operator
+          // export needed). Equivalent to (W * vec4(x,y,z,1)).xyz.
+          const QVector3D p = W.map(QVector3D(x, y, z));
+          worldBounds.expand(p.x(), p.y(), p.z());
+        }
+      }
+      if(!worldBounds.empty())
+      {
+        g.bounds.min[0] = worldBounds.min[0];
+        g.bounds.min[1] = worldBounds.min[1];
+        g.bounds.min[2] = worldBounds.min[2];
+        g.bounds.max[0] = worldBounds.max[0];
+        g.bounds.max[1] = worldBounds.max[1];
+        g.bounds.max[2] = worldBounds.max[2];
+      }
+
+      m_outputSpec.meshes->meshes.push_back(std::move(g));
+      any_emitted = true;
+    }
+
+    if(any_emitted)
+    {
+      m_outputSpec.meshes->dirty_index += 1;
+    }
+  }
+
+  void rebuildMDI(
+      RenderList& renderer, QRhiResourceUpdateBatch& res, const FlatScene& fs,
+      const std::vector<uint32_t>& materialTagHashes)
+  {
+    // Plan 09 S4 (full migration). Per-mesh slab allocation replaces
+    // the old concat-and-bulk-upload path. Flow per draw:
+    //   1. acquireMeshSlab(stable_id, vc, ic) — hit OR fresh allocation
+    //      into the 5 per-stream OffsetAllocators in GpuResourceRegistry.
+    //   2. If slab.freshly_allocated: extract CPU bytes (or queue a GPU
+    //      copy for GPU-backed sources) and uploadMeshStream into the
+    //      slab's byte offset on each stream. Existing slabs: zero upload.
+    //   3. indirect_draw_cmds baseVertex / firstIndex come from the slab's
+    //      byte offsets divided by stream stride.
+    //   4. markMeshSlabSeen so the per-frame sweep doesn't reclaim it.
+    // The grace queue (2 frames by default) prevents the arena from
+    // returning a live slab's offset to another allocation while an
+    // in-flight draw still references it.
+    //
+    // Output layout unchanged from Wave 1's byte-identical state: four
+    // vertex bindings (pos/nrm/uv/tan) + one index buffer + all the
+    // scene auxiliaries. Consumer shaders see identical output shape.
+    //
+    // What's NOT in this function anymore:
+    //  - Concatenated CPU byte vectors (acc.positions / .normals / …).
+    //  - Running baseVertex / firstIndex counters.
+    //  - uploadStaticBuffer(offset=0, totalBytes) for vertex/index streams
+    //    — those buffers are registry-owned; we write per-slab only.
+    //  - growBuf for vertex/index streams — pre-sized at registry init.
+    // What IS here: the per_draws + indirect_draw_cmds upload (small
+    // preprocessor-owned SSBOs), per-draw metadata pack, output
+    // geometry construction.
+    auto& rhi = *renderer.state.rhi;
+    const uint32_t current_frame = (uint32_t)renderer.frame;
+
+    struct Acc // CPU-side accumulators; emitDraw appends to all three per cmd
+    {
+      std::vector<PerDrawGPU> perDraws;            // one record per emitted cmd
+      std::vector<PerDrawBoundsGPU> perDrawBounds; // pushed alongside perDraws
+      struct IndirectCmd // one indexed indirect draw record
+      {
+        uint32_t indexCount, instanceCount, firstIndex; // firstIndex: slab index offset
+        int32_t baseVertex;    // slab vertex offset
+        uint32_t baseInstance; // slot_cursor value at emit time
+      };
+      std::vector<IndirectCmd> indirectCmds; // size() before a push == cmd_index
+    } acc;
+
+    acc.perDraws.reserve(std::max(m_lastDrawCount, fs.draws.size()));
+    acc.perDrawBounds.reserve(std::max(m_lastDrawCount, fs.draws.size()));
+    acc.indirectCmds.reserve(std::max(m_lastDrawCount, fs.draws.size()));
+
+    // Concat-offsets for joint matrices across all skeletons in this
+    // flatten. skinJointOffsets[k] = sum of joint counts for skins < k.
+    // Stamped into PerDrawGPU.skeleton_offset so a future consolidated
+    // `joint_matrices` SSBO (single buffer across all skeletons) is a
+    // drop-in change on the shader side — offsets already point at the
+    // correct record. 0xFFFFFFFF sentinel is written for unskinned
+    // draws.
+    std::vector<uint32_t> skinJointOffsets;
+    skinJointOffsets.reserve(fs.skins.size());
+    {
+      uint32_t running = 0;
+      for(const auto& sk : fs.skins)
+      {
+        skinJointOffsets.push_back(running);
+        running += (uint32_t)sk.joint_matrices.size();
+      }
+    }
+
+    // Reset pending GPU copies for this frame — populated below when a
+    // draw's attributes are GPU-resident; issued in runInitialPasses.
+    m_pendingGpuCopies.clear();
+
+    // Queue one copy op targeting a slab's byte offset in the arena
+    // stream. No accumulator pre-reservation here: dst_offset is the
+    // slab's allocator-assigned offset, not an accumulator-relative
+    // position.
+    auto queueSlabCopy = [&](MdiAttr attr, const GpuAttrView& view,
+                             int elem_size, int vertex_count,
+                             uint32_t dst_slab_offset) {
+      PendingGpuCopy copy;
+      copy.attr = attr;
+      copy.src = view.buf;
+      copy.src_offset = view.src_offset;
+      copy.dst_offset = (int)dst_slab_offset;
+      copy.vertex_count = vertex_count;
+      copy.element_size = elem_size;
+      copy.src_stride = view.byte_stride;
+      // Packed source: one bulk size. Strided: per-vertex path sizes each iter.
+      const bool packed = copy.src_stride == 0 || copy.src_stride == elem_size;
+      copy.size = packed ? vertex_count * elem_size : elem_size;
+      m_pendingGpuCopies.push_back(copy);
+    };
+
+    // Scratch CPU buffers reused across draws to hold the padded
+    // vec3→vec4 conversions for positions / normals and the fallback
+    // (1,0,0,1) tangents. Grow-only; never shrinks. Avoids re-allocating
+    // for each per-draw upload.
+    std::vector<std::byte> scratch;
+
+    uint32_t totalVertices = 0;
+    uint32_t totalIndices = 0;
+    bool warned_missing_stable_id = false;
+
+    using Stream = GpuResourceRegistry::MeshStream;
+
+    // Running cursor into the unified per-instance concat space. Each
+    // emitted indirect cmd consumes `instanceCount` contiguous slots and
+    // writes its own cmd-index into draw_ids[slot..slot+instanceCount-1].
+    // For regular fs.draws cmds (instanceCount=1) cmd_index == slot
+    // index. For fs.instances groups (instanceCount=N) cmd_index !=
+    // slot index, so the shader CANNOT use gl_BaseInstance/gl_DrawID to
+    // recover the cmd index — it reads the per-instance `draw_id`
+    // attribute that this cursor populates.
+    uint32_t slot_cursor = 0;
+
+    // Records of instance-group slot ranges so the post-loop CPU
+    // bookkeeping can pre-fill draw_ids and queue the GPU copies for
+    // upstream translation / color buffers into the right concat
+    // offsets without a second pass over fs.instances.
+    struct InstanceSlotRecord // slot range of one instance group (see above)
+    {
+      uint32_t slot_base;              // first slot of the group's range
+      uint32_t count;                  // number of slots in the range
+      uint32_t cmd_index;              // indirect cmd that owns the range
+      QRhiBuffer* src_translations;    // upstream translation buffer
+      uint32_t src_translation_offset; // presumably bytes — TODO confirm
+      uint32_t src_translation_stride; // presumably bytes — TODO confirm
+      QRhiBuffer* src_colors;          // upstream color buffer
+      uint32_t src_color_offset;       // presumably bytes — TODO confirm
+    };
+    std::vector<InstanceSlotRecord> instanceRecords;
+
+    // Shared per-cmd processor for the fs.draws and fs.instances loops.
+    // Given a wrapper ossia::geometry plus its per-draw metadata it:
+    //   - extracts attributes (CPU path first, GPU view as fallback)
+    //   - acquires the mesh slab and uploads / queues copies per stream
+    //     (only when the slab is freshly_allocated)
+    //   - pushes per_draws + per_draw_bounds records
+    //   - pushes the indirect cmd with firstInstance = slot_cursor, then
+    //     advances slot_cursor by instanceCount
+    // Returns the emitted cmd_index (== acc.indirectCmds.size() BEFORE
+    // the push), or kCmdSkipped when the draw was skipped.
+    constexpr uint32_t kCmdSkipped = 0xFFFFFFFFu;
+    auto emitDraw = [&](
+        const ossia::geometry* mesh, uint64_t stable_id,
+        const QMatrix4x4& worldTransform,
+        const ossia::material_component* materialPtr,
+        int materialIndex, uint32_t transform_slot,
+        int skinIndex, const ossia::aabb& local_bounds,
+        uint32_t instanceCount) -> uint32_t
+    {
+      if(!mesh || mesh->vertices <= 0 || !m_registry || instanceCount == 0)
+        return kCmdSkipped;
+      if(stable_id == 0)
+      {
+        if(!warned_missing_stable_id)
+        {
+          qWarning() << "ScenePreprocessor::rebuildMDI: draw has no "
+                        "stable_id — synthesising from mesh pointer. "
+                        "Producer should stamp mesh_primitive::stable_id "
+                        "for cache stability.";
+          warned_missing_stable_id = true;
+        }
+        stable_id = (uint64_t)((uintptr_t)mesh)
+                    ^ ((uint64_t)mesh->vertices << 32)
+                    ^ (uint64_t)mesh->indices;
+        if(stable_id == 0)
+          stable_id = 1;
+      }
+
+      // CPU extraction — still the hot path for loaded glTF/FBX scenes.
+      auto pos = extractCpuAttribute<12>(*mesh, ossia::attribute_semantic::position);
+      auto nrm = extractCpuAttribute<12>(*mesh, ossia::attribute_semantic::normal);
+      auto uv  = extractCpuAttribute<8>(*mesh, ossia::attribute_semantic::texcoord0);
+      auto uv1 = extractCpuAttribute<8>(*mesh, ossia::attribute_semantic::texcoord1);
+      auto col = extractCpuAttribute<16>(*mesh, ossia::attribute_semantic::color0);
+      auto tan = extractCpuAttribute<16>(*mesh, ossia::attribute_semantic::tangent);
+
+      GpuAttrView gpu_pos, gpu_nrm, gpu_uv, gpu_tan;
+      if(pos.empty())
+        gpu_pos = extractGpuAttribute(*mesh, ossia::attribute_semantic::position);
+      if(nrm.empty())
+        gpu_nrm = extractGpuAttribute(*mesh, ossia::attribute_semantic::normal);
+      if(uv.empty())
+        gpu_uv = extractGpuAttribute(*mesh, ossia::attribute_semantic::texcoord0);
+      if(tan.empty())
+        gpu_tan = extractGpuAttribute(*mesh, ossia::attribute_semantic::tangent);
+
+      if(pos.empty() && !gpu_pos.buf)
+        return kCmdSkipped;
+
+      std::vector<uint32_t> idx;
+      if(mesh->indices > 0)
+      {
+        idx = extractCpuIndices(*mesh);
+        if(idx.empty())
+          return kCmdSkipped; // GPU-backed indices not yet supported.
+      }
+      else
+      {
+        idx.resize(mesh->vertices);
+        for(int v = 0; v < mesh->vertices; ++v)
+          idx[v] = (uint32_t)v;
+      }
+
+      const uint32_t drawIndexCount = (uint32_t)idx.size();
+      const int vc = mesh->vertices; // > 0: non-positive counts returned above
+
+      auto* slab = m_registry->acquireMeshSlab(
+          stable_id, (uint32_t)vc, drawIndexCount, current_frame);
+      if(!slab)
+        return kCmdSkipped;
+
+      m_registry->markMeshSlabSeen(stable_id, current_frame);
+
+      if(slab->freshly_allocated)
+      {
+        // ── Position ── vec3→vec4 padding when CPU-sourced.
+        const uint32_t posOff
+            = m_registry->meshSlabOffsetBytes(*slab, Stream::Positions);
+        if(!pos.empty())
+        {
+          scratch.assign(std::size_t(vc) * 16, std::byte{});
+          for(std::size_t v = 0; v < std::size_t(vc); ++v) // size_t: no int overflow in v * 16
+            std::memcpy(scratch.data() + v * 16, pos.data() + v * 12, 12);
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Positions,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+        else
+        {
+          queueSlabCopy(MdiAttr::Positions, gpu_pos, 16, vc, posOff);
+        }
+
+        // ── Normals ── vec3→vec4 padding; zero fallback when missing.
+        const uint32_t nrmOff
+            = m_registry->meshSlabOffsetBytes(*slab, Stream::Normals);
+        if(!nrm.empty())
+        {
+          scratch.assign(std::size_t(vc) * 16, std::byte{});
+          for(std::size_t v = 0; v < std::size_t(vc); ++v)
+            std::memcpy(scratch.data() + v * 16, nrm.data() + v * 12, 12);
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Normals,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+        else if(gpu_nrm.buf)
+        {
+          queueSlabCopy(MdiAttr::Normals, gpu_nrm, 16, vc, nrmOff);
+        }
+        else
+        {
+          scratch.assign(std::size_t(vc) * 16, std::byte{});
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Normals,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+
+        // ── Texcoords ── vec2; zero fallback when missing.
+        const uint32_t uvOff
+            = m_registry->meshSlabOffsetBytes(*slab, Stream::Texcoords);
+        if(!uv.empty())
+        {
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Texcoords,
+              uv.data(), (uint32_t)uv.size());
+        }
+        else if(gpu_uv.buf)
+        {
+          queueSlabCopy(MdiAttr::Texcoords, gpu_uv, 8, vc, uvOff);
+        }
+        else
+        {
+          scratch.assign(std::size_t(vc) * 8, std::byte{});
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Texcoords,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+
+        // ── Tangents ── vec4; (1,0,0,1) fallback.
+        const uint32_t tanOff
+            = m_registry->meshSlabOffsetBytes(*slab, Stream::Tangents);
+        if(!tan.empty())
+        {
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Tangents,
+              tan.data(), (uint32_t)tan.size());
+        }
+        else if(gpu_tan.buf)
+        {
+          queueSlabCopy(MdiAttr::Tangents, gpu_tan, 16, vc, tanOff);
+        }
+        else
+        {
+          scratch.assign(std::size_t(vc) * 16, std::byte{});
+          float fb[4] = {1.f, 0.f, 0.f, 1.f};
+          for(std::size_t v = 0; v < std::size_t(vc); ++v)
+            std::memcpy(scratch.data() + v * 16, fb, 16);
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Tangents,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+
+        // ── Colors ── vec4; (1,1,1,1) fallback.
+        if(!col.empty())
+        {
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Colors,
+              col.data(), (uint32_t)col.size());
+        }
+        else
+        {
+          scratch.assign(std::size_t(vc) * 16, std::byte{});
+          float fb[4] = {1.f, 1.f, 1.f, 1.f};
+          for(std::size_t v = 0; v < std::size_t(vc); ++v)
+            std::memcpy(scratch.data() + v * 16, fb, 16);
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Colors,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+
+        // ── Texcoords1 ── vec2; zero fallback.
+        if(!uv1.empty())
+        {
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Texcoords1,
+              uv1.data(), (uint32_t)uv1.size());
+        }
+        else
+        {
+          scratch.assign(std::size_t(vc) * 8, std::byte{});
+          m_registry->uploadMeshStream(
+              res, *slab, Stream::Texcoords1,
+              scratch.data(), (uint32_t)scratch.size());
+        }
+
+        // ── Indices ──
+        m_registry->uploadMeshStream(
+            res, *slab, Stream::Indices,
+            idx.data(), (uint32_t)(idx.size() * 4));
+      }
+
+      // Per-draw GPU record.
+      PerDrawGPU pd{};
+      writeMat4(pd.model, worldTransform);
+      QMatrix4x4 nm = worldTransform.inverted().transposed();
+      nm.setColumn(3, QVector4D(0, 0, 0, 1));
+      nm.setRow(3, QVector4D(0, 0, 0, 1));
+      writeMat4(pd.normal, nm);
+      pd.material_index = arenaSlotForMaterial(materialPtr);
+      pd.tag_hash
+          = (materialIndex >= 0
+             && (std::size_t)materialIndex < materialTagHashes.size())
+              ? materialTagHashes[(std::size_t)materialIndex]
+              : 0u;
+      pd.transform_slot = transform_slot;
+      pd.skeleton_offset
+          = (skinIndex >= 0
+             && (std::size_t)skinIndex < skinJointOffsets.size())
+                ? skinJointOffsets[(std::size_t)skinIndex]
+                : 0xFFFFFFFFu;
+      acc.perDraws.push_back(pd);
+      acc.perDrawBounds.push_back(packBounds(local_bounds));
+
+      const uint32_t cmd_index = (uint32_t)acc.indirectCmds.size();
+      Acc::IndirectCmd cmd{
+          drawIndexCount,
+          instanceCount,
+          slab->index_slot.offset,
+          (int32_t)slab->vertex_slot.offset,
+          slot_cursor};
+      acc.indirectCmds.push_back(cmd);
+      slot_cursor += instanceCount;
+
+      totalVertices += (uint32_t)vc;
+      totalIndices += drawIndexCount;
+      return cmd_index;
+    };
+
+    for(std::size_t i = 0; i < fs.draws.size(); ++i)
+    {
+      const auto& dc = fs.draws[i];
+      emitDraw(
+          dc.mesh, dc.stable_id, dc.worldTransform, dc.material.get(),
+          dc.materialIndex, dc.transform_slot, dc.skinIndex, dc.local_bounds,
+          /*instanceCount=*/1u);
+    }
+
+    // Number of per_draws entries that the fs.draws loop actually emitted
+    // (i.e. after emitDraw's skip predicate). The fast path's diff-upload
+    // mirror must be seeded from exactly this prefix — emitDraw can skip
+    // draws (slab exhaustion, GPU-backed indices, missing positions) that a
+    // naive `vertices > 0` filter would wrongly keep, which would desync the
+    // mirror from the GPU per_draws layout (threedim#3).
+    const std::size_t meshDrawCount = acc.perDraws.size();
+
+    // ── fs.instances ── one cmd per instance_component, instanceCount =
+    // group's instance count, firstInstance = slot_cursor before the
+    // cmd. Per-instance translations / colors are GPU-copied from the
+    // upstream Instancer's source buffers into the concat per-instance
+    // arrays at offset slot_base * stride; CPU-side draw_ids[slot..]
+    // get the cmd-index of the owning group (populated below, after
+    // both loops complete and slot_cursor stops moving).
+    //
+    // Defensive null-handle skip: the upstream Instancer may republish
+    // a fresh `instance_component` whose buffer handles haven't been
+    // populated yet (CSF compute pass mid-rebuild, etc). Skipping the
+    // group for that frame is correct — next frame the upstream is
+    // ready and the group renders.
+    for(std::size_t k = 0; k < fs.instances.size(); ++k)
+    {
+      const auto& inst_draw = fs.instances[k];
+      if(!inst_draw.instance)
+        continue;
+      const auto& inst = *inst_draw.instance;
+      if(!inst.prototype || inst.prototype->primitives.empty())
+        continue;
+      if(inst.instance_count == 0)
+        continue;
+
+      const auto& prim = inst.prototype->primitives[0];
+      if(prim.vertex_count == 0)
+        continue;
+
+      // Defensive null-handle skip on prototype buffers — happens during
+      // model swaps when the new prototype's data hasn't been uploaded
+      // yet. The next frame retries.
+      bool prototype_buffers_ready = true;
+      for(const auto& vb : prim.vertex_buffers)
+      {
+        if(!vb)
+          continue;
+        if(auto* gpu = ossia::get_if<ossia::gpu_buffer_handle>(&vb->resource))
+        {
+          if(!gpu->native_handle)
+          { prototype_buffers_ready = false; break; }
+        }
+        else if(auto* cpu = ossia::get_if<ossia::buffer_data>(&vb->resource))
+        {
+          if(!cpu->data || cpu->byte_size == 0)
+          { prototype_buffers_ready = false; break; }
+        }
+        else
+        { prototype_buffers_ready = false; break; }
+      }
+      if(prim.index_buffer && prototype_buffers_ready)
+      {
+        const auto& ib = *prim.index_buffer;
+        if(auto* gpu = ossia::get_if<ossia::gpu_buffer_handle>(&ib.resource))
+        {
+          if(!gpu->native_handle) prototype_buffers_ready = false;
+        }
+        else if(auto* cpu = ossia::get_if<ossia::buffer_data>(&ib.resource))
+        {
+          if(!cpu->data || cpu->byte_size == 0) prototype_buffers_ready = false;
+        }
+      }
+      if(!prototype_buffers_ready)
+        continue;
+
+      // Per-instance source buffers — translations may carry vec3 / trs /
+      // mat4 layouts; we currently only support `translation` (the
+      // shader's per-instance VERTEX_INPUT is vec3). trs / mat4 land in
+      // a follow-up (Phase 3.5).
+      QRhiBuffer* srcTranslations = nullptr;
+      uint32_t srcTranslationOffset = 0;
+      uint32_t srcTranslationStride = 16; // CSF emitters pad to vec4.
+      // Per-format byte offset of the translation within the source
+      // element. For column-major mat4 (64 B), the translation is
+      // column 3 at offset 48; vec4 / trs put translation at offset 0.
+      uint32_t srcTranslationColumnOffset = 0;
+      if(inst.instance_transforms)
+      {
+        if(auto* gpu = ossia::get_if<ossia::gpu_buffer_handle>(
+               &inst.instance_transforms->resource))
+        {
+          if(!gpu->native_handle)
+            continue;
+          srcTranslations = static_cast<QRhiBuffer*>(gpu->native_handle);
+          srcTranslationOffset = (uint32_t)gpu->byte_offset;
+          using TF = ossia::instance_component::transform_format;
+          switch(inst.transform_type)
+          {
+            case TF::translation: srcTranslationStride = 16; break;
+            case TF::trs:         srcTranslationStride = 40; break;
+            case TF::mat4:
+              srcTranslationStride = 64;
+              srcTranslationColumnOffset = 48;
+              break;
+          }
+        }
+      }
+      QRhiBuffer* srcColors = nullptr;
+      uint32_t srcColorOffset = 0;
+      if(inst.instance_colors)
+      {
+        if(auto* gpu = ossia::get_if<ossia::gpu_buffer_handle>(
+               &inst.instance_colors->resource))
+        {
+          if(!gpu->native_handle)
+            continue;
+          srcColors = static_cast<QRhiBuffer*>(gpu->native_handle);
+          srcColorOffset = (uint32_t)gpu->byte_offset;
+        }
+      }
+
+      // Build a transient ossia::geometry from the prototype primitive
+      // and feed it into the shared emitDraw closure.
+      auto proto_geom = primitiveToGeometry(prim);
+      if(!proto_geom)
+        continue;
+
+      const uint32_t slot_base = slot_cursor;
+      const uint64_t prim_id = resolvePrototypeStableId(
+          inst.prototype.get(), prim);
+
+      const uint32_t cmd_index = emitDraw(
+          proto_geom.get(), prim_id, inst_draw.worldTransform,
+          prim.material.get(), /*materialIndex=*/-1,
+          inst.raw_slot.size != 0 ? inst.raw_slot.internal_index
+                                  : 0xFFFFFFFFu,
+          /*skinIndex=*/-1, prim.bounds, inst.instance_count);
+      if(cmd_index == kCmdSkipped)
+        continue;
+
+      InstanceSlotRecord rec{};
+      rec.slot_base = slot_base;
+      rec.count = inst.instance_count;
+      rec.cmd_index = cmd_index;
+      rec.src_translations = srcTranslations;
+      rec.src_translation_offset = srcTranslationOffset + srcTranslationColumnOffset;
+      rec.src_translation_stride = srcTranslationStride;
+      rec.src_colors = srcColors;
+      rec.src_color_offset = srcColorOffset;
+      instanceRecords.push_back(rec);
+    }
+
+    // GC slabs not seen this frame. Grace = 2 protects against the CB
+    // still referencing a culled slab's offset through its indirect-
+    // draw-cmds entry from frame N-1.
+    m_registry->sweepMeshSlabs(current_frame, 2u);
+
+    // Garbage-collect prototype-id map entries that no longer appear in
+    // the live scene. Keeps the map bounded across long sessions where
+    // Instancer prototypes get swapped (Box.gltf → Duck.gltf etc).
+    {
+      ossia::hash_set<const ossia::mesh_component*> live_protos;
+      live_protos.reserve(fs.instances.size());
+      for(const auto& id : fs.instances)
+      {
+        if(id.instance && id.instance->prototype)
+          live_protos.insert(id.instance->prototype.get());
+      }
+      for(auto it = m_protoStableIds.begin(); it != m_protoStableIds.end();)
+      {
+        if(live_protos.find(it->first) == live_protos.end())
+          it = m_protoStableIds.erase(it);
+        else
+          ++it;
+      }
+    }
+
+    m_mdi.totalVertices = totalVertices;
+    m_mdi.totalIndices = totalIndices;
+    m_mdi.drawCount = (uint32_t)acc.indirectCmds.size();
+    m_lastDrawCount = std::max(m_lastDrawCount, acc.indirectCmds.size());
+    m_instSlotsUsed = slot_cursor;
+
+    // drawCount==0: no mesh draws this frame, but procedural-only consumers
+    // (classic_skybox, fullscreen-triangle effects) still need the
+    // scene-wide aux table — `camera` rides on the geometry, so an empty
+    // mesh_list would leave them with no camera UBO. Fall through and
+    // build a 0-vertex carrier mesh that exposes the full auxiliary
+    // list; mesh-consuming downstream nodes see vertices==0 and skip
+    // their draw call. The drawCount-dependent uploads below are gated
+    // on non-empty sources; the binding extents fall back to one
+    // element so RHI accepts the bindings.
+
+    const int64_t pdBytes = std::max<int64_t>(
+        sizeof(PerDrawGPU),
+        (int64_t)acc.perDraws.size() * sizeof(PerDrawGPU));
+    const int64_t icBytes = std::max<int64_t>(
+        sizeof(Acc::IndirectCmd),
+        (int64_t)acc.indirectCmds.size() * sizeof(Acc::IndirectCmd));
+    const int64_t pdbBytes
+        = (int64_t)acc.perDrawBounds.size() * sizeof(PerDrawBoundsGPU);
+
+    // Grow-only for the preprocessor-owned small SSBOs (arena streams
+    // don't grow — pre-sized in registry.init()). On realloc we drop
+    // the diff-upload mirror so the next diffUpload call (fast path
+    // at lines 4744 / 4751) treats the new buffer as empty and uploads
+    // the full fresh contents — see growBuf's prefix-staleness comment.
+    // The slow path's `uploadStaticBuffer(per_draws, 0, full_size, ...)`
+    // at lines 2478-2486 already covers a slow-frame realloc; the
+    // mirror clear here defends the (less common) case where a fast
+    // frame's grow is followed by another fast-frame diffUpload before
+    // a slow frame intervenes.
+    using UF = QRhiBuffer::UsageFlags;
+    if(growBuf(renderer, res,m_mdi.per_draws, m_mdi.perDrawsCap, pdBytes,
+               QRhiBuffer::StorageBuffer,
+               "ScenePreprocessor::mdi.per_draws"))
+      m_cachedPerDraws.clear();
+    if(growBuf(renderer, res,m_mdi.per_draw_bounds, m_mdi.perDrawBoundsCap, pdbBytes,
+               QRhiBuffer::StorageBuffer,
+               "ScenePreprocessor::mdi.per_draw_bounds"))
+      m_cachedPerDrawBounds.clear();
+#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+    growBuf(renderer, res,m_mdi.indirect_draw_cmds, m_mdi.indirectCap, icBytes,
+            UF(QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer),
+            "ScenePreprocessor::mdi.indirect_draw_cmds");
+#else
+    growBuf(renderer, res,m_mdi.indirect_draw_cmds, m_mdi.indirectCap, icBytes,
+            QRhiBuffer::StorageBuffer,
+            "ScenePreprocessor::mdi.indirect_draw_cmds");
+#endif
+
+    // Gate uploads on non-empty sources: when drawCount==0 the carrier
+    // mesh path keeps the buffers at their element-size minimums (already
+    // grown by growBuf above) and skips the upload. Procedural consumers
+    // never read these slots; mesh consumers don't draw, so contents are
+    // irrelevant.
+    if(!acc.perDraws.empty())
+      res.uploadStaticBuffer(
+          m_mdi.per_draws, 0,
+          (int64_t)acc.perDraws.size() * sizeof(PerDrawGPU),
+          acc.perDraws.data());
+    if(!acc.indirectCmds.empty())
+      res.uploadStaticBuffer(
+          m_mdi.indirect_draw_cmds, 0,
+          (int64_t)acc.indirectCmds.size() * sizeof(Acc::IndirectCmd),
+          acc.indirectCmds.data());
+    if(pdbBytes > 0)
+      res.uploadStaticBuffer(
+          m_mdi.per_draw_bounds, 0, pdbBytes, acc.perDrawBounds.data());
+
+    // Seed the fast-path diff-upload mirror from the ACTUALLY-EMITTED set
+    // (acc.perDraws / acc.perDrawBounds), restricted to the fs.draws prefix
+    // (instance-group entries are never compared on the fast path — it's
+    // gated on fs.instances.empty()). Seeding from `freshPerDraws` (filtered
+    // only by vertices>0) would diverge whenever emitDraw skipped a draw,
+    // making diffUpload write a neighbour's model matrix into the wrong slot
+    // (threedim#3).
+    m_cachedPerDraws.assign(
+        acc.perDraws.begin(),
+        acc.perDraws.begin() + (std::ptrdiff_t)meshDrawCount);
+    m_cachedPerDrawBounds.assign(
+        acc.perDrawBounds.begin(),
+        acc.perDrawBounds.begin() + (std::ptrdiff_t)meshDrawCount);
+
+    // ── Per-instance concat buffers (Phase 2 unified MDI) ──────────────
+    //
+    // Three parallel arrays sized to slot_cursor:
+    //   - draw_ids[k]      : cmd index of the cmd that owns slot k
+    //   - translations[k]  : vec4 (xyz used) — identity for regular cmd
+    //                        slots, GPU-copied per-particle position for
+    //                        instance group slots
+    //   - colors[k]        : vec4 — identity (1,1,1,1) for regular cmd
+    //                        slots, GPU-copied per-instance color for
+    //                        groups
+    //
+    // Layout invariant: every regular fs.draws cmd at acc index i lands
+    // at slot i (instanceCount=1). Instance groups follow contiguously
+    // (slot >= acc.indirectCmds.size() - fs.instances.size() in general,
+    // but the bookkeeping is captured per-group in instanceRecords). The
+    // shader reads `draw_id` as a per-instance VERTEX_INPUT and indexes
+    // per_draws[draw_id] — works on both indirect and CPU-fallback paths
+    // because firstInstance is the only state needed (no gl_DrawID
+    // dependency).
+    if(slot_cursor > 0)
+    {
+      const int64_t drawIdsBytes      = (int64_t)slot_cursor * 4;
+      const int64_t translationsBytes = (int64_t)slot_cursor * 16;
+      const int64_t colorsBytes       = (int64_t)slot_cursor * 16;
+
+      // m_instDrawIds: paired with diff-upload via m_cachedInstDrawIds
+      // at line 2544. On realloc we MUST clear the mirror — this is the
+      // root cause of the "instances disappear past a power-of-two instance
+      // count" bug (4→5, 8→9, 16→17 …). For an Instancer with one prototype
+      // every slot's draw_id is the same value (the cmd_index, usually
+      // 0), so cached and fresh are byte-identical for the prefix and
+      // diffUpload's equal-runs branch (line 783) skips them — leaving
+      // the new buffer's prefix as uninitialised driver memory. The
+      // basic-unlit / classic_pbr vertex shader then OOBs on
+      // per_draws[draw_id] for every "garbage" instance. Translations /
+      // colors are immune (full GPU copy at lines 2606+), so they don't
+      // need the clear, but cleaning the diff-upload one is mandatory.
+      if(growBuf(renderer, res,m_instDrawIds, m_instDrawIdsCap, drawIdsBytes,
+                 UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer),
+                 "ScenePreprocessor::inst.draw_ids"))
+        m_cachedInstDrawIds.clear();
+      growBuf(renderer, res,m_instTranslations, m_instTranslationsCap,
+              translationsBytes,
+              UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer),
+              "ScenePreprocessor::inst.translations");
+      growBuf(renderer, res,m_instColors, m_instColorsCap, colorsBytes,
+              UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer),
+              "ScenePreprocessor::inst.colors");
+
+      // Build the full draw_ids vector. For a regular fs.draws cmd at
+      // acc index i: draw_ids[i] = i. For instance group records: the
+      // group's slot range gets cmd_index repeated `count` times.
+      // Diff-uploaded via the m_cachedInstDrawIds mirror so steady-state
+      // frames touch zero bytes when the topology is unchanged.
+      std::vector<uint32_t> fresh_draw_ids(slot_cursor, 0u);
+      // Regular cmds: each occupies one slot at acc index = slot index.
+      const std::size_t n_regular_cmds
+          = acc.indirectCmds.size() - instanceRecords.size();
+      for(std::size_t i = 0; i < n_regular_cmds; ++i)
+        fresh_draw_ids[i] = (uint32_t)i;
+      for(const auto& rec : instanceRecords)
+      {
+        for(uint32_t k = 0; k < rec.count; ++k)
+          fresh_draw_ids[rec.slot_base + k] = rec.cmd_index;
+      }
+      diffUpload(res, m_instDrawIds, m_cachedInstDrawIds, fresh_draw_ids);
+
+      // Regular-slot identity values for translations + colors. Instance
+      // group slots (offset >= n_regular_cmds * 16) are filled by the GPU
+      // copies below — uploadStaticBuffer here covers ONLY the regular
+      // range so we don't stomp the GPU-copied data; stale group content
+      // from a previous frame is overwritten by the per-frame GPU copy.
+      // NOTE(review): groups without a GPU-resident source get no copy.
+      if(n_regular_cmds > 0)
+      {
+        std::vector<float> regular_translations(n_regular_cmds * 4, 0.f);
+        std::vector<float> regular_colors(n_regular_cmds * 4, 1.f);
+        res.uploadStaticBuffer(
+            m_instTranslations, 0,
+            (quint32)(n_regular_cmds * 16),
+            regular_translations.data());
+        res.uploadStaticBuffer(
+            m_instColors, 0,
+            (quint32)(n_regular_cmds * 16),
+            regular_colors.data());
+      }
+
+      // Queue GPU copies for instance groups. Each record copies
+      // `count` instances from the upstream Instancer's source buffer
+      // into the concat array at `slot_base * stride` bytes. The
+      // record's src_translation_offset is biased per source format so
+      // each strided slice lands on the actual translation bytes:
+      //  - translation (vec4):  bytes [0..15]  = (x, y, z, w).
+      //  - trs (vec3 T + ...):  bytes [0..15]  = T + 4 leading bytes
+      //    of R; the shader binds vec3 from offset 0 so stray R bytes
+      //    are never sampled.
+      //  - mat4 (col-major):    bytes [48..63] = column 3 = (Tx, Ty, Tz, 1).
+      auto queueInstanceCopy = [&](
+          QRhiBuffer* src, uint32_t srcOffset, uint32_t srcStride,
+          QRhiBuffer* dst, uint32_t dstOffset, uint32_t count,
+          uint32_t elemSize)
+      {
+        if(!src || !dst || count == 0)
+          return;
+        PendingGpuCopy op;
+        op.attr = MdiAttr::Positions;  // unused when dst is set explicitly
+        op.src = src;
+        op.dst = dst;
+        op.src_offset = (int)srcOffset;
+        op.dst_offset = (int)dstOffset;
+        op.vertex_count = (int)count;
+        op.src_stride = (int)srcStride;
+        op.element_size = (int)elemSize;
+        op.size = (op.src_stride == 0 || op.src_stride == op.element_size)
+                      ? op.vertex_count * op.element_size
+                      : op.element_size;
+        m_pendingGpuCopies.push_back(op);
+      };
+      for(const auto& rec : instanceRecords)
+      {
+        // Translation: copy 16 bytes per instance (elemSize below) into
+        // each vec4-stride slot. The trailing 4 bytes are whatever
+        // follows the translation in the source element (w, or leading
+        // rotation bytes for trs) — the shader binds vec3 from offset 0
+        // so the trailing pad is never sampled.
+        if(rec.src_translations)
+        {
+          queueInstanceCopy(
+              rec.src_translations, rec.src_translation_offset,
+              rec.src_translation_stride,
+              m_instTranslations, rec.slot_base * 16, rec.count,
+              /*elemSize=*/16);
+        }
+        if(rec.src_colors)
+        {
+          queueInstanceCopy(
+              rec.src_colors, rec.src_color_offset, /*srcStride=*/16,
+              m_instColors, rec.slot_base * 16, rec.count,
+              /*elemSize=*/16);
+        }
+      }
+    }
+
+    // Build the output geometry referencing the arena stream buffers
+    // (pointer-stable across frames and scene churn).
+    ossia::geometry g;
+    auto wrapGpu = [](QRhiBuffer* b, int64_t size) {
+      ossia::geometry::gpu_buffer gb;
+      gb.handle = b;
+      gb.byte_size = size;
+      return ossia::geometry::buffer{.data = gb, .dirty = true};
+    };
+
+    // The "byte_size" on each gpu_buffer is the binding extent
+    // downstream consumers use when constructing their MeshBuffer
+    // BufferViews. Using the arena's full capacity (stable across
+    // frames) keeps downstream pointer identity + extent identical
+    // frame-over-frame — the per-draw `baseVertex` in
+    // indirect_draw_cmds addresses into this range.
+    const int64_t posCapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Positions];
+    const int64_t nrmCapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Normals];
+    const int64_t uvCapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Texcoords];
+    const int64_t tanCapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Tangents];
+    const int64_t colCapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Colors];
+    const int64_t uv1CapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Texcoords1];
+    const int64_t idxCapBytes
+        = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Indices];
+
+    // Buffer slot order is wired through to the index-buffer slot
+    // below — keep buffers 0..5 as the six vertex streams, slot 6 as
+    // the index buffer. Adding/reordering here REQUIRES updating
+    // g.index.buffer at the end of this block.
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Positions), posCapBytes));
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Normals), nrmCapBytes));
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Texcoords), uvCapBytes));
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Tangents), tanCapBytes));
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Colors), colCapBytes));
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Texcoords1), uv1CapBytes));
+    g.buffers.push_back(wrapGpu(
+        m_registry->meshStreamBuffer(Stream::Indices), idxCapBytes));
+
+    // MDI uses vec4 stride (16 B) for position and normal even though the
+    // shader binding format is float3. Vulkan reads the first 12 bytes of
+    // each 16-byte slot for vec3, so the last 4 bytes are unused padding.
+    // Why: GPU-resident vertex sources (compute-shader outputs) naturally
+    // emit vec3 inside a 16-byte-aligned slot due to std430/std140 layout
+    // rules. Matching MDI stride lets us turn what would be a per-vertex
+    // strided copyBuffer loop (O(N) vkCmdCopyBuffer regions per frame)
+    // into a single tight blit. Cost: 33 % extra memory for pos/nrm only.
+    ossia::geometry::binding bPos{}; bPos.byte_stride = 16; bPos.classification = ossia::geometry::binding::per_vertex;
+    ossia::geometry::binding bNrm{}; bNrm.byte_stride = 16; bNrm.classification = ossia::geometry::binding::per_vertex;
+    ossia::geometry::binding bUv{};  bUv.byte_stride  = 8;  bUv.classification  = ossia::geometry::binding::per_vertex;
+    ossia::geometry::binding bTan{}; bTan.byte_stride = 16; bTan.classification = ossia::geometry::binding::per_vertex;
+    ossia::geometry::binding bCol{}; bCol.byte_stride = 16; bCol.classification = ossia::geometry::binding::per_vertex;
+    ossia::geometry::binding bUv1{}; bUv1.byte_stride = 8;  bUv1.classification = ossia::geometry::binding::per_vertex;
+    g.bindings.push_back(bPos);
+    g.bindings.push_back(bNrm);
+    g.bindings.push_back(bUv);
+    g.bindings.push_back(bTan);
+    g.bindings.push_back(bCol);
+    g.bindings.push_back(bUv1);
+
+    // `input` is both the type and the vector member on geometry; use the
+    // elaborated `struct` tag to disambiguate in this scope.
+    using GeomInput = struct ossia::geometry::input;
+    g.input.push_back(GeomInput{.buffer = 0, .byte_offset = 0});
+    g.input.push_back(GeomInput{.buffer = 1, .byte_offset = 0});
+    g.input.push_back(GeomInput{.buffer = 2, .byte_offset = 0});
+    g.input.push_back(GeomInput{.buffer = 3, .byte_offset = 0});
+    g.input.push_back(GeomInput{.buffer = 4, .byte_offset = 0});
+    g.input.push_back(GeomInput{.buffer = 5, .byte_offset = 0});
+
+    auto pushAttr = [&](ossia::attribute_semantic sem, int binding,
+                        decltype(ossia::geometry::attribute::format) fmt) {
+      ossia::geometry::attribute a{};
+      a.binding = binding;
+      a.byte_offset = 0;
+      a.format = fmt;
+      a.semantic = sem;
+      g.attributes.push_back(a);
+    };
+    pushAttr(ossia::attribute_semantic::position,  0, ossia::geometry::attribute::float3);
+    pushAttr(ossia::attribute_semantic::normal,    1, ossia::geometry::attribute::float3);
+    pushAttr(ossia::attribute_semantic::texcoord0, 2, ossia::geometry::attribute::float2);
+    pushAttr(ossia::attribute_semantic::tangent,   3, ossia::geometry::attribute::float4);
+    pushAttr(ossia::attribute_semantic::color0,    4, ossia::geometry::attribute::float4);
+    pushAttr(ossia::attribute_semantic::texcoord1, 5, ossia::geometry::attribute::float2);
+
+    // ── Per-instance vertex bindings (Phase 2 unified MDI) ─────────────
+    //
+    // Three PerInstance step_rate=1 bindings carry the unified-MDI
+    // per-instance state. Each indirect cmd (regular or instance group)
+    // sets `firstInstance = its own slot offset` so these bindings
+    // address the right slice of each concat buffer on both the
+    // indirect path and the CPU-fallback drawIndexed loop.
+    //
+    // Buffer slot order in `g.buffers`:
+    //   0..5  per-vertex streams (pos / nrm / uv0 / tan / col / uv1)
+    //   6     index buffer
+    //   7     inst_translations (vec4 stride 16)
+    //   8     inst_colors (vec4 stride 16)
+    //   9     inst_draw_ids (uint stride 4)
+    // Adding more slots HERE shifts every subsequent aux's buf index;
+    // the post-section building auxiliaries computes its base via
+    // `baseBuf = (int)g.buffers.size()` so it doesn't need changing.
+    if(slot_cursor > 0 && m_instTranslations && m_instColors && m_instDrawIds)
+    {
+      // Index buffer must come before per-instance buffers since
+      // g.index.buffer is hard-coded to slot 6 below; per-instance
+      // buffers occupy slots 7, 8, 9.
+      g.buffers.push_back(wrapGpu(
+          m_instTranslations, (int64_t)slot_cursor * 16));
+      g.buffers.push_back(wrapGpu(
+          m_instColors, (int64_t)slot_cursor * 16));
+      g.buffers.push_back(wrapGpu(
+          m_instDrawIds, (int64_t)slot_cursor * 4));
+
+      ossia::geometry::binding bInstT{};
+      bInstT.byte_stride = 16;
+      bInstT.classification = ossia::geometry::binding::per_instance;
+      bInstT.step_rate = 1;
+      const int instTBindIdx = (int)g.bindings.size();
+      g.bindings.push_back(bInstT);
+
+      ossia::geometry::binding bInstC{};
+      bInstC.byte_stride = 16;
+      bInstC.classification = ossia::geometry::binding::per_instance;
+      bInstC.step_rate = 1;
+      const int instCBindIdx = (int)g.bindings.size();
+      g.bindings.push_back(bInstC);
+
+      ossia::geometry::binding bInstD{};
+      bInstD.byte_stride = 4;
+      bInstD.classification = ossia::geometry::binding::per_instance;
+      bInstD.step_rate = 1;
+      const int instDBindIdx = (int)g.bindings.size();
+      g.bindings.push_back(bInstD);
+
+      g.input.push_back(GeomInput{.buffer = 7, .byte_offset = 0});
+      g.input.push_back(GeomInput{.buffer = 8, .byte_offset = 0});
+      g.input.push_back(GeomInput{.buffer = 9, .byte_offset = 0});
+
+      // Per-instance attributes. Translation reuses the existing
+      // `translation` semantic (no per-vertex `translation` ever exists,
+      // so no collision). Color uses the dedicated `instance_color0`
+      // semantic added to libossia for unified MDI to avoid the
+      // per-vertex / per-instance `color0` collision in
+      // findGeometryAttribute. draw_id uses `instance_draw_id`
+      // (uint-typed; required by every shader using per_draws[] in
+      // Phase 2).
+      pushAttr(ossia::attribute_semantic::translation,
+               instTBindIdx, ossia::geometry::attribute::float3);
+      pushAttr(ossia::attribute_semantic::instance_color0,
+               instCBindIdx, ossia::geometry::attribute::float4);
+      pushAttr(ossia::attribute_semantic::instance_draw_id,
+               instDBindIdx, ossia::geometry::attribute::uint1);
+    }
+
+    g.vertices  = (int)m_mdi.totalVertices;
+    g.indices   = (int)m_mdi.totalIndices;
+    g.instances = 1;
+    g.topology  = ossia::geometry::triangles;
+    // glTF doubleSided: pipeline-side culling is OFF for the MDI
+    // batch. Per-fragment culling is shader-side, driven by each
+    // material's `feature_mask`:
+    //   - single-sided (no `double_sided` bit): shader discards
+    //     `!gl_FrontFacing` fragments → matches CULL_BACK behaviour.
+    //   - double-sided: shader keeps both sides and flips the surface
+    //     normal for back-facing fragments so lighting works on both.
+    // Splitting the MDI batch by cull mode would multiply the draw
+    // count and lose much of the indirect-draw benefit; per-fragment
+    // gating is the simpler trade.
+    g.cull_mode = ossia::geometry::none;
+    g.front_face = ossia::geometry::counter_clockwise;
+
+    g.index.buffer = 6;  // Slot order: pos=0, nrm=1, uv=2, tan=3, col=4, uv1=5, idx=6.
+    g.index.byte_offset = 0;
+    g.index.format = decltype(g.index)::uint32;
+
+    // filter_tag / filter_material_index are per-geometry metadata
+    // used by Tier-2 mesh-level filters (FlattenedSceneFilterNode).
+    // The preprocessor emits ONE geometry per MDI batch spanning many
+    // materials, so there's no single value that would be meaningful
+    // here — we stamp 0 so Tier-2 filters either drop or keep the
+    // whole batch. Per-draw material / tag filtering belongs to a
+    // Tier-3 compute-shader filter that consumes indirect_draw_cmds +
+    // per_draws (CSF-based, see docs on scene_filter_* presets).
+    g.filter_tag = 0;
+    g.filter_material_index = 0;
+
+    // Attach scene-wide auxiliaries. Shaders pick these up by NAME via
+    // try_bind_from_geometry, so there's no need for downstream nodes to
+    // wire every SSBO/UBO manually — the geometry cable already carries
+    // scene lights / materials / per-draws / indirect / counts / camera
+    // / env. The names here MUST match the shader's `INPUTS[].NAME`.
+    const int baseBuf = (int)g.buffers.size();
+    // scene_lights → RawLight arena directly (task 28b-shader flip).
+    // Every classic_pbr_*.frag's Light struct now matches the arena
+    // layout and the light loop reads
+    // scene_lights.entries[scene_light_indices.data[i]], composing
+    // world-space direction from world_transforms[transform_slot].
+    {
+      auto* lightArena
+          = renderer.registry().buffer(GpuResourceRegistry::Arena::RawLight);
+      const int64_t lightArenaBytes
+          = (int64_t)renderer.registry().arenaSlotStride(
+                GpuResourceRegistry::Arena::RawLight)
+            * (int64_t)renderer.registry().arenaSlotCount(
+                GpuResourceRegistry::Arena::RawLight);
+      g.buffers.push_back(wrapGpu(lightArena, lightArenaBytes));
+    }
+    // scene_materials binding points at the Material arena directly.
+    // Shader indexes entries[material_index] where material_index is
+    // the arena slot index (stamped in PerDrawGPU above) and the SSBO
+    // stride matches sizeof(MaterialGPU) = 80B. Eliminates the
+    // per-frame CPU-side repack + upload that m_materialsBuffer used
+    // to carry.
+    {
+      auto* matArena
+          = renderer.registry().buffer(GpuResourceRegistry::Arena::Material);
+      const int64_t matArenaBytes
+          = (int64_t)renderer.registry().arenaSlotStride(
+                GpuResourceRegistry::Arena::Material)
+            * (int64_t)renderer.registry().arenaSlotCount(
+                GpuResourceRegistry::Arena::Material);
+      g.buffers.push_back(wrapGpu(matArena, matArenaBytes));
+    }
+    g.buffers.push_back(wrapGpu(m_materialsExtBuffer, m_materialsExtCap));
+    g.buffers.push_back(wrapGpu(m_mdi.per_draws,          pdBytes));
+    g.buffers.push_back(wrapGpu(m_mdi.indirect_draw_cmds, icBytes));
+    g.buffers.push_back(wrapGpu(m_sceneCountsBuffer, sizeof(SceneCountsUBO)));
+    // Only bind the ACTIVE camera slot (first 240 bytes) — shaders declare
+    // `uniform camera_t camera` as a single entry, not an array. Slot 0 is
+    // guaranteed to be the active camera by packAndUploadCameras.
+    g.buffers.push_back(wrapGpu(m_camerasBuffer, sizeof(CameraUBOData)));
+    g.buffers.push_back(wrapGpu(m_camerasPrevBuffer, sizeof(CameraUBOData)));
+    // Env UBO: bind a PREPROCESSOR-owned slot, not any single producer's
+    // slot. With multi-producer env composition (task #26) the merged
+    // scene_environment is built field-by-field by merge_scenes from
+    // every contributing EnvironmentLoader / CubemapLoader — no single
+    // producer's slot holds the merged result. The preprocessor packs
+    // the merged CPU-side env into m_envSlot here and consumers bind
+    // that offset.
+    m_env_aux_offset = renderer.registry().slotOffset(m_envSlot);
+    g.buffers.push_back(wrapGpu(
+        renderer.registry().buffer(GpuResourceRegistry::Arena::Env),
+        sizeof(EnvParamsUBO)));
+    // World transforms — arena-slot-indexed (task 28b phase 1). Consumer
+    // shaders read world_transforms.data[slot_index] for any light /
+    // particle / compute pass that needs slot-addressable world-space
+    // composition. Preprocessor-private so multi-filter pipelines don't
+    // stomp each other.
+    g.buffers.push_back(wrapGpu(
+        m_worldTransformsBuffer, m_worldTransformsCap));
+    // Previous-frame snapshot of the same layout; consumer shaders
+    // declare an AUXILIARY / storage input named `world_transforms_prev`
+    // to read it for motion vectors, TAA, reprojection, etc.
+    g.buffers.push_back(wrapGpu(
+        m_worldTransformsPrevBuffer, m_worldTransformsCap));
+    // scene_light_indices — compact list of RawLight arena slot indices
+    // for the scene's live lights (task 28b phase 3). Shader iterates
+    // 0..scene_counts.light_count, reads
+    // scene_lights.entries[scene_light_indices.data[i]].
+    g.buffers.push_back(wrapGpu(
+        m_lightIndicesBuffer, m_lightIndicesCap));
+
+    {
+      const int64_t lightArenaBytes
+          = (int64_t)renderer.registry().arenaSlotStride(
+                GpuResourceRegistry::Arena::RawLight)
+            * (int64_t)renderer.registry().arenaSlotCount(
+                GpuResourceRegistry::Arena::RawLight);
+      g.auxiliary.push_back({
+          .name = "scene_lights", .buffer = baseBuf,
+          .byte_offset = 0,
+          .byte_size = lightArenaBytes});
+    }
+    {
+      const int64_t matArenaBytes
+          = (int64_t)renderer.registry().arenaSlotStride(
+                GpuResourceRegistry::Arena::Material)
+            * (int64_t)renderer.registry().arenaSlotCount(
+                GpuResourceRegistry::Arena::Material);
+      g.auxiliary.push_back({
+          .name = "scene_materials", .buffer = baseBuf + 1,
+          .byte_offset = 0,
+          .byte_size = matArenaBytes});
+    }
+    // Parallel to scene_materials — same element count, same indexing.
+    // OpenPBR-grade shaders bind this as a second SSBO and use the same
+    // material_index to read the extension struct.
+    // byte_size = full buffer capacity. The buffer is sized in update()
+    // to (max_arena_slot + 1) * sizeof(MaterialExtensionsGPU) — see the
+    // arenaSlotEntries computation there. The shader indexes by
+    // pd.material_index (arena slot), so the binding extent must cover
+    // the full arena range.
+    g.auxiliary.push_back({
+        .name = "scene_materials_ext", .buffer = baseBuf + 2,
+        .byte_offset = 0,
+        .byte_size = m_materialsExtCap});
+    g.auxiliary.push_back({
+        .name = "per_draws", .buffer = baseBuf + 3,
+        .byte_offset = 0, .byte_size = pdBytes});
+    g.auxiliary.push_back({
+        .name = "indirect_draw_cmds", .buffer = baseBuf + 4,
+        .byte_offset = 0, .byte_size = icBytes});
+    g.auxiliary.push_back({
+        .name = "scene_counts", .buffer = baseBuf + 5,
+        .byte_offset = 0, .byte_size = (int64_t)sizeof(SceneCountsUBO)});
+    g.auxiliary.push_back({
+        .name = "camera", .buffer = baseBuf + 6,
+        .byte_offset = 0, .byte_size = (int64_t)sizeof(CameraUBOData)});
+    g.auxiliary.push_back({
+        .name = "camera_prev", .buffer = baseBuf + 7,
+        .byte_offset = 0, .byte_size = (int64_t)sizeof(CameraUBOData)});
+    g.auxiliary.push_back({
+        .name = "env", .buffer = baseBuf + 8,
+        .byte_offset = (int64_t)m_env_aux_offset,
+        .byte_size = (int64_t)sizeof(EnvParamsUBO)});
+    g.auxiliary.push_back({
+        .name = "world_transforms", .buffer = baseBuf + 9,
+        .byte_offset = 0,
+        .byte_size = m_worldTransformsCap});
+    // Previous-frame snapshot for motion-vector / TAA / reprojection
+    // shaders. Snapshot is produced in runInitialPasses via a single
+    // GPU-side copyBuffer; the per-slot writes for the same frame
+    // are deferred from update() into the next resource-update batch
+    // so the copy reads the still-frame-N-1 contents of current.
+    g.auxiliary.push_back({
+        .name = "world_transforms_prev", .buffer = baseBuf + 10,
+        .byte_offset = 0,
+        .byte_size = m_worldTransformsCap});
+    g.auxiliary.push_back({
+        .name = "scene_light_indices", .buffer = baseBuf + 11,
+        .byte_offset = 0,
+        .byte_size = m_lightIndicesCap});
+
+    // KHR_texture_transform: per-material per-channel UV transforms.
+    // Parallel to scene_materials, indexed by material_index. Identity
+    // transforms for materials without the extension — zero shader cost.
+    {
+      const int buf_idx = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(
+          m_materialUVTransformsBuffer, m_materialUVTransformsCap));
+      g.auxiliary.push_back({
+          .name = "scene_material_uv_xforms", .buffer = buf_idx,
+          .byte_offset = 0,
+          .byte_size = m_materialUVTransformsCap});
+    }
+
+    // per_draw_bounds — sidecar to per_draws, one local-space AABB per
+    // draw (std430 2×vec4 = 32 B). Consumer: GPU culling shaders
+    // (scene_filter_aabb_cull.csf and the future HiZ variant) read this
+    // together with per_draws[i].model to frustum-test each draw and
+    // rewrite indirect_draw_cmds[i] with indexCount=0 when culled.
+    {
+      const int buf_idx = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(m_mdi.per_draw_bounds, pdbBytes));
+      g.auxiliary.push_back({
+          .name = "per_draw_bounds", .buffer = buf_idx,
+          .byte_offset = 0, .byte_size = pdbBytes});
+    }
+
+    // shadow_cascades UBO — 544 B, std140. Consumer: classic_pbr_shadowed
+    // PCF cascade pick + light_view_proj sampling, and the shadow-pass
+    // depth-only shader's light_view_proj array. Populated from
+    // scene_state.shadow_cascades (Threedim::ShadowCascadeSetup). Always
+    // published — when no upstream authored cascades, cascade_count=0
+    // signals consumers to skip shadow sampling (the shader-side guard
+    // already handles this).
+    if(m_shadowCascadesBuffer)
+    {
+      const int buf_idx = (int)g.buffers.size();
+      g.buffers.push_back(wrapGpu(
+          m_shadowCascadesBuffer, (int64_t)sizeof(ShadowCascadesUBO)));
+      g.auxiliary.push_back({
+          .name = "shadow_cascades", .buffer = buf_idx,
+          .byte_offset = 0,
+          .byte_size = (int64_t)sizeof(ShadowCascadesUBO)});
+    }
+
+    // Attach per-channel material texture arrays + skybox as auxiliary
+    // textures. Consumer shaders (classic_pbr_textured / classic_pbr_ibl /
+    // classic_pbr_full) pick them up by NAME through the same
+    // try_bind_texture_from_geometry mechanism as the buffer auxes above —
+    // no manual cable required. Null handles are filtered out so a shader
+    // missing a given channel falls back to its own sampler (emptyTexture).
+    appendTextureAuxes(g);
+
+    // Mid-pipeline aux injection from InjectBuffer / InjectTexture nodes
+    // upstream. Name collisions with preprocessor-owned auxes are resolved
+    // last-wins: consumer-side find_auxiliary returns the first match, so
+    // each injection below first removes any earlier entry with the same
+    // name and is then appended (textures are handled the same way).
+    //
+    // Buffer injections: wrap each handle as a geometry-buffer slot, add
+    // an auxiliary_buffer entry pointing at it.
+    if(this->scene.state)
+    {
+      for(const auto& ib : this->scene.state->inject_buffers)
+      {
+        if(!ib.native_handle || ib.name.empty())
+          continue;
+        // Remove any earlier entry with the same name so the injection
+        // wins (consumer find_auxiliary returns first-match; easier to
+        // maintain "last-wins" semantics by purging the earlier one).
+        auto& aux_list = g.auxiliary;
+        aux_list.erase(
+            std::remove_if(
+                aux_list.begin(), aux_list.end(),
+                [&](const ossia::geometry::auxiliary_buffer& a) {
+                  return a.name == ib.name;
+                }),
+            aux_list.end());
+        const int buf_idx = (int)g.buffers.size();
+        g.buffers.push_back(
+            wrapGpu(static_cast<QRhiBuffer*>(ib.native_handle), ib.byte_size));
+        g.auxiliary.push_back(
+            {.name = ib.name,
+             .buffer = buf_idx,
+             .byte_offset = 0,
+             .byte_size = ib.byte_size});
+      }
+      for(const auto& it : this->scene.state->inject_textures)
+      {
+        if(!it.native_handle || it.name.empty())
+          continue;
+        auto& tex_list = g.auxiliary_textures;
+        tex_list.erase(
+            std::remove_if(
+                tex_list.begin(), tex_list.end(),
+                [&](const ossia::geometry::auxiliary_texture& a) {
+                  return a.name == it.name;
+                }),
+            tex_list.end());
+        g.auxiliary_textures.push_back(
+            {.name = it.name, .native_handle = it.native_handle});
+      }
+    }
+
+    // Use the existing indirect_count slot for the draw count — renderers
+    // that support drawIndexedIndirect pick it up automatically.
+    //
+    // drawCount==0 carrier-mesh path: leave indirect_count.handle null
+    // so CustomMesh::drawSingleMesh skips its indirect-draw branch
+    // (which would otherwise issue cb.drawIndirect against a buffer
+    // whose contents weren't uploaded this frame, yielding the
+    // UINT32_MAX-firstIndex Vulkan validation error). The carrier still
+    // gets pushed onto m_outputSpec.meshes as a pure aux carrier for
+    // procedural-only consumers (skybox, fullscreen effects); they read
+    // the auxiliary list and don't issue an indirect draw themselves.
+    // Mesh consumers fall through to `cb.draw(0, 0)` — a no-op.
+    ossia::geometry::gpu_buffer ic_count;
+    if(!acc.indirectCmds.empty())
+    {
+      ic_count.handle = m_mdi.indirect_draw_cmds;
+      ic_count.byte_size = icBytes;
+    }
+    g.indirect_count = ic_count;
+
+    // CPU-side copy of indirect draw commands for the Qt < 6.12 fallback
+    // path. CustomMesh::draw iterates these and issues per-command
+    // drawIndexed calls with the correct firstInstance / baseVertex.
+    g.cpu_draw_commands.reserve(acc.indirectCmds.size());
+    for(const auto& cmd : acc.indirectCmds)
+    {
+      g.cpu_draw_commands.push_back({
+          .index_or_vertex_count = cmd.indexCount,
+          .instance_count = cmd.instanceCount,
+          .first_index_or_vertex = cmd.firstIndex,
+          .base_vertex = cmd.baseVertex,
+          .first_instance = cmd.baseInstance});
+    }
+
+    auto meshes = std::make_shared<ossia::mesh_list>();
+    meshes->meshes.push_back(std::move(g));
+    meshes->dirty_index
+        = (m_outputSpec.meshes ? m_outputSpec.meshes->dirty_index : 0) + 1;
+
+    m_outputSpec.meshes = std::move(meshes);
+    if(!m_outputSpec.filters)
+      m_outputSpec.filters = std::make_shared<ossia::geometry_filter_list>();
+  }
+
+
+  // Decode a texture_source to an RGBA8888 QImage. Single-texture-point of
+  // decode so the rebuild code below can dedupe upstream of JPEG decoding.
+  //
+  // Plan 09 S1 path: when `src.content_hash != 0` and an AssetTable is
+  // available, peek the cache first. On hit: skip decode, return the
+  // cached QImage directly. On miss: decode, stage into the cache so
+  // future RenderLists (other outputs, reloads within the session) hit
+  // without re-decoding. Zero-hash sources (legacy parsers that don't
+  // populate the hash) always take the decode path.
+  static QImage decodeTextureSource(
+      const ossia::texture_source& src, Gfx::AssetTable* cache)
+  {
+    // Only sources carrying a non-zero content hash take part in the
+    // decode cache, and only when a cache was supplied.
+    const bool cacheable = cache && src.content_hash != 0;
+
+    // Cache hit: return the stored image without decoding anything.
+    if(cacheable)
+    {
+      auto asset = cache->peek(src.content_hash);
+      if(asset && !asset->image.isNull())
+        return asset->image;
+    }
+
+    // Decode path. Embedded bytes take precedence over the file path.
+    std::optional<DecodedImage> decoded;
+    const bool hasEmbedded
+        = src.embedded_data && !src.embedded_data->empty();
+    if(hasEmbedded)
+    {
+      QByteArray bytes(
+          reinterpret_cast<const char*>(src.embedded_data->data()),
+          (qsizetype)src.embedded_data->size());
+      decoded = decodeImageFromMemory(
+          bytes, QString::fromStdString(src.mime_type));
+    }
+    else if(!src.file_path.empty())
+    {
+      decoded = decodeImageFromPath(QString::fromStdString(src.file_path));
+    }
+
+    // Nothing usable came out of the decoder: hand back a 1x1 opaque
+    // white placeholder so callers always receive a non-null image.
+    const bool decodeOk = decoded && !decoded->image.isNull();
+    if(!decodeOk)
+    {
+      QImage fallback(1, 1, QImage::Format_RGBA8888);
+      fallback.fill(Qt::white);
+      return fallback;
+    }
+
+    // Publish the freshly decoded image under its hash so later lookups
+    // for the same content can skip the decode.
+    if(cacheable)
+      cache->stage(src.content_hash, decoded->image);
+    return decoded->image;
+  }
+
+  // Build a content fingerprint of the current materials list — keyed on
+  // material_component::stable_id rather than the raw pointer. Stable
+  // across producer rebuilds (the producer re-emits a fresh shared_ptr
+  // with the same id) AND across merge_scenes contributor reshuffles.
+  // Falls back to the pointer bits when stable_id is zero so un-stamped
+  // legacy producers still work (just with less-stable semantics).
+  void computeMaterialsFingerprint(std::vector<uint64_t>& out) const
+  {
+    // Always start from an empty fingerprint; a missing scene state or
+    // materials list leaves it empty.
+    out.clear();
+    const bool haveMaterials
+        = this->scene.state && this->scene.state->materials;
+    if(!haveMaterials)
+      return;
+
+    const auto& materials = *this->scene.state->materials;
+    out.reserve(materials.size());
+    for(const auto& mat : materials)
+    {
+      // One key per list slot, in list order: 0 for a null entry, the
+      // stable_id when it is non-zero, otherwise the object's address.
+      uint64_t key = 0;
+      if(mat)
+      {
+        if(mat->stable_id != 0)
+          key = mat->stable_id;
+        else
+          key = reinterpret_cast<uint64_t>(mat.get());
+      }
+      out.push_back(key);
+    }
+  }
+
+  // ── rebuildChannel() — defined below, after rebuildDynamicSlots() ──
+  // (Re)allocate a material-texture channel's array, deduping by
+  // texture_source pointer so N materials that share one image upload
+  // ONE layer, not N. Patches fs.materials[i].textureRefs[ch] with the
+  // packed layer ref for material i.
+  //
+  // Call sequence in update():
+  //   flattenScene → fs.materials      ← un-patched, all textureRefs=NONE
+  //   computeMaterialsFingerprint(fp)  ← snapshot per-material stable ids
+  //                                      (pointer bits when id is zero)
+  //   rebuildChannel(ch, sameMaterialsContent, renderer, res, fs)
+  //                                    ← dedupes + patches textureRefs[ch]
+  //   diffUpload / uploadStaticBuffer of scene_materials SSBO
+  //
+  // `sameMaterialsContent` is the result of comparing `fp` to
+  // `m_cachedMaterialsFingerprint`, computed once per update() and passed
+  // in so the ChannelCount rebuildChannel calls each frame don't each
+  // re-walk the list.
+  //
+  // Returns true if any of the channel's bucket texture arrays or
+  // samplers was (re)created — caller uses this to trigger downstream
+  // SRB rebinds.
+  //
+  // ── rebuildDynamicSlots() ──
+  // Walk materials and assign dynamic-slot indices for texture_refs that
+  // carry a GPU handle without a source. Rebuilt every frame because the
+  // upstream QRhiTexture* can swap without the material_component pointer
+  // changing (e.g., video-texture resized mid-stream). Cheap: O(n_mats),
+  // no uploads. Materials past the slot cap recycle the LRU-oldest slot
+  // (per resolveDynamicSlot's eviction path); the corresponding shader
+  // sampler now points at the new texture rather than tex_ref_none.
+  void rebuildDynamicSlots(MaterialChannel ch)
+  {
+    // Nothing to register without a scene state, a materials list and a
+    // registry. This pass does not clear anything itself; it only
+    // (re-)registers the handles the materials currently reference, so
+    // it relies on resolveDynamicSlot tolerating repeated registration
+    // of the same handle.
+    if(!(this->scene.state && this->scene.state->materials && m_registry))
+      return;
+
+    // Registers one texture_ref with the registry for this channel.
+    // Only refs that carry a valid GPU texture and no CPU-side `source`
+    // qualify; static and empty refs are ignored.
+    const auto registerIfDynamic
+        = [this, ch](const ossia::texture_ref& ref) {
+      const bool isDynamic = !ref.source && ref.texture.valid();
+      if(isDynamic)
+        m_registry->resolveDynamicSlot(
+            toTexChannel(ch), ref.texture.native_handle);
+    };
+
+    const auto& materials = *this->scene.state->materials;
+    for(const auto& mat : materials)
+    {
+      if(mat)
+      {
+        // The channel's main texture ref comes first...
+        const auto* mainRef = channelRef(ch, *mat);
+        if(mainRef)
+          registerIfDynamic(*mainRef);
+
+        // ...followed by every extension-table ref pooled in `ch`.
+        for(const auto& ext : kExtTextureSlots)
+        {
+          if(ext.channel != ch)
+            continue;
+          registerIfDynamic(ext.accessor(*mat));
+        }
+      }
+    }
+  }
+
+  // Rebuild one material-texture channel: refresh its dynamic slots,
+  // then (unless the cached state is still usable) re-bucket every
+  // static texture source, (re)allocate the per-bucket texture arrays
+  // and samplers, queue the layer uploads on `res`, and patch the
+  // resulting packed refs into `fs`.
+  //
+  //   ch                   — channel to rebuild.
+  //   sameMaterialsContent — caller's "materials unchanged" verdict;
+  //                          enables the fast path when bucket 0 still
+  //                          has an array.
+  //   renderer             — supplies the QRhi and the decode cache.
+  //   res                  — update batch receiving the texture uploads.
+  //   fs                   — flattened scene whose refs get patched.
+  //
+  // Returns true when any bucket's texture array or sampler was
+  // created, replaced or dropped, i.e. when SRBs must be rebound.
+  bool rebuildChannel(
+      MaterialChannel ch, bool sameMaterialsContent, RenderList& renderer,
+      QRhiResourceUpdateBatch& res, FlatScene& fs)
+  {
+    if(!m_registry)
+      return false;
+    auto& rhi = *renderer.state.rhi;
+    auto& channel = texChannel(ch);
+
+    const auto matsPtr
+        = this->scene.state ? this->scene.state->materials : nullptr;
+
+    // Dynamic slots refresh every frame regardless of sameMaterialsContent:
+    // runtime handles can swap without the outer material pointer changing.
+    rebuildDynamicSlots(ch);
+
+    // Fast path: the per-element materials fingerprint matches what we
+    // last fingerprinted, and this channel's texture array + layer map
+    // are still valid. Only need to re-patch textureRefs on fs.materials
+    // so the SSBO upload below carries the cached layer indices (dynamic
+    // slots patched from the freshly rebuilt dynamicSlotMap).
+    if(sameMaterialsContent && channel.primaryArray())
+    {
+      patchMaterialRefsFromCache(ch, fs);
+      return false;
+    }
+
+    // Wave 2 S2-shader: multi-bucket texture arrays. Each distinct
+    // (RGBA8, imageSize) tuple goes into its own bucket. Materials
+    // reference `tex_ref_static(bucket_id, layer_id)`; patchMaterial-
+    // RefsFromCache walks buckets[] to emit the correct refs.
+    //
+    // Algorithm:
+    //   1. Clear all buckets' layerMaps (we'll rebuild them).
+    //   2. Walk materials, decode each unique source up-front, route
+    //      it to `findOrCreateBucket(RGBA8, image.size())`. Layer
+    //      indices are bucket-local.
+    //   3. For each bucket that changed size/layer-count: reallocate
+    //      its QRhiTextureArray at the right native size.
+    //   4. Upload decoded images into their assigned (bucket, layer)
+    //      slots — no scaling, sizes already match by construction.
+    //   5. Ensure bucket 0 always has at least 1 fallback layer so
+    //      the default `baseColorArray` binding stays valid for
+    //      single-bucket-era shaders.
+    //
+    // Format axis reserved for future: today every bucket is RGBA8.
+    // HDR emissive / wide-gamut / compressed formats plug into this
+    // same mechanism by varying the format argument.
+
+    for(auto& b : channel.buckets)
+      b.layerMap.clear();
+
+    // Decoded pending uploads + their target (bucket, layer).
+    struct PendingLayer
+    {
+      int bucket_idx;
+      int layer_idx;
+      QImage image;
+    };
+    std::vector<PendingLayer> pendingUploads;
+    pendingUploads.reserve(16);
+
+    if(matsPtr)
+    {
+      // Process a single static texture_ref into this channel's bucket
+      // pool. Used uniformly for both the main channel ref and every
+      // ext-table ref whose `channel` matches `ch` — shared logic
+      // means new ext slots automatically pick up dedup, decode-fail
+      // handling, and bucket-cap diagnostics for free.
+      //
+      // `is_main_occlusion` enables the glTF MR-r packed-occlusion
+      // shortcut, which only applies to the main occlusion channel ref
+      // (an ext texture happening to share a source with MR doesn't
+      // get short-circuited — semantically distinct field). When the
+      // shortcut fires we also need the material's MR source pointer
+      // for the comparison; passed in as `mr_source_for_occ_check`.
+      const auto register_static_ref
+          = [&](const ossia::texture_ref& tref,
+                const ossia::texture_source* mr_source_for_occ_check,
+                bool is_main_occlusion) {
+        const auto* s = tref.source.get();
+        if(!s)
+          return;
+
+        // Occlusion-from-MR shortcut: when the material's occlusion
+        // texture and metallic-roughness texture share a source, the
+        // shader will read occlusion from MR.r * factor (the canonical
+        // glTF packing convention) and we don't need to allocate a
+        // separate occlusion layer for this material. patchMaterial-
+        // RefsFromCache also short-circuits → tex_ref_none() for the
+        // occlusion ref, the shader feature_mask bit stays clear, and
+        // the MR.r path takes over.
+        if(is_main_occlusion && s == mr_source_for_occ_check)
+          return;
+
+        // Skip if already mapped in any bucket this walk (same source
+        // referenced by N materials, or by main + ext slots on the
+        // same material — single upload shared by all).
+        for(const auto& b : channel.buckets)
+          if(b.layerMap.find(s) != b.layerMap.end())
+            return;
+
+        // Decode now so we know the native size to pick a bucket.
+        // AssetTable `peek` may return a cached QImage → zero-cost.
+        QImage img = decodeTextureSource(*s, renderer.assetTable());
+        if(img.isNull())
+          return;
+
+        // Heuristic: the decode-failure fallback is a 1×1 image; real
+        // textures are ≥ 8 px on both axes. Skip bucket assignment on
+        // clearly-degenerate results so we don't spawn a 1×1 bucket.
+        if(img.width() < 8 || img.height() < 8)
+          return;
+
+        // Route to bucket keyed on (format, size, sampler_config). The
+        // sampler_config split lets per-glTF-texture wrap/filter modes
+        // be honoured even when several materials share a channel
+        // array — distinct samplers → distinct buckets, each with its
+        // own QRhiSampler. For the common case (Sponza, DamagedHelmet,
+        // most glTFs use a single sampler) this collapses to one
+        // bucket per (format, size).
+        auto [b_idx, b_ptr] = channel.findOrCreateBucket(
+            QRhiTexture::RGBA8, img.size(), tref.sampler);
+        if(b_idx < 0)
+        {
+          qWarning().noquote()
+              << "ScenePreprocessor: channel" << channelName(ch)
+              << "hit bucket cap ("
+              << GpuResourceRegistry::kMaxBuckets
+              << "); texture_source skipped — shader will see tex_ref_none.";
+          return;
+        }
+
+        const int layer = (int)b_ptr->layerMap.size();
+        b_ptr->layerMap[s] = layer;
+        pendingUploads.push_back({b_idx, layer, std::move(img)});
+      };
+
+      const auto register_material_refs
+          = [&](const ossia::material_component& m) {
+        const auto* mr_source = m.metallic_roughness_texture.source.get();
+        // Main channel ref.
+        if(const auto* tref = channelRef(ch, m); tref)
+          register_static_ref(*tref, mr_source, ch == ChannelOcclusion);
+        // Ext-table refs whose pool matches this channel.
+        for(const auto& slot : kExtTextureSlots)
+          if(slot.channel == ch)
+            register_static_ref(slot.accessor(m), mr_source, false);
+      };
+      for(const auto& m : *matsPtr)
+        if(m)
+          register_material_refs(*m);
+      // Instancer-prototype materials live outside scene_state.materials
+      // (owned by the prototype mesh_component). Walk them here so their
+      // textures land in the channel buckets and arenaSlotForMaterial
+      // can patch resolved refs in the upload pass — see diagnostic 029.
+      for(const auto& inst_draw : fs.instances)
+      {
+        const auto* inst = inst_draw.instance.get();
+        if(!inst || !inst->prototype)
+          continue;
+        for(const auto& prim : inst->prototype->primitives)
+          if(const auto* mat = prim.material.get(); mat)
+            register_material_refs(*mat);
+      }
+    }
+
+    // Ensure bucket 0 exists for init-time / shader-binding stability.
+    // If no material landed in it, ensurePrimary() with default size
+    // gives a safe fallback target.
+    if(channel.buckets.empty())
+    {
+      channel.ensurePrimary(
+          QRhiTexture::RGBA8,
+          QSize(kChannelLayerSize, kChannelLayerSize));
+    }
+
+    // Per-bucket allocate / reallocate.
+    bool anyReallocated = false;
+    for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi)
+    {
+      auto& b = channel.buckets[bi];
+      // At least 1 layer — empty bucket gets a fallback at layer 0.
+      const int wantLayers = std::max(1, (int)b.layerMap.size());
+      if(!b.array || b.layers != wantLayers)
+      {
+        // Releasing a live array invalidates every binding that still
+        // references it, so report a reallocation even when the
+        // replacement below fails — otherwise consumers would keep
+        // using the released texture.
+        if(b.array)
+        {
+          b.array->deleteLater();
+          anyReallocated = true;
+        }
+        b.array = rhi.newTextureArray(
+            b.format, wantLayers, b.pixelSize, 1, channelFlags(ch));
+        if(b.array)
+        {
+          b.array->setName(
+              QByteArray("ScenePreprocessor::") + channelName(ch)
+              + '[' + QByteArray::number((int)bi) + ']');
+          if(!b.array->create())
+          {
+            delete b.array;
+            b.array = nullptr;
+          }
+        }
+
+        if(b.array)
+        {
+          b.layers = wantLayers;
+          anyReallocated = true;
+        }
+        else
+        {
+          // No backing array: forget this bucket's layer assignments so
+          // patchMaterialRefsFromCache emits tex_ref_none for its
+          // sources instead of static refs into a bucket that cannot
+          // be sampled, and drop the stale layer count.
+          b.layers = 0;
+          b.layerMap.clear();
+          qWarning().noquote()
+              << "ScenePreprocessor: channel" << channelName(ch)
+              << "failed to allocate texture array for bucket" << (int)bi;
+        }
+      }
+
+      // Per-bucket QRhiSampler. Created on first allocation, kept
+      // alive across rebuilds (the sampler_config is immutable for a
+      // bucket — bucket identity includes it). Never recreated unless
+      // the bucket is destroyed.
+      if(b.array && !b.sampler)
+      {
+        auto wrap_to_qrhi = [](ossia::texture_address_mode m) {
+          switch(m)
+          {
+            case ossia::REPEAT:        return QRhiSampler::Repeat;
+            case ossia::CLAMP_TO_EDGE: return QRhiSampler::ClampToEdge;
+            case ossia::MIRROR:        return QRhiSampler::Mirror;
+          }
+          return QRhiSampler::Repeat;
+        };
+        auto filter_to_qrhi = [](ossia::texture_filter f,
+                                 QRhiSampler::Filter dflt) {
+          switch(f)
+          {
+            case ossia::NONE:    return QRhiSampler::None;
+            case ossia::NEAREST: return QRhiSampler::Nearest;
+            case ossia::LINEAR:  return QRhiSampler::Linear;
+          }
+          return dflt;
+        };
+        // Material textures are always uploaded with a full mip chain
+        // (TextureLoader.cpp::uploadImageToTexture: MipMapped +
+        // generateMips on first upload). Force the bucket sampler to
+        // trilinear-filter that chain:
+        //   - mag/min filter promoted to LINEAR when the loader said
+        //     NONE (NEAREST is preserved — that's an explicit author
+        //     choice, e.g. pixel-art assets).
+        //   - mipmap_mode promoted to LINEAR when the loader said NONE
+        //     (the common case where a glTF declared minFilter=LINEAR
+        //     instead of LINEAR_MIPMAP_LINEAR — without this override
+        //     the GPU only ever samples mip 0 and we get the same
+        //     minification noise the mipmap fix was meant to solve).
+        auto promote_to_linear
+            = [](ossia::texture_filter f) -> ossia::texture_filter {
+          return f == ossia::NONE ? ossia::LINEAR : f;
+        };
+        b.sampler = rhi.newSampler(
+            filter_to_qrhi(promote_to_linear(b.sampler_config.mag_filter), QRhiSampler::Linear),
+            filter_to_qrhi(promote_to_linear(b.sampler_config.min_filter), QRhiSampler::Linear),
+            filter_to_qrhi(promote_to_linear(b.sampler_config.mipmap_mode), QRhiSampler::Linear),
+            wrap_to_qrhi(b.sampler_config.wrap_s),
+            wrap_to_qrhi(b.sampler_config.wrap_t));
+        // Guard against a null sampler the same way the array path
+        // guards newTextureArray.
+        if(b.sampler)
+        {
+          b.sampler->setName(
+              QByteArray("ScenePreprocessor::") + channelName(ch) + "_sampler["
+              + QByteArray::number((int)bi) + ']');
+          if(!b.sampler->create())
+          {
+            delete b.sampler;
+            b.sampler = nullptr;
+          }
+          else
+          {
+            // Sampler swap forces SRB rebind on the consumer side.
+            anyReallocated = true;
+          }
+        }
+      }
+    }
+
+    // Upload real textures into their bucket/layer slots. Buckets whose
+    // array could not be created are skipped.
+    for(auto& pu : pendingUploads)
+    {
+      auto& b = channel.buckets[pu.bucket_idx];
+      if(!b.array)
+        continue;
+      QImage img = std::move(pu.image);
+      if(img.format() != QImage::Format_RGBA8888)
+        img.convertTo(QImage::Format_RGBA8888);
+      // Sizes match by construction — no scale needed.
+      QRhiTextureSubresourceUploadDescription sub(img);
+      QRhiTextureUploadEntry entry(pu.layer_idx, 0, sub);
+      res.uploadTexture(
+          b.array, QRhiTextureUploadDescription({entry}));
+    }
+
+    // Fallback for empty buckets (no real uploads): drop a neutral
+    // 1-layer default so the shader's bucket-switch case for this
+    // bucket doesn't sample undefined memory.
+    for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi)
+    {
+      auto& b = channel.buckets[bi];
+      if(!b.array || !b.layerMap.empty())
+        continue;
+      QImage fallback(b.pixelSize, QImage::Format_RGBA8888);
+      switch(ch)
+      {
+        case ChannelBaseColor:  fallback.fill(Qt::white); break;
+        case ChannelEmissive:   fallback.fill(Qt::black); break;
+        // MR / packed-extension fallback: white (1,1,1,1) so per-material
+        // metallic_factor / roughness_factor / clearcoat_factor / sheen / etc.
+        // apply via multiplication. A non-white fallback would zero out the
+        // authored factors (e.g., metallic_factor=1 + no MR texture → black
+        // metal instead of mirror).
+        case ChannelMetalRough: fallback.fill(Qt::white); break;
+        case ChannelNormal:     fallback.fill(QColor(128, 128, 255, 255)); break;
+        default:                fallback.fill(Qt::white); break;
+      }
+      QRhiTextureSubresourceUploadDescription sub(fallback);
+      QRhiTextureUploadEntry entry(0, 0, sub);
+      res.uploadTexture(
+          b.array, QRhiTextureUploadDescription({entry}));
+    }
+
+    // Per-channel diagnostic — bucket count, per-bucket size and layer
+    // count, and the number of queued uploads. Useful when chasing
+    // "missing textures" symptoms (e.g. Sponza mat 2 dropped because
+    // white.png is 4×4, below the <8 px decode floor).
+    if(buftrace_enabled())
+    {
+      QString detail;
+      detail.reserve(128);
+      for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi)
+      {
+        const auto& b = channel.buckets[bi];
+        detail += QStringLiteral(" b%1=%2x%3×%4")
+                      .arg(bi)
+                      .arg(b.pixelSize.width())
+                      .arg(b.pixelSize.height())
+                      .arg(b.layers);
+      }
+      BUFTRACE() << "[Channel " << channelName(ch)
+                 << "] buckets=" << channel.buckets.size()
+                 << " pendingUploads=" << pendingUploads.size()
+                 << detail
+                 << " realloc=" << anyReallocated;
+    }
+
+    patchMaterialRefsFromCache(ch, fs);
+
+    // True when any bucket's texture array or sampler was created,
+    // replaced or dropped: downstream SRBs need a rebind. Caller
+    // threads it through the "auxBuffersChanged" flag in update().
+    return anyReallocated;
+  }
+
+  // Walk fs.materials in lockstep with scene.state->materials and set
+  // textureRefs[ch] from channel's layerMap. Called from both the fast
+  // path (same materials list) and the rebuild path (materials list
+  // changed).
+  void patchMaterialRefsFromCache(MaterialChannel ch, FlatScene& fs)
+  {
+    if(!this->scene.state || !this->scene.state->materials || !m_registry)
+      return;
+    const auto& mats = *this->scene.state->materials;
+    const auto& channel = texChannel(ch);
+    const auto& dynMap = channel.dynamicSlotMap;
+    const std::size_t n = std::min(fs.materials.size(), mats.size());
+    const std::size_t n_ext = std::min(n, fs.material_extensions.size());
+
+    // Channel 4 (Occlusion) lives in `MaterialGPU::occlusion_textureRef`,
+    // a single uint32 outside the 4-element textureRefs uvec4 (which
+    // holds BC/MR/Normal/Em only). Branch out the storage target so we
+    // don't write OOB into textureRefs[4].
+    const auto write_main_ref
+        = [ch](MaterialGPU& m, uint32_t ref) noexcept {
+      if(ch == ChannelOcclusion)
+        m.occlusion_textureRef = ref;
+      else
+        m.textureRefs[ch] = ref;
+    };
+
+    // Encode a single texture_ref into a packed uint per the
+    // tex_ref_static / tex_ref_dynamic / tex_ref_none scheme. Looks up
+    // the dynamic handle in this channel's slotMap first (since GPU
+    // handles take precedence over CPU sources when both are set —
+    // mirrors the rebuild walker's order). Static sources are matched
+    // against the per-bucket layerMap that rebuildChannel populated.
+    // Returns tex_ref_none() for empty refs OR refs that overflowed
+    // the dynamic slot cap OR static sources we failed to map (decode
+    // failure, bucket cap, etc.).
+    const auto encode_ref = [&](const ossia::texture_ref& tref) -> uint32_t {
+      // Dynamic path: GPU handle without a CPU source.
+      if(!tref.source && tref.texture.valid())
+      {
+        // Look up by globalResourceId — see GpuResourceRegistry.cpp's
+        // resolveDynamicSlot for the recycling-safety rationale.
+        auto* dynTex
+            = static_cast<QRhiTexture*>(tref.texture.native_handle);
+        auto it
+            = dynTex ? dynMap.find(dynTex->globalResourceId()) : dynMap.end();
+        return (it != dynMap.end())
+                   ? tex_ref_dynamic((uint32_t)it->second)
+                   : tex_ref_none();
+      }
+      // Static path: walk this channel's buckets for the source pointer.
+      if(const auto* s = tref.source.get(); s)
+      {
+        for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi)
+        {
+          auto it = channel.buckets[bi].layerMap.find(s);
+          if(it != channel.buckets[bi].layerMap.end())
+            return tex_ref_static((uint32_t)bi, (uint32_t)it->second);
+        }
+      }
+      return tex_ref_none();
+    };
+
+    for(std::size_t i = 0; i < n; ++i)
+    {
+      // Null-material clear: zero out main + all ext slots mapped to
+      // this channel so a transient nullptr in mats[i] doesn't leave
+      // stale refs from the previous frame.
+      if(!mats[i])
+      {
+        write_main_ref(fs.materials[i], tex_ref_none());
+        if(i < n_ext)
+          for(const auto& slot : kExtTextureSlots)
+            if(slot.channel == ch)
+              fs.material_extensions[i].textureRefs[slot.slot]
+                  = tex_ref_none();
+        continue;
+      }
+
+      // ── Main channel ref ──────────────────────────────────────────
+      // Occlusion-from-MR shortcut (see rebuildChannel above): when
+      // the source is shared with MR, leave the ref as none so the
+      // shader takes the MR.r packed-occlusion path.
+      const auto* main_tref = channelRef(ch, *mats[i]);
+      const bool occ_packed_in_mr
+          = (ch == ChannelOcclusion
+             && main_tref
+             && main_tref->source
+             && main_tref->source.get()
+                    == mats[i]->metallic_roughness_texture.source.get());
+      write_main_ref(
+          fs.materials[i],
+          (main_tref && !occ_packed_in_mr)
+              ? encode_ref(*main_tref)
+              : tex_ref_none());
+
+      // ── Ext-slot refs ─────────────────────────────────────────────
+      // For each ext slot whose pool is `ch`, encode and write to
+      // MaterialExtensionsGPU::textureRefs[slot]. Slots whose pool
+      // ≠ ch are written by other rebuildChannel(ch') passes — over
+      // ChannelCount calls per frame, every slot mapped in
+      // kExtTextureSlots gets its turn.
+      if(i < n_ext)
+      {
+        for(const auto& slot : kExtTextureSlots)
+        {
+          if(slot.channel != ch)
+            continue;
+          fs.material_extensions[i].textureRefs[slot.slot]
+              = encode_ref(slot.accessor(*mats[i]));
+        }
+      }
+    }
+  }
+
+  // Publishes the textures owned by this node onto the outgoing geometry
+  // as named auxiliary_texture entries. Null handles are never emitted,
+  // and nothing at all is emitted when no registry is attached.
+  //
+  // Per material channel, in this order:
+  //   * `<channelName><bucket>`       one entry per bucket that has a live
+  //                                   array (e.g. `baseColorArray0`),
+  //                                   carrying the bucket's sampler handle
+  //                                   (which may itself be null);
+  //   * `<channelName>`               unsuffixed alias of bucket 0 only,
+  //                                   kept for single-bucket-era shaders
+  //                                   that decode bucket 0 and ignore the
+  //                                   bucket bits of a texture ref;
+  //   * `<channelDynBaseName><slot>`  one entry per non-null dynamic slot
+  //                                   (e.g. `baseColorDyn0`), no sampler.
+  //
+  // When a scene state is attached, the scene-wide textures follow under
+  // the fixed names `skybox`, `irradiance_map`, `prefiltered_map`,
+  // `brdf_lut` and `shadow_map_array`.
+  void appendTextureAuxes(ossia::geometry& g) const
+  {
+    if(!m_registry)
+      return;
+
+    for(int chi = 0; chi < ChannelCount; ++chi)
+    {
+      const auto ch = static_cast<MaterialChannel>(chi);
+      const auto& channel = texChannel(ch);
+
+      // Static bucket arrays.
+      for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi)
+      {
+        const auto& bucket = channel.buckets[bi];
+        auto* array = bucket.array;
+        if(!array)
+          continue;
+
+        // The sampler is forwarded as an opaque pointer; a null value is
+        // passed through untouched and left for the consumer to handle.
+        void* sampler = static_cast<void*>(bucket.sampler);
+
+        // Bucket-suffixed entry, emitted for every live bucket.
+        g.auxiliary_textures.push_back(
+            {.name = std::string(channelName(ch))
+                     + std::to_string(static_cast<int>(bi)),
+             .native_handle = array,
+             .sampler_handle = sampler});
+
+        if(bi != 0)
+          continue;
+
+        // Back-compat alias: bucket 0 is additionally published under the
+        // bare channel name.
+        g.auxiliary_textures.push_back(
+            {.name = channelName(ch),
+             .native_handle = array,
+             .sampler_handle = sampler});
+      }
+
+      // Dynamic slot textures, named by slot index.
+      const auto& dynamicSlots = channel.dynamicTextures;
+      const char* dynPrefix = channelDynBaseName(ch);
+      const int slotCount = (int)dynamicSlots.size();
+      for(int slot = 0; slot < slotCount; ++slot)
+      {
+        auto* dynTex = dynamicSlots[slot];
+        if(!dynTex)
+          continue;
+        g.auxiliary_textures.push_back(
+            {.name = std::string(dynPrefix) + std::to_string(slot),
+             .native_handle = dynTex});
+      }
+    }
+
+    if(!this->scene.state)
+      return;
+
+    // Emits `handle` under `name` unless its native handle is null.
+    const auto publish = [&g](const char* name, const auto& handle) {
+      if(auto* t = static_cast<QRhiTexture*>(handle.native_handle))
+      {
+        g.auxiliary_textures.push_back({.name = name, .native_handle = t});
+      }
+    };
+
+    // Scene-wide environment textures under their well-known names.
+    const auto& env = this->scene.state->environment;
+    publish("skybox", env.skybox_texture);
+    publish("irradiance_map", env.irradiance_map);
+    publish("prefiltered_map", env.prefiltered_map);
+    publish("brdf_lut", env.brdf_lut);
+
+    // The shadow-map array hangs off the scene state's shadow_cascades
+    // rather than off the environment.
+    publish(
+        "shadow_map_array",
+        this->scene.state->shadow_cascades.shadow_map_array);
+  }
+
+  // This node no longer exposes texture outputs: the material-texture
+  // arrays and the skybox are carried on the Geometry output as
+  // auxiliary_texture entries instead. The override is kept only because
+  // the base class declares it; it always reports "no texture".
+  QRhiTexture* textureForOutput(const Port&) override { return nullptr; }
+
+  // Packs the cameras collected by flattenScene into a contiguous
+  // CameraUBOData array and uploads it to m_camerasBuffer, together with
+  // the previous upload's content in m_camerasPrevBuffer.
+  //
+  // Layout: slot 0 is always the active camera; the remaining slots are
+  // the other cameras in fs.cameras order. When the scene has no camera a
+  // single default entry is synthesized so a valid binding always exists.
+  //
+  // Parameters:
+  //   renderer  supplies the QRhi, the fallback render size, the per-frame
+  //             token (renderer.frame) and deferred buffer release.
+  //   res       update batch that receives the dynamic-buffer uploads.
+  //   fs        flattened scene; only cameras / activeCameraIndex are read.
+  //
+  // Runs at most once per renderer.frame value (see the guard below).
+  void packAndUploadCameras(
+      RenderList& renderer, QRhiResourceUpdateBatch& res, const FlatScene& fs)
+  {
+    // Per-frame idempotency. update() is dispatched once per outgoing
+    // edge; a second run within the same frame would snapshot the
+    // just-written (current-frame) m_cachedCameras into camera_prev, so
+    // camera_prev == camera and motion would read as zero even on real
+    // motion frames.
+    if(m_lastCameraUploadFrame == renderer.frame)
+      return;
+
+    auto& rhi = *renderer.state.rhi;
+
+    // Prefer the render-target size stamped on the scene environment when
+    // one is set and strictly positive; otherwise fall back to the
+    // RenderList's own render size.
+    QSize rsize = renderer.state.renderSize;
+    if(this->scene.state)
+    {
+      const auto& env = this->scene.state->environment;
+      if((env.params_set & ossia::scene_environment::params_render_target_size)
+         && env.render_target_size[0] > 0
+         && env.render_target_size[1] > 0)
+      {
+        rsize = QSize(
+            (int)env.render_target_size[0],
+            (int)env.render_target_size[1]);
+      }
+    }
+
+    std::vector<CameraUBOData> fresh;
+    if(fs.cameras.empty())
+    {
+      // Default camera used when no camera is present in the scene:
+      // eye at (0, 1, 3) looking at the origin with +Y up.
+      ossia::camera_component cam{};
+      QMatrix4x4 view;
+      view.lookAt(
+          QVector3D(0.f, 1.f, 3.f), QVector3D(0.f, 0.f, 0.f),
+          QVector3D(0.f, 1.f, 0.f));
+      CameraUBOData d{};
+      packCameraUBO(d, cam, view.inverted(), rsize, 0.f);
+      fresh.push_back(d);
+    }
+    else
+    {
+      fresh.reserve(fs.cameras.size());
+      // Put the active camera first so shaders that index by 0 pick it up
+      // without knowing about activeCameraIndex. The index is clamped on
+      // BOTH sides: a negative value falls back to camera 0, and a value
+      // past the end falls back to the last camera instead of indexing
+      // fs.cameras out of bounds.
+      const int lastCamera = (int)fs.cameras.size() - 1;
+      const int active
+          = std::min(std::max(0, fs.activeCameraIndex), lastCamera);
+      auto packOne = [&](const FlatScene::CameraEntry& e) {
+        CameraUBOData d{};
+        packCameraUBO(d, *e.component, e.worldTransform, rsize, 0.f);
+        fresh.push_back(d);
+      };
+      packOne(fs.cameras[(std::size_t)active]);
+      for(std::size_t i = 0; i < fs.cameras.size(); ++i)
+      {
+        if((int)i != active)
+          packOne(fs.cameras[i]);
+      }
+    }
+
+    const int64_t bytes = (int64_t)(fresh.size() * sizeof(CameraUBOData));
+
+    // Pre-allocate a large enough capacity so the buffer pointer is stable
+    // across typical scene changes — aux-buffer bindings downstream resolve
+    // to this QRhiBuffer* at geometry-rebuild time, and growing invalidates
+    // those bindings. Sixteen entries cover every realistic multi-view
+    // case (cubemap = 6, stereo = 2, typical single = 1).
+    constexpr int64_t kMinCap = 16 * (int64_t)sizeof(CameraUBOData);
+    const int64_t wantCap = std::max(bytes, kMinCap);
+
+    if(!m_camerasBuffer || m_camerasCap < wantCap)
+    {
+      if(m_camerasBuffer)
+        renderer.releaseBuffer(m_camerasBuffer);
+      if(m_camerasPrevBuffer)
+        renderer.releaseBuffer(m_camerasPrevBuffer);
+      m_camerasBuffer = rhi.newBuffer(
+          QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, (quint32)wantCap);
+      m_camerasBuffer->setName("ScenePreprocessor::cameras");
+      m_camerasBuffer->create();
+      m_camerasPrevBuffer = rhi.newBuffer(
+          QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, (quint32)wantCap);
+      m_camerasPrevBuffer->setName("ScenePreprocessor::cameras_prev");
+      m_camerasPrevBuffer->create();
+      m_camerasCap = wantCap;
+      // The CPU mirror describes the released buffers; drop it so the
+      // prev upload below is seeded from `fresh`. Both uploads below are
+      // unconditional, so the new buffers are always filled this frame.
+      m_cachedCameras.clear();
+    }
+
+    // Upload `camera_prev` from the CPU mirror of what is currently in the
+    // GPU `camera` buffer (= the previous upload's content, since it is
+    // about to be overwritten with `fresh`). When the mirror is empty
+    // (first frame, or right after a realloc) prev is seeded with the
+    // current data so motion is zero rather than derived from garbage.
+    //
+    // Snapshot-current-before-overwrite makes the prev semantic a function
+    // of the buffer's last content rather than of cache-hit history; an
+    // earlier diff-skipping variant left camera_prev lagging and was
+    // dropped in favour of always uploading both buffers.
+    const auto& prevPayload
+        = m_cachedCameras.empty() ? fresh : m_cachedCameras;
+    const int64_t prevBytes
+        = (int64_t)(prevPayload.size() * sizeof(CameraUBOData));
+    res.updateDynamicBuffer(
+        m_camerasPrevBuffer, 0, (quint32)prevBytes, prevPayload.data());
+
+    // Cameras that appeared since the previous upload have no history in
+    // the mirror, and their prev slots would otherwise keep whatever the
+    // buffer last held. Seed those tail slots from `fresh`, the same
+    // "no history => zero motion" rule used for the first frame.
+    if(prevPayload.size() < fresh.size())
+    {
+      const std::size_t known = prevPayload.size();
+      const std::size_t added = fresh.size() - known;
+      res.updateDynamicBuffer(
+          m_camerasPrevBuffer, (quint32)(known * sizeof(CameraUBOData)),
+          (quint32)(added * sizeof(CameraUBOData)), fresh.data() + known);
+    }
+
+    res.updateDynamicBuffer(m_camerasBuffer, 0, (quint32)bytes, fresh.data());
+    m_cachedCameras = std::move(fresh);
+    m_lastCameraUploadFrame = renderer.frame;
+
+    // The camera UBO isn't exposed on an external output port anymore —
+    // it rides along on the geometry as the `camera` auxiliary buffer
+    // (attached in rebuildMDI), so try_bind_from_geometry resolves the
+    // shader's `uniform camera` input by name without a dedicated cable.
+  }
+
+  void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge*) override
+  {
+    // Re-flatten when the CONTENT actually changed, not just when a push
+    // occurred this frame. Producers (glTF/FBX loaders, Light)
+    // now re-push every frame so that multi-source scenes stay consistent
+    // across frames; the merge cache in NodeRenderer keeps the resulting
+    // scene_state shared_ptr stable when no input changed. That makes the
+    // pointer + version check a reliable "did the content change" test,
+    // and we can skip the sceneChanged forced-rebuild entirely.
+    bool needsRebuild = !m_outputSpec.meshes;
+    if(this->scene.state.get() != m_cachedSceneState)
+      needsRebuild = true;
+    if(this->scene.state && this->scene.state->version != m_cachedVersion)
+      needsRebuild = true;
+
+    // Always refresh the camera UBOs every frame, regardless of whether
+    // mesh-rebuild fires. Decoupling camera updates from the rebuild gate
+    // is required for motion-vector reprojection to be correct:
+    //
+    //   * "Camera moves, then stops": without per-frame upload, the last
+    //     rebuild leaves camera_prev = old, camera = new in the GPU UBOs.
+    //     scene_state stops bumping its version → no further rebuild →
+    //     UBOs frozen at the motion-in-progress state → motion-vector
+    //     consumers see ghost motion forever after the camera stopped.
+    //
+    //   * "Static camera + animated geometry": some scene producers bump
+    //     scene_state.version on transform changes, others don't. If the
+    //     gate misses, the camera UBO never updates even when the camera
+    //     does change. Always running packAndUploadCameras here makes
+    //     motion-vector correctness independent of which producer is in
+    //     play.
+    //
+    // packAndUploadCameras synthesises a default camera when fs.cameras
+    // is empty, so this runs unconditionally — keeps m_camerasBuffer
+    // allocated and bound even when no scene producer is wired yet.
+    //
+    // Per-frame guard (threedim#12): update() is dispatched once per
+    // outgoing edge, and packAndUploadCameras already early-returns when
+    // it has already run this frame (m_lastCameraUploadFrame ==
+    // renderer.frame). But the flattenScene() feeding it is NOT free — it
+    // packs every material, runs skeleton FK and allocates a shared_ptr
+    // wrapper per primitive — so running it once per edge wastes that work
+    // on edges 2..K whose packAndUploadCameras is a no-op anyway. Gate the
+    // whole camera flatten+upload on the same per-frame token so it runs at
+    // most once per frame regardless of edge count.
+    if(m_lastCameraUploadFrame != renderer.frame)
+    {
+      FlatScene cameraFs;
+      flattenScene(this->scene, cameraFs, /*aspectRatio=*/1.f);
+      packAndUploadCameras(renderer, res, cameraFs);
+    }
+
+    if(!needsRebuild)
+    {
+      // Still consume the sceneChanged flag so we don't loop on it forever.
+      this->sceneChanged = false;
+      return;
+    }
+
+    BUFTRACE() << "ScenePreprocessor::update REBUILD cached_state="
+               << (const void*)m_cachedSceneState
+               << " cached_ver=" << (qint64)m_cachedVersion
+               << " new_state=" << (void*)this->scene.state.get()
+               << " new_ver="
+               << (this->scene.state ? (qint64)this->scene.state->version : (qint64)-1)
+               << " mdi_indices="
+               << (void*)(m_registry ? m_registry->meshStreamBuffer(
+                       GpuResourceRegistry::MeshStream::Indices) : nullptr)
+               << " (downstream shader bindings still reference the "
+                  "pre-rebuild MDI buffers until the next acquireMesh)";
+
+    // Walk the scene. flattenScene is O(nodes) — cheap compared to any
+    // GPU upload — so we always do it. The expensive work (vertex/index
+    // concat + upload) is then gated by the mesh fingerprint below.
+    {
+      FlatScene fs;
+      flattenScene(this->scene, fs, /*aspectRatio=*/1.f);
+
+      std::vector<uint32_t> materialTagHashes;
+      if(this->scene.state && this->scene.state->materials)
+      {
+        const auto& mats = *this->scene.state->materials;
+        materialTagHashes.reserve(mats.size());
+        for(const auto& m : mats)
+          materialTagHashes.push_back(
+              m ? (uint32_t)ossia::hash_string(m->tag) : 0u);
+      }
+
+      // Allocate Material arena slots for every loader material (materials
+      // entering the scene without a live producer's raw_slot) + upload
+      // MaterialGPU bytes. Producer-authored materials already have valid
+      // slots kept fresh by their own update(); we skip those here.
+      // Slot allocation persists across frames via m_loaderMaterialSlots —
+      // cheap cache hit for scenes that don't change. When a material
+      // disappears (removed from scene_state.materials), its slot is
+      // reclaimed by the garbage-collection pass below.
+      if(this->scene.state && m_registry)
+      {
+        const std::vector<ossia::material_component_ptr> empty_mats;
+        const auto& mats = this->scene.state->materials
+                               ? *this->scene.state->materials
+                               : empty_mats;
+        ossia::hash_set<const ossia::material_component*> seen;
+        seen.reserve(mats.size() + fs.instances.size());
+        const auto register_loader_material
+            = [&](const ossia::material_component* mat) {
+          if(!mat)
+            return;
+          seen.insert(mat);
+          // Producer-authored material: its own update() maintains the
+          // slot contents every frame. Skip.
+          if(m_registry->isLive(mat->raw_slot))
+            return;
+          // Loader material: allocate a slot on first sight, upload
+          // packed MaterialGPU bytes. No per-frame re-upload: loader
+          // materials are immutable between file-loads, so the slot
+          // bytes we wrote on first sight are still valid.
+          auto [it, inserted]
+              = m_loaderMaterialSlots.emplace(mat, GpuResourceRegistry::Slot{});
+          if(inserted)
+          {
+            it->second = m_registry->allocate(
+                GpuResourceRegistry::Arena::Material, sizeof(MaterialGPU));
+            // No upload here — textureRefs aren't resolved yet. The
+            // upload happens after the rebuildChannel loop, once the
+            // per-channel layerMaps know which source lands on which
+            // layer. Arena-full case: the GC pass below drops the
+            // invalid entry on the next material list change.
+          }
+        };
+        for(const auto& mat_ptr : mats)
+          register_loader_material(mat_ptr.get());
+        // Instancer prototypes carry their own material_component
+        // pointers that aren't in scene_state.materials (they're owned
+        // by the prototype mesh_component). Without registering them
+        // here, arenaSlotForMaterial(prim.material) falls back to slot
+        // 0 (the seedDefaults white-dielectric) and every loader-built
+        // instance group renders with that default — see diagnostic 029.
+        for(const auto& inst_draw : fs.instances)
+        {
+          const auto* inst = inst_draw.instance.get();
+          if(!inst || !inst->prototype)
+            continue;
+          for(const auto& prim : inst->prototype->primitives)
+            register_loader_material(prim.material.get());
+        }
+        // Garbage-collect slots whose materials disappeared from the
+        // scene. Scanning after the allocation pass ensures entries
+        // still present are kept.
+        for(auto it = m_loaderMaterialSlots.begin();
+            it != m_loaderMaterialSlots.end();)
+        {
+          if(seen.find(it->first) == seen.end())
+          {
+            if(it->second.valid())
+              m_registry->free(it->second);
+            it = m_loaderMaterialSlots.erase(it);
+          }
+          else
+          {
+            ++it;
+          }
+        }
+      }
+
+      // Build / refresh every material-texture channel AND patch
+      // fs.materials[i].textureRefs[ch] with the assigned layer indices.
+      // Must happen before the scene_materials SSBO upload below so
+      // materials are written with the right refs.
+      //
+      // Each channel has its own QRhiTextureArray (sRGB for base color
+      // & emissive, linear for MR & normal — see channelFlags). When a
+      // channel's QRhiTexture* gets reallocated (layer count grew, …)
+      // the emitted auxiliary_texture entry's native_handle changes —
+      // downstream's rebindAuxTextures picks that up via the per-frame
+      // geometry lookup, but ONLY if downstream's geometryChanged fires,
+      // which requires a fresh meshes shared_ptr. Roll the realloc
+      // signal into the same `auxBuffersChanged` flag the SSBO-grow path
+      // uses: rebuildMDI() rebuilds the meshes vector every time that
+      // flag fires, giving the downstream a pointer identity change.
+      //
+      // Fingerprint the materials list once and pass the equality result
+      // to each channel so we don't re-walk the list ChannelCount times.
+      std::vector<uint64_t> fingerprint;
+      computeMaterialsFingerprint(fingerprint);
+      // Append prototype-material identity into the fingerprint so a
+      // prototype-only change (model swap, variant select) re-triggers
+      // the channel rebuild + upload below.
+      for(const auto& inst_draw : fs.instances)
+      {
+        const auto* inst = inst_draw.instance.get();
+        if(!inst || !inst->prototype)
+          continue;
+        for(const auto& prim : inst->prototype->primitives)
+        {
+          const auto* mat = prim.material.get();
+          fingerprint.push_back(
+              mat
+                  ? (mat->stable_id != 0
+                         ? mat->stable_id
+                         : reinterpret_cast<uint64_t>(mat))
+                  : 0u);
+        }
+      }
+      const bool sameMaterialsContent
+          = (fingerprint == m_cachedMaterialsFingerprint);
+
+      bool channelReallocated = false;
+      for(int i = 0; i < ChannelCount; ++i)
+      {
+        if(rebuildChannel(
+               static_cast<MaterialChannel>(i), sameMaterialsContent,
+               renderer, res, fs))
+          channelReallocated = true;
+      }
+      if(!sameMaterialsContent)
+        m_cachedMaterialsFingerprint = std::move(fingerprint);
+
+      // Loader-material arena slot upload: now that rebuildChannel has
+      // patched fs.materials[i].textureRefs with the resolved per-channel
+      // layer indices, stream each loader material's packed MaterialGPU
+      // bytes into its Material arena slot. Producer-authored materials
+      // (PBRMesh, MaterialOverride-if-migrated, CSF mesh producers) keep
+      // their own slot fresh in their update() hooks — we skip those.
+      //
+      // Uploads happen only when the materials content actually changed
+      // (sameMaterialsContent==false) OR when a channel reallocated and
+      // shifted layer indices. Steady-state frames with an unchanged
+      // scene touch zero bytes here.
+      if(m_registry && this->scene.state
+         && (!sameMaterialsContent || channelReallocated))
+      {
+        const std::vector<ossia::material_component_ptr> empty_mats;
+        const auto& mats = this->scene.state->materials
+                               ? *this->scene.state->materials
+                               : empty_mats;
+        const std::size_t n
+            = std::min(fs.materials.size(), mats.size());
+        for(std::size_t i = 0; i < n; ++i)
+        {
+          const auto* mat = mats[i].get();
+          if(!mat)
+            continue;
+          if(m_registry->isLive(mat->raw_slot))
+            continue;  // producer-authored — slot owned by producer
+          auto it = m_loaderMaterialSlots.find(mat);
+          if(it == m_loaderMaterialSlots.end() || !it->second.valid())
+            continue;
+          m_registry->updateSlot(
+              res, it->second, &fs.materials[i], sizeof(MaterialGPU));
+        }
+        // Instancer-prototype materials registered above also need
+        // their MaterialGPU bytes uploaded — they aren't in
+        // fs.materials so we pack on the fly. textureRefs come from the
+        // rebuildChannel walk (which now also visits prototype
+        // materials) so dedup with channel buckets is preserved.
+        ossia::hash_set<const ossia::material_component*> uploaded;
+        uploaded.reserve(mats.size() + fs.instances.size());
+        for(const auto& mp : mats)
+          if(mp)
+            uploaded.insert(mp.get());
+        for(const auto& inst_draw : fs.instances)
+        {
+          const auto* inst = inst_draw.instance.get();
+          if(!inst || !inst->prototype)
+            continue;
+          for(const auto& prim : inst->prototype->primitives)
+          {
+            const auto* mat = prim.material.get();
+            if(!mat)
+              continue;
+            if(!uploaded.insert(mat).second)
+              continue; // shared with scene material or another prim
+            if(m_registry->isLive(mat->raw_slot))
+              continue;
+            auto it = m_loaderMaterialSlots.find(mat);
+            if(it == m_loaderMaterialSlots.end() || !it->second.valid())
+              continue;
+            MaterialGPU packed = packMaterial(*mat);
+            // Patch textureRefs from the per-channel buckets. Mirrors
+            // patchMaterialRefsFromCache but inline since prototype
+            // materials aren't in fs.materials.
+            for(int chi = 0; chi < ChannelCount; ++chi)
+            {
+              const auto ch = static_cast<MaterialChannel>(chi);
+              const auto& channel = texChannel(ch);
+              uint32_t ref = tex_ref_none();
+              if(const auto* tref = channelRef(ch, *mat); tref)
+              {
+                if(!tref->source && tref->texture.valid())
+                {
+                  // Stable-id keyed (GpuResourceRegistry.cpp).
+                  auto* dynTex = static_cast<QRhiTexture*>(
+                      tref->texture.native_handle);
+                  auto dit = dynTex
+                                 ? channel.dynamicSlotMap.find(
+                                       dynTex->globalResourceId())
+                                 : channel.dynamicSlotMap.end();
+                  if(dit != channel.dynamicSlotMap.end())
+                    ref = tex_ref_dynamic((uint32_t)dit->second);
+                }
+                else if(const auto* s = tref->source.get(); s)
+                {
+                  for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi)
+                  {
+                    auto bit = channel.buckets[bi].layerMap.find(s);
+                    if(bit != channel.buckets[bi].layerMap.end())
+                    {
+                      ref = tex_ref_static(
+                          (uint32_t)bi, (uint32_t)bit->second);
+                      break;
+                    }
+                  }
+                }
+              }
+              if(ch == ChannelOcclusion)
+                packed.occlusion_textureRef = ref;
+              else
+                packed.textureRefs[chi] = ref;
+            }
+            m_registry->updateSlot(
+                res, it->second, &packed, sizeof(MaterialGPU));
+          }
+        }
+      }
+
+      // Ensure the scene-wide SSBOs exist at a large-enough capacity. Only
+      // allocates / resizes when the count grew past the current cap; the
+      // common steady-state case is a no-op.
+      //
+      // Both `scene_materials_ext` and `scene_material_uv_xforms` are
+      // indexed by Material ARENA SLOT in the shader (shader does
+      // `entries[pd.material_index]` where pd.material_index is the
+      // arena slot, parallel to `scene_materials` which IS the arena).
+      // Their CPU side must therefore be sized + filled by arena slot
+      // too, NOT by fs.materials position. See the freshMaterialUVTransforms
+      // build below for the same arena-slot-indexed pattern.
+      uint32_t maxArenaSlot = 0;
+      if(this->scene.state && this->scene.state->materials)
+      {
+        for(const auto& m : *this->scene.state->materials)
+        {
+          if(!m)
+            continue;
+          maxArenaSlot
+              = std::max(maxArenaSlot, arenaSlotForMaterial(m.get()));
+        }
+      }
+      // Instancer / loader prototype materials are NOT in
+      // scene.state->materials but DO get an arena slot via
+      // m_loaderMaterialSlots (registered above), and their slot is what
+      // arenaSlotForMaterial() — hence PerDrawGPU.material_index — resolves
+      // to for those draws. If such a slot exceeds the scene-material max,
+      // the shader's `scene_materials_ext[material_index]` /
+      // `uv_xforms[material_index]` would read past the bound aux range
+      // (threedim#11). Fold those slots into the extent so the aux buffers
+      // are sized to cover every reachable material_index.
+      for(const auto& [mat, slot] : m_loaderMaterialSlots)
+      {
+        if(slot.valid())
+          maxArenaSlot = std::max(maxArenaSlot, slot.slot_index);
+      }
+      const std::size_t arenaSlotEntries
+          = (std::size_t)maxArenaSlot + 1;
+      const int64_t matsExtBytes
+          = std::max<int64_t>(
+              16,
+              (int64_t)arenaSlotEntries * sizeof(MaterialExtensionsGPU));
+      auto& rhi = *renderer.state.rhi;
+      // Track buffer-pointer churn: when grow reallocates any aux buffer we
+      // MUST republish m_outputSpec.meshes so downstream's SRB rebinds to
+      // the new pointer. Otherwise the sink keeps its old aux.buffer
+      // (released via RenderList::releaseBuffer) and reads undefined memory.
+      // Channel-array reallocation also counts as an aux change for the
+      // purposes of bumping the mesh identity downstream — see the
+      // rebuildChannel call above.
+      bool auxBuffersChanged = channelReallocated;
+      // Returns true on (re)allocation. Same prefix-staleness invariant
+      // as the static growBuf above: callers MUST clear the matching
+      // diffUpload mirror on `true` so the new (uninitialised) buffer
+      // gets the full fresh contents instead of just the appended tail.
+      // Also zero-fills the freshly allocated buffer (Vulkan does NOT
+      // zero VkBuffers on creation — sparse-uploaded SSBOs would
+      // otherwise read garbage from device-memory pages).
+      auto grow = [&](QRhiBuffer*& buf, int64_t& cap, int64_t need, const char* nm) {
+        // The existing buffer already covers the request: leave it (and
+        // its pointer identity) untouched and report "no reallocation".
+        const bool alreadyFits = buf && cap >= need;
+        if(alreadyFits)
+          return false;
+
+        // Start from the current capacity (16 bytes when there is none
+        // yet) and double until the request fits.
+        int64_t grownCap = (cap > 0) ? cap : 16;
+        while(grownCap < need)
+          grownCap *= 2;
+
+        // Hand the outgoing buffer back to the renderer before the
+        // pointer is overwritten.
+        if(buf)
+          renderer.releaseBuffer(buf);
+
+        buf = rhi.newBuffer(
+            QRhiBuffer::Static, QRhiBuffer::StorageBuffer, grownCap);
+        buf->setName(nm);
+        buf->create();
+
+        // Zero-fill the whole new allocation via the thread-local zero
+        // pool (see RhiClearBuffer.hpp).
+        RhiClearBuffer::clearBuffer(rhi, res, buf, 0, (quint32)grownCap);
+
+        cap = grownCap;
+        // A new buffer pointer exists: flag it so the output geometry is
+        // republished and downstream rebinds to it.
+        auxBuffersChanged = true;
+        return true;
+      };
+      // scene_lights now points at the RawLight arena (fixed capacity)
+      // and scene_materials points at the Material arena — no grow here
+      // for either.
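+      // Realloc → clear the matching diffUpload mirror (m_cachedMaterialExt,
+      // m_cachedMaterialUVTransforms and m_cachedLightIndices in the calls
+      // below) so the freshly-allocated GPU buffer receives the full fresh
+      // contents instead of only the entries that differ from a stale mirror.
+      // Same prefix-staleness invariant as growBuf — see its comment.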
+      if(grow(m_materialsExtBuffer, m_materialsExtCap, matsExtBytes,
+              "ScenePreprocessor::materials_ext"))
+        m_cachedMaterialExt.clear();
+
+      // Per-material UV transforms (KHR_texture_transform). Sized by
+      // arena-slot count (see comment above scene_materials_ext); the
+      // freshMaterialUVTransforms vector built below uses the same
+      // indexing.
+      const int64_t uvXformBytes
+          = std::max<int64_t>(
+              16,
+              (int64_t)arenaSlotEntries * sizeof(MaterialUVTransformGPU));
+      if(grow(m_materialUVTransformsBuffer, m_materialUVTransformsCap, uvXformBytes,
+              "ScenePreprocessor::material_uv_xforms"))
+        m_cachedMaterialUVTransforms.clear();
+      // scene_light_indices: compact uint array of arena slot indices.
+      // Count the lights with valid arena slots (filter out 0xFFFFFFFF
+      // sentinels from producer-less lights).
+      std::vector<uint32_t> freshLightIndices;
+      freshLightIndices.reserve(fs.lightArenaSlots.size());
+      for(uint32_t s : fs.lightArenaSlots)
+        if(s != 0xFFFFFFFFu)
+          freshLightIndices.push_back(s);
+      // 16 KiB floor (= 4096 light index slots) so override CSFs like
+      // pack_lights_from_points / wander_lights_inline / grid_lights_inline
+      // can publish up to 4k procedural lights without OOB-clamping
+      // themselves to the scene-graph-derived size. RawLight arena
+      // (GpuResourceRegistry::Arena::RawLight, currently 4096 slots) is
+      // the matching ceiling — keep the two values consistent: this
+      // floor must equal arena_slot_count * 4 bytes. If you bump one
+      // without the other, either (a) procedural CSFs hit the lower
+      // bound and clamp early, or (b) scene_light_indices references
+      // slot indices past the arena size and rasterizers read garbage.
+      const int64_t lightIdxBytes
+          = std::max<int64_t>(16384, (int64_t)freshLightIndices.size() * 4);
+      if(grow(m_lightIndicesBuffer, m_lightIndicesCap, lightIdxBytes,
+              "ScenePreprocessor::light_indices"))
+        m_cachedLightIndices.clear();
+
+      // Allocate the scene_counts buffer once (16 bytes, never grows).
+      //
+      // Usage: Static + StorageBuffer (SSBO-only).
+      //
+      // Historical context: this buffer used to be allocated as
+      // UniformBuffer | StorageBuffer to satisfy a dual-bind contract —
+      // rasterizers declared `scene_counts` with TYPE: "uniform" (UBO
+      // bind) while override CSFs (pack_lights_from_points etc.)
+      // declared the same name with ACCESS: "read_write" (SSBO bind).
+      // QRhi forbids Dynamic + StorageBuffer, so the buffer had to be
+      // Static. But D3D11 / GLES don't support NonDynamicUniformBuffers
+      // — `Static + UniformBuffer` fails create() silently there, and
+      // the override-CSF write pattern was unreachable on every desktop
+      // backend except Vulkan / Metal / D3D12.
+      //
+      // Resolution: drop the UBO half entirely. All bundled shaders
+      // (presets/rasterizers/*.frag, presets/filters/*.csf,
+      // presets/lighting/*.csf, presets/volumetric/*.csf) declare
+      // `scene_counts` as a storage buffer. Rasterizers (top-level
+      // INPUTS) declare it with `TYPE: "storage", ACCESS: "read_only"`
+      // → parser emits `layout(std430) readonly buffer scene_counts_buf
+      // { ... } scene_counts;`. Filters / lighting / volumetric (nested
+      // AUXILIARY, where SSBO is the default kind) just need
+      // `ACCESS: "read_only"` to get the readonly qualifier on the
+      // emitted block. Override-CSFs that write the buffer keep their
+      // `ACCESS: "read_write"` declaration as-is.
+      //
+      // The shader-side access pattern `scene_counts.light_count` is
+      // identical against UBO or SSBO declarations; std140 vs std430
+      // layouts agree on a 4-uint struct (16 bytes, no padding either
+      // way).
+      //
+      // Advanced users writing their own shaders MAY still declare
+      // `TYPE: "uniform"` for `scene_counts` — the parser supports it
+      // — but they're responsible for ensuring the target backend
+      // supports the resulting non-dynamic UBO bind. Bundled shaders
+      // avoid it so they work on every backend.
+      if(!m_sceneCountsBuffer)
+      {
+        m_sceneCountsBuffer = rhi.newBuffer(
+            QRhiBuffer::Static, QRhiBuffer::StorageBuffer,
+            sizeof(SceneCountsUBO));
+        m_sceneCountsBuffer->setName("ScenePreprocessor::scene_counts");
+        m_sceneCountsBuffer->create();
+        // Zero-fill: Vulkan doesn't initialise VkBuffer memory. Until
+        // the first scene_counts upload (gated below on actual count
+        // changes), shaders reading scene_counts.light_count etc. would
+        // see device-memory garbage — wildly different per resize as the
+        // freshly allocated buffer lands on a different memory page.
+        // SceneCountsUBO is a POD-of-uint32 — the all-zeros pattern
+        // matches its default-constructed state.
+        RhiClearBuffer::clearBuffer(
+            rhi, res, m_sceneCountsBuffer, 0, sizeof(SceneCountsUBO));
+      }
+
+      // Allocate the shadow_cascades UBO once (sizeof(ShadowCascadesUBO),
+      // never grows). The buffer is created on the first pass whether or
+      // not the scene authors cascades, so shaders that declare
+      // `shadow_cascades` always have a valid binding; a scene without
+      // cascades simply reads the zero-filled cascade_count = 0.
+      if(!m_shadowCascadesBuffer)
+      {
+        m_shadowCascadesBuffer = rhi.newBuffer(
+            QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer,
+            sizeof(ShadowCascadesUBO));
+        m_shadowCascadesBuffer->setName("ScenePreprocessor::shadow_cascades");
+        m_shadowCascadesBuffer->create();
+        // Zero-fill so a no-shadow-cascade scene reads cascade_count=0
+        // (the shader's "skip shadow sampling" sentinel) instead of
+        // device-memory garbage on the first frame after a fresh
+        // RenderList. RhiClearBuffer auto-routes Dynamic UBOs through
+        // chunked updateDynamicBuffer (cap 65535 B per call);
+        // sizeof(ShadowCascadesUBO) is only a few hundred bytes, so the
+        // clear fits in a single chunk.
+        RhiClearBuffer::clearBuffer(
+            rhi, res, m_shadowCascadesBuffer, 0, sizeof(ShadowCascadesUBO));
+      }
+
+      // Camera UBO upload moved to the top of update() so it runs every
+      // frame, decoupled from the mesh-rebuild gate (motion vectors need
+      // per-frame camera_prev refresh; see comment at the head of
+      // update()). The QRhiBuffer pointer is allocated on first call
+      // there, so by the time rebuildMDI runs below, m_camerasBuffer is
+      // non-null and ready to be attached as an aux on the emitted
+      // geometry — same contract as before.
+
+      // Pack the MERGED scene_environment into our own Env arena slot.
+      // merge_scenes composes contributions from every EnvironmentLoader
+      // / CubemapLoader / future IBL-precompute producer field-by-field
+      // via the `params_set` bitmask, so this->scene.state->environment
+      // holds the final composed state. Individual producer Env slots
+      // still get written by those producers (they're POSTing their
+      // own contribution for any future consumer wanting per-producer
+      // data), but the scene_environment binding goes to our slot.
+      if(m_registry && m_envSlot.valid() && this->scene.state)
+      {
+        const auto& env = this->scene.state->environment;
+        EnvParamsUBO gpu{};
+        gpu.ambient[0] = env.ambient_color[0];
+        gpu.ambient[1] = env.ambient_color[1];
+        gpu.ambient[2] = env.ambient_color[2];
+        gpu.ambient[3] = env.ambient_intensity;
+        gpu.fog_color_density[0] = env.fog.color[0];
+        gpu.fog_color_density[1] = env.fog.color[1];
+        gpu.fog_color_density[2] = env.fog.color[2];
+        gpu.fog_color_density[3] = env.fog.density;
+        gpu.fog_range[0] = env.fog.start;
+        gpu.fog_range[1] = env.fog.end;
+        gpu.fog_range[2] = float(env.fog.mode);
+        gpu.fog_range[3] = env.fog.enabled ? 1.f : 0.f;
+        gpu.exposure_gamma[0] = env.exposure;
+        gpu.exposure_gamma[1] = env.gamma;
+        gpu.exposure_gamma[2] = 0.f;
+        gpu.exposure_gamma[3] = 0.f;
+        if(!m_envSlotSeeded
+           || std::memcmp(&gpu, &m_lastEnvUpload, sizeof(EnvParamsUBO)) != 0)
+        {
+          m_registry->updateSlot(res, m_envSlot, &gpu, sizeof(gpu));
+          m_lastEnvUpload = gpu;
+          m_envSlotSeeded = true;
+        }
+      }
+
+      // Upload this preprocessor's private world-transforms buffer.
+      // Per-preprocessor (not a shared registry arena) because two
+      // preprocessors consuming different filtered views of the same
+      // source scene legitimately compute different world matrices
+      // for the same scene_transform — a shared arena would have them
+      // stomp. Layout: indexed by the RawTransform arena slot index
+      // (not walk order). Consumer shaders / compute passes read
+      // `world_transforms.data[slot_index]` for any light / particle /
+      // effect that needs slot-addressable world-space composition.
+      {
+        auto& rhi = *renderer.state.rhi;
+        // Size to the full RawTransform arena capacity — sparse, but
+        // bounded (16384 slots × 64 B = 1 MiB). Slot-indexed lookup
+        // gives O(1) addressing without a per-frame translation table.
+        const uint32_t xform_slot_count
+            = renderer.registry().arenaSlotCount(
+                GpuResourceRegistry::Arena::RawTransform);
+        const int64_t want_bytes
+            = (int64_t)xform_slot_count * (int64_t)sizeof(WorldTransformMat4);
+        if(!m_worldTransformsBuffer || m_worldTransformsCap < want_bytes)
+        {
+          if(m_worldTransformsBuffer)
+            renderer.releaseBuffer(m_worldTransformsBuffer);
+          if(m_worldTransformsPrevBuffer)
+            renderer.releaseBuffer(m_worldTransformsPrevBuffer);
+          // QRhi forbids Dynamic + StorageBuffer — the SSBO path is
+          // host-coherent differently from a Dynamic UBO's per-frame
+          // rotation. Static + uploadStaticBuffer is the correct pair.
+          m_worldTransformsBuffer = rhi.newBuffer(
+              QRhiBuffer::Static, QRhiBuffer::StorageBuffer, (quint32)want_bytes);
+          m_worldTransformsBuffer->setName("ScenePreprocessor::world_transforms");
+          m_worldTransformsBuffer->create();
+          // Prev buffer: same shape as current, sampled alongside it
+          // as the `world_transforms_prev` aux for motion-vector /
+          // TAA / reprojection shaders. Populated each frame by a
+          // single GPU-side copyBuffer in runInitialPasses — see
+          // m_worldTransformsPrevBuffer doc for the deferred-write
+          // ordering that keeps the copy reading frame-N-1 data.
+          m_worldTransformsPrevBuffer = rhi.newBuffer(
+              QRhiBuffer::Static, QRhiBuffer::StorageBuffer, (quint32)want_bytes);
+          m_worldTransformsPrevBuffer->setName(
+              "ScenePreprocessor::world_transforms_prev");
+          m_worldTransformsPrevBuffer->create();
+          // Zero-fill both buffers. world_transforms is sparse —
+          // only slots used by actual scene_transforms get written,
+          // unused arena slots stay at their initial value. After a
+          // fresh RenderList (resize), Vulkan hands us a VkBuffer with
+          // device-memory garbage; any consumer indexing
+          // world_transforms.data[L.transform_slot] for a slot the
+          // producer hasn't populated reads garbage. Lights end up
+          // with non-deterministic world positions per resize → the
+          // user's "wildly different lighting on every resize"
+          // symptom.
+          //
+          // _prev: the runInitialPasses copyBuffer(current → prev) on
+          // the first post-resize frame would otherwise propagate the
+          // current buffer's garbage into prev for any shader sampling
+          // world_transforms_prev.
+          //
+          // RhiClearBuffer's batch variant pulls from the thread-local
+          // zero pool — both 1 MiB clears reuse the same backing
+          // vector (no per-buffer allocation).
+          RhiClearBuffer::clearBuffer(
+              rhi, res, m_worldTransformsBuffer, 0, (quint32)want_bytes);
+          RhiClearBuffer::clearBuffer(
+              rhi, res, m_worldTransformsPrevBuffer, 0, (quint32)want_bytes);
+          m_worldTransformsCap = want_bytes;
+        }
+        // Sparse upload: one small write per scene_transform. Typical
+        // scene has 1-50 transforms, so this is cheaper than packing
+        // into a contiguous staging buffer. The arena-slot offsets
+        // naturally cluster at the low indices (free-list LIFO stack
+        // pops 0, 1, 2, … first) so uploads are cache-friendly.
+        //
+        // The actual uploadStaticBuffer is DEFERRED to runInitialPasses
+        // so the prev-snapshot copyBuffer (which runs ahead of the
+        // submitted writes) reads frame N-1 contents of current. Here
+        // we just stash (slot, matrix) pairs; runInitialPasses drains
+        // the list into the post-snapshot resource batch.
+        m_pendingWorldXformWrites.clear();
+        m_pendingWorldXformWrites.reserve(fs.worldTransforms.size());
+        for(const auto& wt : fs.worldTransforms)
+        {
+          WorldTransformMat4 m;
+          writeMat4(m.m, wt.world);
+          m_pendingWorldXformWrites.emplace_back(wt.transform_slot, m);
+        }
+      }
+
+      // Pack per-draw data once (cheap — just struct copy per draw).
+      // `pd.material_index` is the Material-arena slot index (task 28a)
+      // resolved by arenaSlotForMaterial(); shaders read
+      // `scene_materials.entries[material_index]` directly against the
+      // registry's Material arena. rebuildMDI() uses the same helper
+      // on the full-rebuild path so the encoding is consistent.
+      //
+      // `pd.transform_slot` + `pd.skeleton_offset` + per_draw_bounds are
+      // packed in lockstep with the other fields; fast path stays cheap
+      // (one struct copy + one aabb copy per draw) and keeps the per_draw_bounds
+      // sidecar in sync with per_draws for downstream culling CSFs.
+      std::vector<uint32_t> fastSkinJointOffsets;
+      fastSkinJointOffsets.reserve(fs.skins.size());
+      {
+        uint32_t running = 0;
+        for(const auto& sk : fs.skins)
+        {
+          fastSkinJointOffsets.push_back(running);
+          running += (uint32_t)sk.joint_matrices.size();
+        }
+      }
+
+      std::vector<PerDrawGPU> freshPerDraws;
+      std::vector<PerDrawBoundsGPU> freshPerDrawBounds;
+      freshPerDraws.reserve(fs.draws.size());
+      freshPerDrawBounds.reserve(fs.draws.size());
+      for(const auto& dc : fs.draws)
+      {
+        // Mirror emitDraw's skip predicate exactly (threedim#3): a draw with
+        // no usable positions, or with GPU-backed indices, is dropped by
+        // rebuildMDI and therefore occupies NO per_draws slot. Filtering the
+        // fast-path mirror only by `vertices > 0` would keep such draws and
+        // shift every following slot, so diffUpload would write a draw's
+        // model matrix into its neighbour's GPU slot.
+        if(!dc.mesh || dc.mesh->vertices <= 0 || !m_registry)
+          continue;
+        if(!meshEmitsDraw(*dc.mesh))
+          continue;
+        PerDrawGPU pd{};
+        writeMat4(pd.model, dc.worldTransform);
+        QMatrix4x4 nm = dc.worldTransform.inverted().transposed();
+        nm.setColumn(3, QVector4D(0, 0, 0, 1));
+        nm.setRow(3, QVector4D(0, 0, 0, 1));
+        writeMat4(pd.normal, nm);
+        pd.material_index = arenaSlotForMaterial(dc.material.get());
+        // tag_hash still keyed on the scene-material index (CPU-only
+        // per-pass filter — not shader-visible as material identity).
+        pd.tag_hash
+            = (dc.materialIndex >= 0
+               && (std::size_t)dc.materialIndex < materialTagHashes.size())
+                ? materialTagHashes[dc.materialIndex]
+                : 0u;
+        pd.transform_slot = dc.transform_slot;
+        pd.skeleton_offset
+            = (dc.skinIndex >= 0
+               && (std::size_t)dc.skinIndex < fastSkinJointOffsets.size())
+                  ? fastSkinJointOffsets[dc.skinIndex]
+                  : 0xFFFFFFFFu;
+        freshPerDraws.push_back(pd);
+        freshPerDrawBounds.push_back(packBounds(dc.local_bounds));
+      }
+
+      // Mesh fingerprint: the sequence of DrawCall::stable_id's — the
+      // addresses of the source mesh_primitives (or legacy ossia::geometry
+      // entries) that back each draw. Those addresses are invariant across
+      // frames as long as the mesh_component shared_ptrs and their
+      // primitives vectors don't change; walking the same scene tree twice
+      // thus produces identical fingerprints and we can skip the full
+      // vertex/index rebuild. (Contrast: `dc.mesh` is a fresh
+      // primitiveToGeometry() wrapper pointer that differs every frame.)
+      //
+      // We also mix in the upstream GPU-resident attribute buffer handles
+      // (positions/normals/texcoords/tangents). `m_pendingGpuCopies` holds
+      // raw QRhiBuffer* captured in queueSlabCopy at rebuildMDI time and
+      // re-issued every frame from runInitialPasses; if an upstream node
+      // rebuilds its QRhiBuffer (CSF compute pipeline rebuild, Instancer
+      // prototype swap, GPU mesh-handle pool churn) while the source
+      // mesh_primitive address stays identical, the fast path would skip
+      // rebuildMDI and the queue would re-issue copies from a freed
+      // QRhiBuffer*. Including the upstream buffer pointers here makes any
+      // such swap force a full rebuild → fresh op.src in the queue.
+      std::vector<uint64_t> freshMeshFingerprint;
+      freshMeshFingerprint.reserve(fs.draws.size() * 5);
+      for(const auto& dc : fs.draws)
+      {
+        if(dc.mesh && dc.mesh->vertices > 0 && dc.stable_id)
+        {
+          freshMeshFingerprint.push_back(dc.stable_id);
+          // Mix one entry per attribute: upstream QRhiBuffer* identity (or
+          // 0 when the attribute is CPU-sourced / missing). A swap from
+          // CPU→GPU sourcing or a buffer pointer change → fingerprint
+          // mismatch → rebuildMDI repopulates m_pendingGpuCopies.
+          auto bufId = [&](ossia::attribute_semantic sem) -> uint64_t {
+            // Identity of the upstream GPU buffer backing `sem` for this
+            // draw's mesh; the pointer value itself is what gets mixed
+            // into the fingerprint.
+            const auto attr = extractGpuAttribute(*dc.mesh, sem);
+            const auto handle = reinterpret_cast<uintptr_t>(attr.buf);
+            return handle;
+          };
+          freshMeshFingerprint.push_back(
+              bufId(ossia::attribute_semantic::position));
+          freshMeshFingerprint.push_back(
+              bufId(ossia::attribute_semantic::normal));
+          freshMeshFingerprint.push_back(
+              bufId(ossia::attribute_semantic::texcoord0));
+          freshMeshFingerprint.push_back(
+              bufId(ossia::attribute_semantic::tangent));
+        }
+      }
+
+      // Cloud fingerprint (threedim#2): rebuildPrimitiveClouds is only
+      // invoked on the full-rebuild branch, so any change to the primitive
+      // cloud set must mismatch this fingerprint to force that branch. We
+      // hash the same fields the function's internal per-bucket fingerprint
+      // and bucket geometry depend on — raw_data identity + content version,
+      // primitive_count, transform_slot, the world matrix (drives
+      // CloudMetaGPU.model + AABBs), and the bucket key derived from
+      // format_id — so added / removed / moved / re-uploaded clouds all flip
+      // it. Count is mixed first so a pure add/remove is always detected.
+      uint64_t freshCloudFingerprint = 0;
+      ossia::hash_combine(
+          freshCloudFingerprint, (uint64_t)fs.primitive_clouds.size());
+      for(const auto& d : fs.primitive_clouds)
+      {
+        if(!d.cloud)
+        {
+          ossia::hash_combine(freshCloudFingerprint, (uint64_t)0);
+          continue;
+        }
+        // Bucket key (mirrors rebuildPrimitiveClouds): hash(format_id), or
+        // the cloud pointer when format_id is empty.
+        const uint64_t bucket_key
+            = !d.cloud->format_id.empty()
+                  ? (uint64_t)(uint32_t)ossia::hash_string(d.cloud->format_id)
+                  : (uint64_t)(uintptr_t)d.cloud.get();
+        ossia::hash_combine(freshCloudFingerprint, bucket_key);
+
+        const auto* raw = d.cloud->raw_data.get();
+        ossia::hash_combine(freshCloudFingerprint, (uint64_t)(uintptr_t)raw);
+        const uint64_t content_id
+            = raw ? (raw->content_hash != 0 ? raw->content_hash
+                                            : (uint64_t)raw->dirty_index)
+                  : 0u;
+        ossia::hash_combine(freshCloudFingerprint, content_id);
+        ossia::hash_combine(
+            freshCloudFingerprint, (uint64_t)d.cloud->primitive_count);
+        ossia::hash_combine(
+            freshCloudFingerprint, (uint64_t)d.transform_slot);
+        ossia::hash_combine(
+            freshCloudFingerprint,
+            ossia::hash_bytes(d.worldTransform.constData(), 64));
+      }
+
+      // Pack per-material UV transforms (KHR_texture_transform) and
+      // material extensions. Both buffers are read by the shader as
+      // `entries[pd.material_index]` where pd.material_index is the
+      // Material ARENA SLOT INDEX (parallel to `scene_materials`,
+      // which IS the registry's Material arena). The buffers therefore
+      // must also be arena-slot-indexed, not fs.materials-indexed —
+      // otherwise a 1-material scene whose loader-material lands at
+      // arena slot 1 reads entries[1] which is OUT OF BOUNDS, returning
+      // zeros, collapsing every UV transform to (0,0) scale → all
+      // textures sample pixel (0,0) → uniform color (the "solid gray
+      // DamagedHelmet" symptom).
+      std::vector<MaterialUVTransformGPU> freshMaterialUVTransforms(
+          arenaSlotEntries);
+      std::vector<MaterialExtensionsGPU> freshMaterialExtensions(
+          arenaSlotEntries);
+      if(this->scene.state && this->scene.state->materials)
+      {
+        const auto& mats = *this->scene.state->materials;
+        auto pack_xform = [](float* dst_offset_scale, float* dst_rot,
+                             const ossia::texture_ref& tr) {
+          // Packs one texture slot's UV transform: offset into the first
+          // two floats of the 4-float destination, scale into the last
+          // two, and the rotation into its own scalar destination.
+          const auto& xf = tr.uv_transform;
+          float* const offset_out = dst_offset_scale;
+          float* const scale_out = dst_offset_scale + 2;
+          offset_out[0] = xf.offset[0];
+          offset_out[1] = xf.offset[1];
+          scale_out[0] = xf.scale[0];
+          scale_out[1] = xf.scale[1];
+          dst_rot[0] = xf.rotation;
+        };
+        for(std::size_t i = 0; i < mats.size(); ++i)
+        {
+          if(!mats[i])
+            continue;
+          const uint32_t slot = arenaSlotForMaterial(mats[i].get());
+          if(slot >= arenaSlotEntries)
+            continue;
+          auto& g = freshMaterialUVTransforms[slot];
+          pack_xform(g.bc_offset_scale,     &g.rotations0[0], mats[i]->base_color_texture);
+          pack_xform(g.mr_offset_scale,     &g.rotations0[1], mats[i]->metallic_roughness_texture);
+          pack_xform(g.normal_offset_scale, &g.rotations0[2], mats[i]->normal_texture);
+          pack_xform(g.em_offset_scale,     &g.rotations0[3], mats[i]->emissive_texture);
+          pack_xform(g.occ_offset_scale,    &g.rotations1[0], mats[i]->occlusion_texture);
+
+          // Material extensions are already packed by flattenScene at
+          // fs.material_extensions[i]; copy into the arena-slot index.
+          if(i < fs.material_extensions.size())
+            freshMaterialExtensions[slot] = fs.material_extensions[i];
+        }
+      }
+
+      const bool meshesUnchanged
+          = (freshMeshFingerprint == m_cachedMeshFingerprint)
+            && m_outputSpec.meshes
+            // If any aux buffer was just reallocated we need to republish
+            // the output geometry so downstream picks up the new pointers.
+            // rebuildMDI does this cleanly by building a fresh geometry
+            // with wrapGpu() wrappers over the current buffer pointers.
+            && !auxBuffersChanged
+            // Cloud set unchanged (threedim#2): rebuildPrimitiveClouds only
+            // runs on the full-rebuild branch and re-appends its bucket
+            // geometries onto the freshly rebuilt mesh list, so any cloud
+            // add / remove / move / re-upload must drop us off the fast path.
+            && (freshCloudFingerprint == m_cachedCloudFingerprint)
+            // The fast path's freshPerDraws / freshMeshFingerprint cover
+            // fs.draws ONLY. fs.instances cmds (their world transforms,
+            // instance counts, prototype identities, per-instance
+            // GPU-buffer copies) are processed exclusively inside
+            // rebuildMDI(); skipping it means Instancer control changes
+            // and per-particle-data updates from upstream CSF compute
+            // pipelines never reach the GPU. Force the full rebuild
+            // path whenever any instance group is present.
+            && fs.instances.empty();
+
+      if(meshesUnchanged)
+      {
+        // Fast path: only diff-upload the small scene-level SSBOs. The
+        // big vertex/index/indirect buffers are left alone, and
+        // m_outputSpec.meshes is kept as the same shared_ptr (so
+        // NodeRenderer::process on the downstream side sees
+        // `this->geometry == v` and doesn't even flag geometryChanged).
+        // scene_lights is the RawLight arena; producers keep it fresh
+        // in their own update() hooks. Only the compact indices list
+        // needs a diff upload.
+        diffUpload(res, m_lightIndicesBuffer, m_cachedLightIndices,
+                   freshLightIndices);
+        // scene_materials: producer + loader-material upload pass
+        // above already pushed MaterialGPU bytes into the Material
+        // arena. Nothing to diff-upload here.
+        diffUpload(res, m_materialsExtBuffer, m_cachedMaterialExt,
+                   freshMaterialExtensions);
+        diffUpload(res, m_materialUVTransformsBuffer,
+                   m_cachedMaterialUVTransforms, freshMaterialUVTransforms);
+        diffUpload(res, m_mdi.per_draws,   m_cachedPerDraws,  freshPerDraws);
+        // per_draw_bounds is static across frames (local-space AABB,
+        // never changes frame-to-frame for the same topology) — on the fast
+        // path the mirror and fresh arrays match element-for-element and
+        // diffUpload short-circuits to zero uploads. Kept in the fast
+        // path for robustness (e.g. a material-swap flow that re-picks
+        // a primitive variant with different bounds under the hood).
+        diffUpload(res, m_mdi.per_draw_bounds, m_cachedPerDrawBounds,
+                   freshPerDrawBounds);
+      }
+      else
+      {
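+        // Full rebuild path: taken whenever meshesUnchanged is false — the
+        // mesh fingerprint changed (meshes added/removed/reordered, or an
+        // upstream GPU attribute buffer swapped), no output geometry has
+        // been published yet, an aux buffer was reallocated, the cloud
+        // fingerprint changed, or any instance group is present.
+        // scene_lights arena bytes are maintained by each Light producer's
+        // update() hook — we only push the compacted indices list here.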
+        if(!freshLightIndices.empty())
+          res.uploadStaticBuffer(
+              m_lightIndicesBuffer, 0,
+              freshLightIndices.size() * sizeof(uint32_t),
+              freshLightIndices.data());
+        // scene_materials: arena upload already happened above (see
+        // the "loader-material arena slot upload" block).
+        if(!freshMaterialExtensions.empty())
+          res.uploadStaticBuffer(
+              m_materialsExtBuffer, 0,
+              freshMaterialExtensions.size() * sizeof(MaterialExtensionsGPU),
+              freshMaterialExtensions.data());
+        if(!freshMaterialUVTransforms.empty())
+          res.uploadStaticBuffer(
+              m_materialUVTransformsBuffer, 0,
+              freshMaterialUVTransforms.size() * sizeof(MaterialUVTransformGPU),
+              freshMaterialUVTransforms.data());
+
+        rebuildMDI(renderer, res, fs, materialTagHashes);
+        rebuildPrimitiveClouds(renderer, res, fs);
+
+        // Seed the CPU mirrors from the fresh data so subsequent frames
+        // can take the fast path via diffUpload.
+        m_cachedMeshFingerprint = std::move(freshMeshFingerprint);
+        m_cachedCloudFingerprint = freshCloudFingerprint;
+        m_cachedLightIndices = std::move(freshLightIndices);
+        m_cachedMaterialExt = std::move(freshMaterialExtensions);
+        m_cachedMaterialUVTransforms = std::move(freshMaterialUVTransforms);
+        // m_cachedPerDraws / m_cachedPerDrawBounds are NOT seeded here:
+        // rebuildMDI() already assigned them from acc.perDraws (the
+        // actually-emitted set, after emitDraw's skip predicate), so the
+        // mirror matches the GPU per_draws layout slot-for-slot. Seeding
+        // from freshPerDraws instead would only stay correct while its
+        // filter mirrors emitDraw's predicate exactly; any drift between
+        // the two would reintroduce the threedim#3 divergence whenever a
+        // draw was skipped.
+      }
+
+      // Camera + Env UBOs are packed above, before rebuildMDI, so that the
+      // geometry's auxiliary entries reference valid buffer pointers. The
+      // pre-sized capacity keeps those pointers stable across parameter
+      // changes on the fast path (no re-rebuild needed).
+
+      // scene_counts SSBO: tell shaders the authoritative N for each
+      // SSBO (so they don't rely on `.length()` which reports buffer
+      // capacity and includes zeroed tail slots when counts shrank).
+      // Uploaded only when a count actually changed.
+      // light_count is the arena-addressable subset (matches
+      // m_cachedLightIndices / scene_light_indices). Post 28b-shader
+      // flip: shaders iterate via the indices buffer, so this count
+      // drives that loop.
+      SceneCountsUBO sc{
+          (uint32_t)m_cachedLightIndices.size(),
+          (uint32_t)fs.materials.size(),
+          (uint32_t)m_mdi.drawCount,
+          0u};
+      if(std::memcmp(&sc, &m_cachedSceneCounts, sizeof(sc)) != 0)
+      {
+        // Allocation is Static + StorageBuffer on every backend, so the
+        // upload always goes through uploadStaticBuffer — at 16 bytes
+        // the difference vs updateDynamicBuffer is negligible anyway.
+        res.uploadStaticBuffer(m_sceneCountsBuffer, 0, sizeof(sc), &sc);
+        m_cachedSceneCounts = sc;
+      }
+
+      // shadow_cascades UBO: populated from scene_state.shadow_cascades
+      // (authored upstream by Threedim::ShadowCascadeSetup). Straight
+      // struct copy — the CPU-side shadow_cascades_info layout mirrors
+      // the GPU ShadowCascadesUBO field-for-field: light_view_proj[8]
+      // (column-major mat4 array), split_view_depths[9] compacted into
+      // cascade_split_distances[8], cascade_count (uint32). Diff-uploaded
+      // against the cached snapshot so frames without topology / camera
+      // changes cost zero UBO bytes.
+      //
+      // When no upstream authored cascades (the field defaults to
+      // cascade_count=0), we still publish the UBO with zero count so
+      // downstream shaders that declare `shadow_cascades` as INPUT have
+      // a valid binding and fall through their own "cascade_count == 0
+      // → skip shadow sampling" guard.
+      ShadowCascadesUBO sh{};
+      if(this->scene.state)
+      {
+        const auto& src = this->scene.state->shadow_cascades;
+        sh.cascade_count
+            = std::min<uint32_t>(src.cascade_count,
+                                 ossia::shadow_cascades_info::max_cascades);
+        std::memcpy(
+            sh.light_view_proj, src.light_view_proj,
+            sizeof(sh.light_view_proj));
+        // Shaders sample cascade_split_distances[k] for cascade picks;
+        // slot k is the far-plane Z of cascade k (view-space).
+        // CPU-side stores count+1 entries in split_view_depths[]; copy
+        // up to max_cascades slots so UBO and source stay symmetric.
+        // For k > count we emit 0 — the shader's pickCascade() clamps
+        // against cascade_count first, so trailing zeros are never read.
+        const uint32_t kLayoutSlots = ossia::shadow_cascades_info::max_cascades; // 8
+        for(uint32_t k = 0; k < kLayoutSlots; ++k)
+        {
+          // split_view_depths[] has (count+1) entries; slot k is the far
+          // plane of cascade k. Guard with <= cascade_count (not <) so
+          // the sentinel entry at index cascade_count is also copied.
+          sh.cascade_split_distances[k]
+              = (k <= sh.cascade_count)
+                    ? src.split_view_depths[k]
+                    : 0.f;
+        }
+      }
+      if(!m_shadowCascadesSeeded
+         || std::memcmp(&sh, &m_cachedShadowCascades,
+                        sizeof(ShadowCascadesUBO)) != 0)
+      {
+        res.updateDynamicBuffer(
+            m_shadowCascadesBuffer, 0, sizeof(sh), &sh);
+        m_cachedShadowCascades = sh;
+        m_shadowCascadesSeeded = true;
+      }
+
+      // Instance components are now handled directly inside rebuildMDI
+      // (above) — every fs.instances entry rides through the same
+      // unified indirect-cmd batch as fs.draws. No separate sub-mesh
+      // emission step is needed.
+    }
+
+    m_cachedSceneState = this->scene.state.get();
+    m_cachedVersion = this->scene.state ? this->scene.state->version : -1;
+    this->sceneChanged = false;
+
+    // Skybox + texture-channel changes propagate through the geometry's
+    // auxiliary_texture entries on Geometry Out — consumer shaders
+    // re-resolve pointers per frame via try_bind_texture_from_geometry.
+    // Phase 4 also bumps mesh identity on channel-array realloc so
+    // downstream's update() reruns without missing a rebind.
+  }
+
+  // Maps an MDI attribute onto the arena stream buffer that backs it
+  // (Plan 09 S4 — streams moved from MDIState to the registry).
+  QRhiBuffer* mdiBufferFor(MdiAttr a) const noexcept
+  {
+    using Stream = GpuResourceRegistry::MeshStream;
+    // Without a registry, or for an unhandled value, there is no buffer.
+    if(m_registry)
+      switch(a)
+      {
+        case MdiAttr::Positions: return m_registry->meshStreamBuffer(Stream::Positions);
+        case MdiAttr::Normals:   return m_registry->meshStreamBuffer(Stream::Normals);
+        case MdiAttr::Texcoords: return m_registry->meshStreamBuffer(Stream::Texcoords);
+        case MdiAttr::Tangents:  return m_registry->meshStreamBuffer(Stream::Tangents);
+      }
+    return nullptr;
+  }
+
+  // Issue every pending GPU→GPU copy queued during update(). Called from
+  // runInitialPasses even when update() did not rebuild the accumulator:
+  // upstream GPU buffer CONTENTS change every frame (CSF compute writes)
+  // while the buffer HANDLES + MDI offsets stay stable as long as no
+  // draw-topology change occurred. The queue is rebuilt (via clear +
+  // repopulate at the top of the accumulator loop) only when the scene
+  // actually changed; otherwise the same ops fire with fresh data.
+  //
+  // Tight sources (stride 0 or == element size) collapse to one copyBuffer;
+  // strided sources build one region per vertex and submit them with a
+  // single copyBufferRegions call. Ops that would copy nothing are skipped.
+  void issuePendingGpuCopies(RenderList& renderer, QRhiCommandBuffer& cb)
+  {
+    if(m_pendingGpuCopies.empty())
+      return;
+    auto* rhi = renderer.state.rhi;
+    if(!rhi)
+      return;
+    cb.beginExternal();
+    // One compute→transfer barrier for the whole batch instead of one per
+    // copy call — eliminates N−1 redundant pipeline stalls on Vulkan.
+    score::gfx::beginBufferCopyBarrier(*rhi, cb);
+    // Scratch reused across ops — avoids reallocating for each strided op.
+    std::vector<score::gfx::BufferCopyRegion> regions;
+    for(const auto& op : m_pendingGpuCopies)
+    {
+      // Explicit dst wins over the mesh-stream lookup — used by the
+      // unified-MDI per-instance concat copies (translations / colors)
+      // which target preprocessor-owned buffers, not arena streams.
+      QRhiBuffer* dst = op.dst ? op.dst : mdiBufferFor(op.attr);
+      if(!op.src || !dst)
+        continue;
+      // Skip degenerate ops: Vulkan forbids zero-byte copies and empty
+      // region lists, and a negative count must never reach reserve().
+      if(op.vertex_count <= 0 || op.element_size <= 0)
+        continue;
+      if(op.src_stride == 0 || op.src_stride == op.element_size)
+      {
+        // Tight source layout — one copy, no per-call barrier (batched).
+        score::gfx::copyBuffer(
+            *rhi, cb, op.src, dst,
+            op.vertex_count * op.element_size,
+            op.src_offset, op.dst_offset,
+            score::gfx::BufferCopyBarrier::None);
+      }
+      else
+      {
+        // Strided source — src slot size differs from MDI slot size. Each
+        // vertex copies min(src_stride, element_size) bytes, the overlap of
+        // both layouts (e.g. tight vec3 src, 12 B → padded-vec4 MDI slot,
+        // 16 B: real data fills the low bytes, the upload's zero-fill pads).
+        const int per_vertex
+            = std::min(op.src_stride, op.element_size);
+        regions.clear();
+        regions.reserve(op.vertex_count);
+        for(int v = 0; v < op.vertex_count; ++v)
+        {
+          regions.push_back(
+              {op.src_offset + v * op.src_stride,
+               op.dst_offset + v * op.element_size,
+               per_vertex});
+        }
+        score::gfx::copyBufferRegions(
+            *rhi, cb, op.src, dst, regions.data(), (int)regions.size(),
+            score::gfx::BufferCopyBarrier::None);
+      }
+    }
+    score::gfx::endBufferCopyBarrier(*rhi, cb);
+    cb.endExternal();
+    // m_pendingGpuCopies is intentionally NOT cleared: the accumulator owns
+    // it and it persists across cache-hit frames so upstream contents flow.
+  }
+
+  // Per-edge hook: frame-gated GPU copies + snapshot, then geometry_spec hand-off.
+  void runInitialPasses(
+      RenderList& renderer, QRhiCommandBuffer& commands,
+      QRhiResourceUpdateBatch*& res, Edge& edge) override
+  {
+    // Plan 09 S6: capture-tool debug marker; MarkEnd closes it on every return.
+    commands.debugMarkBegin(QByteArrayLiteral("ScenePreprocessor"));
+    struct MarkEnd
+    {
+      QRhiCommandBuffer* c;
+      ~MarkEnd() { c->debugMarkEnd(); }
+    } _me{&commands};
+
+    // GPU→GPU copies run before the geometry_spec hand-off so the
+    // destination MDI buffers are populated by the time the downstream
+    // rasterizer starts reading them. Frame-gated (threedim#13) — the
+    // copies target shared MDI buffers, so one batch per frame serves every
+    // consumer; without the gate a node feeding K downstreams issues K
+    // identical copy batches. Same renderer.frame token discipline as the
+    // world-transforms snapshot below.
+    if(m_lastGpuCopiesFrame != renderer.frame)
+    {
+      issuePendingGpuCopies(renderer, commands);
+      m_lastGpuCopiesFrame = renderer.frame;
+    }
+
+    // Snapshot last frame's world_transforms into the prev buffer via
+    // a pure GPU copy, then apply this frame's per-slot writes via the
+    // (post-snapshot) resource-update batch. The ordering invariant is:
+    //
+    //   commands stream      : ... [updateBatch_N applied] [copyBuffer current→prev] ...
+    //   res (next batch)     :                                                  [uploadStaticBuffer per slot]
+    //   RenderList submits   :                                                  ^ next iteration
+    //
+    // So the copy reads m_worldTransformsBuffer at its frame-N-1
+    // contents (no frame-N writes have hit it yet — those are queued
+    // in `*res`, applied AFTER this function returns), and the next
+    // beginPass sees current = frame N + prev = frame N-1.
+    //
+    // Gate on renderer.frame because runInitialPasses fires once per
+    // outgoing edge: without the guard a node feeding K downstreams
+    // would queue K back-to-back current→prev copies (the second-and-
+    // later seeing prev = current = frame N) and would re-upload the
+    // pending writes K times. Within one frame renderer.frame is
+    // stable; across frames it advances monotonically, so the
+    // mismatch correctly discriminates "first call this frame".
+    //
+    // Fire EVERY frame (not gated on pending non-empty): for a static
+    // scene the per-frame copy is what KEEPS prev == current, so
+    // motion vectors stay zero. A previous attempt to skip when pending
+    // was empty froze prev at the value from the last animated frame
+    // and produced ghost motion on idle scenes.
+    //
+    // The previous CB-pointer discriminator was broken: every QRhi
+    // backend's QRhiSwapChain::currentFrameCommandBuffer returns the
+    // address of a single by-value cbWrapper member, so the pointer is
+    // constant across frames and the gate fired exactly once per
+    // swapchain lifetime — freezing world_transforms / _prev at frame
+    // 0 (motion vectors / TAA / reprojection silently broken).
+    //
+    // Frame 0 sees prev=zeroes → first-frame MV is large; consumer
+    // shaders handle that via frame-index / temporal accumulation.
+    // Auto barrier covers the compute↔transfer hazards around the copy.
+    if(m_worldTransformsBuffer && m_worldTransformsPrevBuffer
+       && m_worldTransformsCap > 0
+       && m_lastSnapshotFrame != renderer.frame)
+    {
+      commands.beginExternal();
+      copyBuffer(
+          *renderer.state.rhi, commands,
+          m_worldTransformsBuffer, m_worldTransformsPrevBuffer,
+          (int)m_worldTransformsCap);
+      commands.endExternal();
+
+      // Drain deferred per-slot writes into the next resource batch (`res`,
+      // distinct from the batch RenderList::renderInternal submitted before
+      // this call); it is applied AFTER the copy above has executed. With a
+      // null `res` the writes stay queued and are retried on a later frame.
+      if(res && !m_pendingWorldXformWrites.empty())
+      {
+        for(const auto& [slot, m] : m_pendingWorldXformWrites)
+        {
+          const uint32_t byte_offset
+              = slot * (uint32_t)sizeof(WorldTransformMat4);
+          res->uploadStaticBuffer(
+              m_worldTransformsBuffer, byte_offset,
+              (quint32)sizeof(WorldTransformMat4), &m);
+        }
+        m_pendingWorldXformWrites.clear();
+      }
+
+      m_lastSnapshotFrame = renderer.frame;
+    }
+
+    auto* src = edge.source;
+    const int src_port_idx = src && src->node
+        ? int(std::find(src->node->output.begin(), src->node->output.end(), src)
+              - src->node->output.begin())
+        : -1;
+
+    // Only the Geometry output (port 0) pushes a geometry_spec — it's
+    // the sole remaining output. Guard kept for robustness in case the
+    // port layout is extended again.
+    if(src_port_idx != 0)
+      return;
+    if(!m_outputSpec.meshes)
+      return;
+
+    auto* sink = edge.sink;
+    if(!sink || !sink->node)
+      return;
+
+    auto rn_it = sink->node->renderedNodes.find(&renderer);
+    if(rn_it == sink->node->renderedNodes.end())
+      return;
+
+    auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink);
+    if(it == sink->node->input.end())
+      return;
+
+    int port_idx = (int)(it - sink->node->input.begin());
+    BUFTRACE() << "ScenePreprocessor → sink_node=" << sink->node->nodeId
+               << " port=" << port_idx
+               << " mdi_indices="
+               << (void*)(m_registry ? m_registry->meshStreamBuffer(
+                       GpuResourceRegistry::MeshStream::Indices) : nullptr)
+               << " mdi_positions="
+               << (void*)(m_registry ? m_registry->meshStreamBuffer(
+                       GpuResourceRegistry::MeshStream::Positions) : nullptr)
+               << " mdi_drawCmds=" << (void*)m_mdi.indirect_draw_cmds
+               << " mdi_drawCount=" << (quint32)m_mdi.drawCount;
+    rn_it->second->process(port_idx, m_outputSpec, edge.source);
+  }
+
+  void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { }
+
+  // Both per-edge hooks are deliberate no-ops: runRenderPass above records
+  // nothing, and there is no per-edge GPU pass state to release here. GPU
+  // resources live on the renderer itself and are dropped in releaseState.
+  void removeOutputPass(RenderList&, Edge&) override { }
+};
+
+ScenePreprocessorNode::ScenePreprocessorNode()
+{
+  // Port 0: Scene input. The scene_spec it carries holds everything,
+  // including the environment and its skybox/IBL textures.
+  input.push_back(new Port{this, {}, Types::Scene, {}});
+
+  // Single outlet: geometry (concatenated MDI geometry). Scene-wide
+  // UBOs/SSBOs (per_draws, indirect_draw_cmds, scene_lights,
+  // scene_materials, scene_counts, camera, env) ride along as
+  // auxiliary_buffer entries; per-channel material texture arrays
+  // (base_color_array, metal_rough_array, normal_array, emissive_array)
+  // and the environment skybox ride along as auxiliary_texture entries.
+  // Consumer shaders bind them all by name via
+  // try_bind_from_geometry / try_bind_texture_from_geometry.
+  output.push_back(new Port{this, {}, Types::Geometry, {}});
+}
+
+ScenePreprocessorNode::~ScenePreprocessorNode() = default;
+
+NodeRenderer* ScenePreprocessorNode::createRenderer(RenderList& /*r*/) const noexcept
+{
+  return new RenderedScenePreprocessorNode{*this};
+}
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.hpp
new file mode 100644
index 0000000000..c8cdfc5388
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.hpp
@@ -0,0 +1,54 @@
+#pragma once
+#include <Gfx/Graph/Node.hpp>
+
+namespace score::gfx
+{
+
+/**
+ * @brief Bridge from `scene_spec` (hierarchical, CPU) to `geometry_spec`
+ *        (flat, GPU-resident).
+ *
+ * Receives a `scene_spec` on its input port, walks the hierarchy, and emits
+ * a `geometry_spec` on its output port containing one geometry per scene
+ * mesh primitive. Each output geometry carries a set of well-known
+ * auxiliary buffers:
+ *
+ *   - `scene_lights`           : LightGPU[]     (per scene light_component)
+ *   - `scene_materials`        : MaterialGPU[]  (per scene material)
+ *   - `scene_materials_ext`    : MaterialExtensionsGPU[] (extended material data)
+ *   - `per_draws`              : PerDrawGPU[]   (one per draw: model/normal mat,
+ *                                                material/transform/skeleton slots)
+ *   - `indirect_draw_cmds`     : IndirectCmd[]  (MDI command buffer; one per draw)
+ *   - `scene_counts`           : SceneCountsUBO (draw/light/material counts)
+ *   - `camera`                 : CameraUBO      (current-frame camera matrices)
+ *   - `camera_prev`            : CameraUBO      (previous-frame camera matrices)
+ *   - `env`                    : EnvUBO         (environment/fog parameters)
+ *   - `world_transforms`       : mat4[]         (current frame, slot-indexed)
+ *   - `world_transforms_prev`  : mat4[]         (previous frame, for TAA/motion)
+ *   - `scene_light_indices`    : uint[]         (light culling index list)
+ *
+ * Conditionally emitted (when present in the scene):
+ *   - `scene_material_uv_xforms` : MaterialUVTransformGPU[] (per-material UV transforms)
+ *   - `per_draw_bounds`          : AABB[]        (per-draw world-space bounds)
+ *   - `shadow_cascades`          : ShadowCascadesUBO (matrices, splits, count; count may be 0)
+ *
+ * Per-draw indexing in shaders uses the MDI `firstInstance` / `gl_DrawID`
+ * mechanism. Shaders read `per_draws[gl_DrawID]` to recover model/normal
+ * matrices and slot indices into the shared tables.
+ *
+ * Inputs:
+ *   - Port 0: Scene (Types::Scene)
+ *
+ * Outputs:
+ *   - Port 0: Geometry (Types::Geometry) — flattened scene
+ */
+class SCORE_PLUGIN_GFX_EXPORT ScenePreprocessorNode : public ProcessNode
+{
+public:
+  ScenePreprocessorNode();
+  ~ScenePreprocessorNode() override;
+
+  score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override;
+};
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp
index 8683367fbc..43b28cd5ea 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp
@@ -7,6 +7,7 @@
 
 #include <score/application/GUIApplicationContext.hpp>
 #include <score/gfx/OpenGL.hpp>
+#include <score/tools/Debug.hpp>
 
 #include <QtGui/private/qrhinull_p.h>
 
@@ -42,12 +43,95 @@
 #include <QtGui/private/qrhimetal_p.h>
 #endif
 
+#include <QCryptographicHash>
+#include <QDir>
+#include <QFile>
 #include <QOffscreenSurface>
 #include <QScreen>
+#include <QStandardPaths>
+#include <QThreadPool>
 #include <QWindow>
 
+#include <utility>
+
 namespace score::gfx
 {
+namespace
+{
+// Persistent pipeline cache. Saved on QRhi destruction, loaded right after
+// QRhi creation. Keyed per backend so different APIs don't overwrite each
+// other's cache. Gated on QRhi::Feature::PipelineCacheDataLoadSave.
+static QString pipelineCacheFilePath(GraphicsApi api)
+{
+  QString root = QStandardPaths::writableLocation(QStandardPaths::CacheLocation);
+  if(root.isEmpty())
+    root = QDir::tempPath();
+  const QString dir = root + QStringLiteral("/ossia-score/pipeline-cache");
+  const char* apiName = "unknown";
+  switch(api)
+  {
+    case Null:   apiName = "null"; break;
+    case OpenGL: apiName = "gl"; break;
+    case Vulkan: apiName = "vk"; break;
+    case D3D11:  apiName = "d3d11"; break;
+    case D3D12:  apiName = "d3d12"; break;
+    case Metal:  apiName = "metal"; break;
+  }
+  // Plain concatenation: chained .arg() would rescan a root containing "%2".
+  QDir().mkpath(dir); // best effort: callers already tolerate a failed open
+  return dir + QLatin1Char('/') + QString::fromLatin1(apiName) + QStringLiteral(".bin");
+}
+
+// Seeds the QRhi from the on-disk blob when the backend supports it and one exists.
+static void tryLoadPipelineCache(QRhi* rhi, GraphicsApi api)
+{
+  if(!(rhi && rhi->isFeatureSupported(QRhi::PipelineCacheDataLoadSave)))
+    return;
+  QFile blob(pipelineCacheFilePath(api));
+  if(blob.open(QIODevice::ReadOnly))
+    rhi->setPipelineCacheData(blob.readAll());
+}
+
+// Disk-only helper: it never touches the QRhi, so it can run off the render
+// thread. Empty blobs and files that cannot be opened are silently ignored.
+static void writePipelineCacheToDisk(QByteArray data, GraphicsApi api)
+{
+  if(data.isEmpty())
+    return;
+  QFile out(pipelineCacheFilePath(api));
+  if(out.open(QIODevice::WriteOnly | QIODevice::Truncate))
+    out.write(data);
+}
+
+// Blocking store used on shutdown (preRhiDestroy): reads the cache bytes
+// from the QRhi (must be on the render thread) and writes them inline, so
+// the work is finished before the QRhi is destroyed.
+static void tryStorePipelineCache(QRhi* rhi, GraphicsApi api)
+{
+  const bool canSave = rhi && rhi->isFeatureSupported(QRhi::PipelineCacheDataLoadSave);
+  if(canSave)
+    writePipelineCacheToDisk(rhi->pipelineCacheData(), api);
+}
+
+// Mid-session store. The cache bytes are read here, on the render thread
+// (QRhi access); the blocking file write is handed to the global thread
+// pool so the render thread doesn't stall on disk I/O right after a
+// PSO-compile burst. The blob is moved into the task, which therefore
+// holds it independently of the QRhi.
+static void tryStorePipelineCacheAsync(QRhi* rhi, GraphicsApi api)
+{
+  if(!(rhi && rhi->isFeatureSupported(QRhi::PipelineCacheDataLoadSave)))
+    return;
+  QByteArray blob = rhi->pipelineCacheData();
+  if(blob.isEmpty())
+    return;
+  auto task = [blob = std::move(blob), api]() mutable {
+    writePipelineCacheToDisk(std::move(blob), api);
+  };
+  QThreadPool::globalInstance()->start(std::move(task));
+}
+}
+
 std::shared_ptr<RenderState>
 createRenderState(GraphicsApi graphicsApi, QSize sz, QWindow* window)
 {
@@ -58,14 +142,29 @@ createRenderState(GraphicsApi graphicsApi, QSize sz, QWindow* window)
   const auto& settings = score::AppContext().settings<Gfx::Settings::Model>();
   state.samples = settings.resolveSamples(graphicsApi);
 
-  auto populateCaps = [](RenderState& s) {
-#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0)
+  auto populateCaps = [graphicsApi](RenderState& s) {
+    // Load persisted pipeline cache (if any) and set up a save-on-destroy
+    // hook that writes it back before QRhi is deleted.
     if(s.rhi)
     {
-      s.caps.drawIndirect = s.rhi->isFeatureSupported(QRhi::DrawIndirect);
-      s.caps.drawIndirectMulti = s.rhi->isFeatureSupported(QRhi::DrawIndirectMulti);
+      tryLoadPipelineCache(s.rhi, graphicsApi);
+      QRhi* rhiPtr = s.rhi;
+      s.preRhiDestroy = [rhiPtr, graphicsApi]() {
+        tryStorePipelineCache(rhiPtr, graphicsApi);
+      };
+      // Plan 09 S6: mid-session flush for crash-resilient cache
+      // persistence. RenderList::render throttles this after PSO
+      // stalls; the QRhi read happens here on the render thread but the
+      // blocking file write is offloaded to a worker so the render
+      // thread isn't stalled on disk right after a PSO-compile burst.
+      s.savePipelineCache = [rhiPtr, graphicsApi]() {
+        tryStorePipelineCacheAsync(rhiPtr, graphicsApi);
+      };
+    }
+    if(s.rhi)
+    {
+      s.caps.populate(*s.rhi);
     }
-#endif
     // Clamp the requested sample count against what the hardware actually
     // supports. Without this, asking for e.g. 16x MSAA on a card that only
     // does 8x silently mismatches between the value stored in
@@ -109,6 +208,17 @@ createRenderState(GraphicsApi graphicsApi, QSize sz, QWindow* window)
 #ifndef NDEBUG
   flags |= QRhi::EnableDebugMarkers;
 #endif
+  // Let the RHI save per-backend pipeline binary cache so subsequent runs
+  // skip the initial pipeline compilation cost (big win for Vulkan/D3D12).
+  flags |= QRhi::EnablePipelineCacheDataSave;
+
+  // Enable per-command-buffer GPU timestamps. Required for the per-pass
+  // GPU timing panel (Plan 09 S6) — without this flag,
+  // QRhiCommandBuffer::lastCompletedGpuTime() returns 0 on Vulkan/D3D12/Metal.
+  // Negligible overhead when no timer instance is active.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
+  flags |= QRhi::EnableTimestamps;
+#endif
 
 #ifndef QT_NO_OPENGL
   if(graphicsApi == OpenGL)
@@ -289,13 +399,18 @@ ScreenNode::~ScreenNode()
 {
   if(m_swapChain)
   {
-    m_swapChain->deleteLater();
-
+    // Order matters: clear the alias + flag on the Window BEFORE releasing
+    // the QRhiSwapChain. A queued QExposeEvent landing between the deferred
+    // delete and the nullings would otherwise observe the inconsistent
+    // state (m_hasSwapChain == true && m_swapChain still aliasing freed
+    // memory). See diagnostic 047.
     if(m_window)
     {
-      m_window->m_swapChain = nullptr;
       m_window->m_hasSwapChain = false;
+      m_window->m_swapChain = nullptr;
     }
+
+    m_swapChain->deleteLater();
   }
 
   if(m_window && m_window->state)
@@ -360,8 +475,8 @@ void ScreenNode::onRendererChange()
         return;
       }
     }
+    m_window->m_canRender = false;
   }
-  m_window->m_canRender = false;
 }
 
 void ScreenNode::stopRendering()
@@ -380,7 +495,13 @@ void ScreenNode::stopRendering()
 
 void ScreenNode::setRenderer(std::shared_ptr<RenderList> r)
 {
-  m_window->state->renderer = r;
+  // m_window can be null after destroyOutput() (which calls m_window.reset()).
+  // Reachable from Graph::createOutputRenderList paths after a graphics-API
+  // switch / sample-count change / output-disable cycle. Sibling guards
+  // already exist in stopRendering and onRendererChange above; this one
+  // was missed when those were patched.
+  if(m_window && m_window->state)
+    m_window->state->renderer = r;
 }
 
 RenderList* ScreenNode::renderer() const
@@ -425,12 +546,28 @@ void ScreenNode::setConfiguration(Configuration conf)
 
 void ScreenNode::setSwapchainFlag(Gfx::SwapchainFlag flag)
 {
+  if(m_swapchainFlag == flag)
+    return;
   m_swapchainFlag = flag;
+  // Live flag change (sRGB toggle) requires the swapchain to be recreated
+  // with the new flag bits — createOutput is where setFlags is called.
+  // destroyOutput tears down; Graph::createOutputRenderList rebuilds on
+  // next reconcile (same pattern updateGraphicsAPI uses for sample-count).
+  if(m_window)
+    destroyOutput();
 }
 
 void ScreenNode::setSwapchainFormat(Gfx::SwapchainFormat format)
 {
+  if(m_swapchainFormat == format)
+    return;
   m_swapchainFormat = format;
+  // Same rebuild rationale as setSwapchainFlag above: setFormat is called
+  // inside createOutput, so without the rebuild the field was updated
+  // but the live swapchain kept its prior format (HDR↔SDR toggle was
+  // silently inert).
+  if(m_window)
+    destroyOutput();
 }
 
 void ScreenNode::setSize(QSize sz)
@@ -628,6 +765,35 @@ void ScreenNode::destroyOutput()
   if(!m_window)
     return;
 
+  // Drain the GPU before tearing anything down. Without this, queued frames
+  // can still reference the swapchain / RPD / depth-stencil while we're
+  // freeing them — and worse, when setSwapchainFormat / setSwapchainFlag
+  // call destroyOutput synchronously (commit e2afe7874), the host window's
+  // last beginFrame may still hold an unfinished cbWrapper referenced by
+  // ScenePreprocessor's per-frame copyBuffer (commit fe146c8de). The next
+  // runInitialPasses then records vkCmdCopyBuffer / vkCmdPipelineBarrier
+  // into a CB whose underlying VkCommandBuffer was already vkEndCommandBuffer'd
+  // (VUID-vkCmdCopyBuffer-commandBuffer-recording / VUID-vkCmdPipelineBarrier-
+  // commandBuffer-recording), often followed by a device loss.
+  //
+  // MultiWindowNode::destroyOutput already does this at line ~1068; mirror it.
+  if(m_window->state && m_window->state->rhi)
+  {
+    // Pre-condition: destroyOutput must not be called inside a frame
+    // (between beginFrame and endFrame). If this fires, some upstream
+    // path triggered a teardown mid-render — the cascade would be
+    // worse than just deferring to next frame.
+    SCORE_ASSERT(!m_window->state->rhi->isRecordingFrame());
+    m_window->state->rhi->finish();
+  }
+
+  // Persist-across-rebuild contract: the registry survives RL teardown
+  // so we must explicitly release its QRhi resources here, BEFORE
+  // RenderState::destroy() (called below via m_window->state->destroy())
+  // frees the device. destroyOwned() `delete`s the buffer / texture /
+  // sampler wrappers directly while the QRhi is still alive.
+  releaseRegistry();
+
   delete m_depthStencil;
   m_depthStencil = nullptr;
 
@@ -643,14 +809,19 @@ void ScreenNode::destroyOutput()
   //delete s.renderBuffer;
   //s.renderBuffer = nullptr;
 
-  delete m_swapChain;
-  m_swapChain = nullptr;
-
+  // Order matters: clear the alias + flag on the Window BEFORE deleting
+  // the QRhiSwapChain (see diagnostic 047). A queued event reaching
+  // Window::exposeEvent between the delete and the nulling would
+  // otherwise observe (m_hasSwapChain == true && m_swapChain dangling).
   if(m_window)
   {
+    m_window->m_hasSwapChain = false;
     m_window->m_swapChain = nullptr;
   }
 
+  delete m_swapChain;
+  m_swapChain = nullptr;
+
   if(m_window)
   {
     if(auto s = m_window->state)
@@ -742,6 +913,13 @@ score::gfx::OutputNodeRenderer* ScreenNode::createRenderer(RenderList& r) const
   score::gfx::TextureRenderTarget rt;
   rt.renderTarget = m_swapChain->currentFrameRenderTarget();
   rt.renderPass = r.state.renderPassDescriptor;
+  // No depth attachment exposed here on purpose: ScaledRenderer is a
+  // fullscreen-quad blit that samples the upstream color texture and does
+  // not run depth test. All precision-critical 3D rendering happens
+  // upstream into an intermediate D32F offscreen render target allocated
+  // by createRenderTarget(...) in Utils.cpp. The swap chain's D24S8
+  // DepthStencil buffer is only attached at the QRhi level for the final
+  // blit pass — irrelevant to 3D depth precision.
   // FIXME why doesn't it work?
   // return new BasicRenderer{rt, r.state, *this};
   return new Gfx::ScaledRenderer{rt, r.state, *this};
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp
index 85da1f6bf3..3c504bb19f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp
@@ -1,3 +1,5 @@
+#include <Gfx/Graph/PipelineStateHelpers.hpp>
+#include <Gfx/Graph/RenderList.hpp>
 #include <Gfx/Graph/RenderedISFSamplerUtils.hpp>
 #include <Gfx/Graph/SimpleRenderedISFNode.hpp>
 
@@ -8,13 +10,48 @@
 namespace score::gfx
 {
 
+static const constexpr auto blit_vs = R"_(#version 450
+layout(location = 0) in vec2 position;
+layout(location = 1) in vec2 texcoord;
+layout(location = 0) out vec2 v_texcoord;
+
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
+
+out gl_PerVertex { vec4 gl_Position; };
+
+void main()
+{
+  v_texcoord = texcoord;
+  gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.);
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  gl_Position.y = - gl_Position.y;
+#endif
+}
+)_";
+
+static const constexpr auto blit_fs = R"_(#version 450
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 renderSize;
+} renderer;
+
+layout(binding = 3) uniform sampler2D blitTexture;
+layout(location = 0) in vec2 v_texcoord;
+layout(location = 0) out vec4 fragColor;
+
+void main() { fragColor = texture(blitTexture, v_texcoord); }
+)_";
+
 SimpleRenderedISFNode::SimpleRenderedISFNode(const ISFNode& node) noexcept
     : score::gfx::NodeRenderer{node}
     , n{const_cast<ISFNode&>(node)}
 {
 }
 
-void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex)
+void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex)
 {
   int sampler_idx = 0;
   for(auto* p : node.input)
@@ -22,7 +59,12 @@ void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* t
     if(p == &input)
       break;
     if(p->type == Types::Image)
+    {
       sampler_idx++;
+      // Skip the depth sampler that follows ports with SamplableDepth
+      if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+        sampler_idx++;
+    }
   }
 
   if(sampler_idx < (int)m_inputSamplers.size())
@@ -35,6 +77,57 @@ void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* t
         if(pass.p.srb)
           score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex);
     }
+
+    // Update the depth sampler if the port has SamplableDepth
+    if(depthTex
+       && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth
+       && sampler_idx + 1 < (int)m_inputSamplers.size())
+    {
+      auto& depthSampl = m_inputSamplers[sampler_idx + 1];
+      if(depthSampl.texture != depthTex)
+      {
+        depthSampl.texture = depthTex;
+        for(auto& [e, pass] : m_passes)
+          if(pass.p.srb)
+            score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex);
+      }
+    }
+  }
+}
+
+// Applies the filter, mipmap and address modes of `spec` to the colour sampler
+// of image input `input`; the sampler is only re-created when a mode differs.
+void SimpleRenderedISFNode::updateInputSamplerFilter(
+    const Port& input, const RenderTargetSpecs& spec)
+{
+  // Index exactly like updateInputTexture(): each Image port owns one sampler,
+  // immediately followed by a depth sampler when it is flagged SamplableDepth.
+  int sampler_idx = 0;
+  for(auto* p : node.input)
+  {
+    if(p == &input)
+      break;
+    if(p->type == Types::Image)
+      sampler_idx += ((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) ? 2 : 1;
+  }
+
+  if(sampler_idx < (int)m_inputSamplers.size())
+  {
+    auto* sampler = m_inputSamplers[sampler_idx].sampler;
+    // See RenderedISFNode::updateInputSamplerFilter — skip the
+    // sampler->create() when nothing actually needs updating.
+    if(sampler->magFilter() == spec.mag_filter && sampler->minFilter() == spec.min_filter
+       && sampler->mipmapMode() == spec.mipmap_mode
+       && sampler->addressU() == spec.address_u && sampler->addressV() == spec.address_v
+       && sampler->addressW() == spec.address_w)
+      return;
+    sampler->setMagFilter(spec.mag_filter);
+    sampler->setMinFilter(spec.min_filter);
+    sampler->setMipmapMode(spec.mipmap_mode);
+    sampler->setAddressU(spec.address_u);
+    sampler->setAddressV(spec.address_v);
+    sampler->setAddressW(spec.address_w);
+    sampler->create();
   }
 }
 
@@ -79,7 +172,8 @@ std::vector<Sampler> SimpleRenderedISFNode::allSamplers() const noexcept
 }
 
 void SimpleRenderedISFNode::initPass(
-    const TextureRenderTarget& renderTarget, RenderList& renderer, Edge& edge)
+    const TextureRenderTarget& renderTarget, RenderList& renderer, Edge& edge,
+    QRhiResourceUpdateBatch& res)
 {
   auto& model_passes = n.descriptor().passes;
   SCORE_ASSERT(model_passes.size() == 1);
@@ -92,12 +186,53 @@ void SimpleRenderedISFNode::initPass(
   pubo->setName("SimpleRenderedISFNode::initPass::pubo");
   pubo->create();
 
+  // Allocate storage resources (SSBOs + images) declared in the shader.
+  // Reuse the caller's `res` batch rather than allocating a fresh one —
+  // the earlier `rhi.nextResourceUpdateBatch()` here was never released
+  // or submitted (the "tmp gets merged at next endFrame" comment was
+  // wrong: QRhi does NOT auto-reclaim unreleased batches). That leaked
+  // one pool slot per addOutputPass call, which exhausts the 64-slot
+  // pool after ~60 resize cycles under X11 async resize where each
+  // resize tick rebuilds the RenderList (and thus re-inits every ISF
+  // renderer's passes) without any intervening frame.
+  ensureStorageResources(
+      rhi, res, renderer, n.descriptor(), m_storage, renderer.state.renderSize);
+  bindUpstreamBuffers(renderer, n.input, m_storage);
+
+  // Build the extra-binding list (storage + multiview UBO).
+  auto extraRhiBindings = buildExtraBindings(m_storage);
+  if(m_multiViewUBO)
+  {
+    // Multiview UBO binds right after storage resources.
+    int mvBinding = m_firstStorageBinding;
+    for(const auto& e : m_storage.ssbos)
+    {
+      if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1);
+      if(e.prev_binding >= 0) mvBinding = std::max(mvBinding, e.prev_binding + 1);
+    }
+    for(const auto& e : m_storage.images)
+      if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1);
+
+    extraRhiBindings.append(QRhiShaderResourceBinding::uniformBuffer(
+        mvBinding,
+        QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage,
+        m_multiViewUBO));
+  }
+
+  // Compute effective pipeline state: global default + per-pass override.
+  auto eff_state = mergeState(
+      n.descriptor().default_state, model_passes[0].override_state);
+
   // Create the main pass
   try
   {
     auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS);
-    auto pip = score::gfx::buildPipeline(
-        renderer, *m_mesh, v, s, renderTarget, pubo, m_materialUBO, allSamplers());
+    auto pip = score::gfx::buildPipelineWithState(
+        renderer, *m_mesh, v, s, renderTarget, pubo, m_materialUBO, allSamplers(),
+        std::span<QRhiShaderResourceBinding>(
+            extraRhiBindings.data(), (std::size_t)extraRhiBindings.size()),
+        eff_state,
+        n.descriptor().multiview_count);
     if(pip.pipeline)
     {
       m_passes.emplace_back(&edge, Pass{renderTarget, pip, pubo});
@@ -119,6 +254,36 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate
   const auto& outputs = n.descriptor().outputs;
   QSize sz = renderer.state.renderSize;
 
+  // Detect layered / multiview rendering needs.
+  int maxLayers = 1;
+  for(const auto& out : outputs)
+    if(out.layers > maxLayers)
+      maxLayers = out.layers;
+  const int mvCount = n.descriptor().multiview_count;
+  const bool wantMultiview
+      = mvCount >= 2 && renderer.state.caps.multiview;
+  if(wantMultiview && mvCount > maxLayers)
+    maxLayers = mvCount;
+
+  // Per-OUTPUT sample count: MSAA must be uniform across all colour
+  // attachments of a render pass, so pick the highest SAMPLES requested by
+  // any OUTPUT and use it as the render pass's sample count. Clamped later
+  // against QRhi::supportedSampleCounts() in createRenderTarget.
+  //
+  // IMPORTANT: the textures we allocate below stay SINGLE-SAMPLE — they
+  // are the RESOLVE TARGETS. createRenderTarget(mrtSamples) allocates
+  // multi-sample colorRenderBuffer attachments internally and wires each
+  // of these textures as its resolve destination (Vulkan contract: a
+  // resolve target must be single-sample). Downstream shaders sample the
+  // already-resolved single-sample textures, so there's no MSAA stride
+  // mismatch. (Previous code called setSampleCount(mrtSamples) on these
+  // textures, which produced MSAA storage sampled as if it were
+  // single-sample — visible as evenly-spaced horizontal stripes
+  // proportional to the sample count.)
+  int mrtSamples = std::max(renderer.samples(), 1);
+  for(const auto& out : outputs)
+    mrtSamples = std::max(mrtSamples, out.samples);
+
   // Create color and depth textures based on OUTPUTS declarations
   std::vector<QRhiTexture*> colorTextures;
   QRhiTexture* depthTex = nullptr;
@@ -127,32 +292,81 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate
   {
     if(out.type == "depth")
     {
-      depthTex = rhi.newTexture(
-          QRhiTexture::D32F, sz, 1,
-          QRhiTexture::RenderTarget);
+      auto depthFmt = parseOutputFormat(out.format, QRhiTexture::D32F);
+      QRhiTexture::Flags dflags = QRhiTexture::RenderTarget;
+      if(maxLayers > 1)
+      {
+        dflags |= QRhiTexture::TextureArray;
+        depthTex = rhi.newTextureArray(depthFmt, maxLayers, sz, 1, dflags);
+      }
+      else
+      {
+        depthTex = rhi.newTexture(depthFmt, sz, 1, dflags);
+      }
       depthTex->setName(("SimpleRenderedISFNode::MRT::depth::" + out.name).c_str());
       SCORE_ASSERT(depthTex->create());
     }
     else
     {
-      auto* tex = rhi.newTexture(
-          QRhiTexture::RGBA8, sz, 1,
-          QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore);
+      auto fmt = parseOutputFormat(out.format, QRhiTexture::RGBA8);
+      QRhiTexture::Flags flags = QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore;
+      const int layers = std::max({1, out.layers, (wantMultiview ? mvCount : 1)});
+      QRhiTexture* tex = nullptr;
+      if(layers > 1)
+      {
+        flags |= QRhiTexture::TextureArray;
+        tex = rhi.newTextureArray(fmt, layers, sz, 1, flags);
+      }
+      else
+      {
+        tex = rhi.newTexture(fmt, sz, 1, flags);
+      }
       tex->setName(("SimpleRenderedISFNode::MRT::color::" + out.name).c_str());
       SCORE_ASSERT(tex->create());
       colorTextures.push_back(tex);
     }
   }
 
-  if(colorTextures.empty())
+  // Depth-only shader: the only output is depth.
+  if(colorTextures.empty() && depthTex)
+  {
+    // Build the RT AROUND the node-owned depth texture (which may be a
+    // TextureArray when maxLayers > 1). The previous code asked
+    // createDepthOnlyRenderTarget to allocate its own depth texture and then
+    // deleted it — but the render pass still referenced it (use-after-free),
+    // and textureForOutput() returned a texture that was never rendered to.
+    m_mrtRenderTarget = createDepthOnlyRenderTarget(
+        renderer.state, depthTex, mrtSamples, /*samplableDepth=*/true);
+  }
+  else if(wantMultiview && !colorTextures.empty())
+  {
+    // Attach ALL color textures so attachments == pipeline blend targets.
+    m_mrtRenderTarget = createMultiViewRenderTarget(
+        renderer.state,
+        std::span<QRhiTexture* const>{colorTextures.data(), colorTextures.size()},
+        mvCount, depthTex, mrtSamples);
+  }
+  else if(maxLayers > 1 && !colorTextures.empty())
+  {
+    // Pick layer 0 by default; per-pass LAYER is handled by the pass loop.
+    // Attach ALL color textures so attachments == pipeline blend targets.
+    m_mrtRenderTarget = createLayeredRenderTarget(
+        renderer.state,
+        std::span<QRhiTexture* const>{colorTextures.data(), colorTextures.size()},
+        0, depthTex, mrtSamples);
+  }
+  else if(!colorTextures.empty())
+  {
+    m_mrtRenderTarget = createRenderTarget(
+        renderer.state,
+        std::span<QRhiTexture* const>{colorTextures.data(), colorTextures.size()},
+        depthTex,
+        mrtSamples);
+  }
+  else
+  {
     return;
-
-  // Create the multi-attachment render target
-  m_mrtRenderTarget = createRenderTarget(
-      renderer.state,
-      std::span<QRhiTexture* const>{colorTextures.data(), colorTextures.size()},
-      depthTex,
-      renderer.samples());
+  }
 
   // Create the pipeline and pass using this render target
   QRhiBuffer* pubo = rhi.newBuffer(
@@ -160,11 +374,39 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate
   pubo->setName("SimpleRenderedISFNode::initMRTPass::pubo");
   pubo->create();
 
+  // Extra bindings: storage + multiview UBO (same as initPass).
+  auto extraRhiBindings = buildExtraBindings(m_storage);
+  if(m_multiViewUBO)
+  {
+    int mvBinding = m_firstStorageBinding;
+    for(const auto& e : m_storage.ssbos)
+    {
+      if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1);
+      if(e.prev_binding >= 0) mvBinding = std::max(mvBinding, e.prev_binding + 1);
+    }
+    for(const auto& e : m_storage.images)
+      if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1);
+
+    extraRhiBindings.append(QRhiShaderResourceBinding::uniformBuffer(
+        mvBinding,
+        QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage,
+        m_multiViewUBO));
+  }
+
+  const auto& passes = n.descriptor().passes;
+  auto eff_state = mergeState(
+      n.descriptor().default_state,
+      passes.empty() ? isf::pipeline_state{} : passes[0].override_state);
+
   try
   {
     auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS);
-    auto pip = score::gfx::buildPipeline(
-        renderer, *m_mesh, v, s, m_mrtRenderTarget, pubo, m_materialUBO, allSamplers());
+    auto pip = score::gfx::buildPipelineWithState(
+        renderer, *m_mesh, v, s, m_mrtRenderTarget, pubo, m_materialUBO, allSamplers(),
+        std::span<QRhiShaderResourceBinding>(
+            extraRhiBindings.data(), (std::size_t)extraRhiBindings.size()),
+        eff_state,
+        wantMultiview ? mvCount : 0);
     if(pip.pipeline)
     {
       // Use nullptr edge — MRT passes are shared across all output edges
@@ -181,82 +423,53 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate
   }
 }
 
-void SimpleRenderedISFNode::initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res)
+void SimpleRenderedISFNode::initMRTBlitPass(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge)
 {
-  static const constexpr auto blit_vs = R"_(#version 450
-layout(location = 0) in vec2 position;
-layout(location = 1) in vec2 texcoord;
-layout(location = 0) out vec2 v_texcoord;
-
-layout(std140, binding = 0) uniform renderer_t {
-  mat4 clipSpaceCorrMatrix;
-  vec2 renderSize;
-} renderer;
-
-out gl_PerVertex { vec4 gl_Position; };
+  QRhiTexture* srcTex = textureForOutput(*edge.source); // texture exposed for the edge's source port
+  if(!srcTex) // nothing to blit from
+    return;
 
-void main()
-{
-  v_texcoord = texcoord;
-  gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.);
-#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
-  gl_Position.y = - gl_Position.y;
-#endif
-}
-)_";
+  auto rt = renderer.renderTargetForOutput(edge); // destination of the blit
+  if(!rt.renderTarget) // no render target available for this edge
+    return;
 
-  static const constexpr auto blit_fs = R"_(#version 450
-layout(std140, binding = 0) uniform renderer_t {
-  mat4 clipSpaceCorrMatrix;
-  vec2 renderSize;
-} renderer;
+  auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, blit_vs, blit_fs);
 
-layout(binding = 3) uniform sampler2D blitTexture;
-layout(location = 0) in vec2 v_texcoord;
-layout(location = 0) out vec4 fragColor;
+  QRhiSampler* sampler = renderer.state.rhi->newSampler(
+      QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
+      QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
+  sampler->setName("SimpleRenderedISFNode::MRT::blitSampler");
+  sampler->create();
+  m_blitSamplersByEdge[&edge] = sampler; // tracked per edge so removeOutputPass() can delete it
 
-void main() { fragColor = texture(blitTexture, v_texcoord); }
-)_";
+  auto pip = score::gfx::buildPipeline(
+      renderer, *m_mesh, vertexS, fragmentS, rt, nullptr, nullptr,
+      std::array<Sampler, 1>{Sampler{sampler, srcTex}});
 
-  auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, blit_vs, blit_fs);
+  if(pip.pipeline)
+  {
+    m_passes.emplace_back(&edge, Pass{rt, pip, nullptr}); // blit passes carry no process UBO
+  }
+  else
+  {
+    m_blitSamplersByEdge.erase(&edge); // no pipeline was built: undo the registration above
+    delete sampler;
+  }
+}
 
+void SimpleRenderedISFNode::initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
   // For each output port, create a blit pass for each downstream edge
   for(auto* output_port : n.output)
   {
-    QRhiTexture* srcTex = textureForOutput(*output_port);
-    if(!srcTex)
-      continue;
-
     for(Edge* edge : output_port->edges)
     {
-      auto rt = renderer.renderTargetForOutput(*edge);
-      if(!rt.renderTarget)
-        continue;
-
-      QRhiSampler* sampler = renderer.state.rhi->newSampler(
-          QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
-          QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
-      sampler->setName("SimpleRenderedISFNode::MRT::blitSampler");
-      sampler->create();
-      m_blitSamplers.push_back(sampler);
-
-      auto pip = score::gfx::buildPipeline(
-          renderer, *m_mesh, vertexS, fragmentS, rt, nullptr, nullptr,
-          std::array<Sampler, 1>{Sampler{sampler, srcTex}});
-
-      if(pip.pipeline)
-      {
-        m_passes.emplace_back(edge, Pass{rt, pip, nullptr});
-      }
-      else
-      {
-        delete sampler;
-      }
+      initMRTBlitPass(renderer, res, *edge);
     }
   }
 }
 
-void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+void SimpleRenderedISFNode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   QRhi& rhi = *renderer.state.rhi;
 
@@ -272,7 +485,7 @@ void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch&
     }
   }
 
-  // Create the material UBO
+  // Create the material UBO and upload initial data
   m_materialSize = n.m_materialSize;
   if(m_materialSize > 0)
   {
@@ -280,6 +493,8 @@ void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch&
         = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize);
     m_materialUBO->setName("SimpleRenderedISFNode::init::m_materialUBO");
     SCORE_ASSERT(m_materialUBO->create());
+    if(n.m_material_data)
+      res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get());
   }
 
   // Create the samplers
@@ -287,110 +502,140 @@ void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch&
   SCORE_ASSERT(m_inputSamplers.empty());
   SCORE_ASSERT(m_audioSamplers.empty());
 
-  m_inputSamplers = initInputSamplers(this->n, renderer, n.input);
+  m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor());
 
   m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures);
 
-  // Create the passes
+  // Collect graphics-visible storage buffers and images declared in the
+  // shader (storage_input with visibility=fragment/vertex/both, or
+  // csf_image_input with non-compute visibility). Bindings start right
+  // after the sampler bindings.
+  {
+    const int firstStorageBinding
+        = 3 + (int)m_inputSamplers.size() + (int)m_audioSamplers.size();
+    m_firstStorageBinding = firstStorageBinding;
+    collectGraphicsStorageResources(n.descriptor(), firstStorageBinding, m_storage);
+  }
+
+  // Allocate the multiview UBO when MULTIVIEW >= 2 is declared.
+  if(n.descriptor().multiview_count >= 2)
+  {
+    const int mvCount = n.descriptor().multiview_count;
+    m_multiViewUBO = rhi.newBuffer(
+        QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer,
+        sizeof(float[16]) * mvCount);
+    m_multiViewUBO->setName("SimpleRenderedISFNode::multiview_ubo");
+    SCORE_ASSERT(m_multiViewUBO->create());
+  }
+
   // Count outputs to determine if we need MRT
   {
     const auto& outputs = n.descriptor().outputs;
     int colorCount = 0;
     bool hasDepth = false;
+    bool hasLayered = false;
     for(const auto& out : outputs)
     {
       if(out.type == "depth")
         hasDepth = true;
       else
         colorCount++;
+      if(out.layers > 1)
+        hasLayered = true;
     }
-    // MRT is only needed for multiple color attachments or depth output
-    m_hasMRT = colorCount > 1 || hasDepth;
+    // MRT is needed for multiple color attachments, depth output, or layered
+    // output (TextureArray). Multiview also requires the MRT path.
+    m_hasMRT = colorCount > 1 || hasDepth || hasLayered
+               || n.descriptor().multiview_count >= 2;
   }
 
+  m_initialized = true;
+}
+
+// Adds the pass rendering into the target of `edge`: in MRT mode a blit from the
+// shared internal target (built on first use), otherwise a direct ISF pass.
+void SimpleRenderedISFNode::addOutputPass(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
   if(m_hasMRT)
   {
-    // MRT: create internal render target, render in runInitialPasses,
-    // then blit to downstream render targets in runRenderPass
-    initMRTPass(renderer, res);
+    // Test the render target, not its colour texture: a depth-only target may
+    // have none, which would re-run initMRTPass for every additional edge.
+    if(m_mrtRenderTarget.renderTarget == nullptr)
+      initMRTPass(renderer, res);
 
-    // Create blit passes for each downstream edge across all output ports
-    initMRTBlitPasses(renderer, res);
+    initMRTBlitPass(renderer, res, edge);
   }
   else
   {
-    // Default single-output path (also handles OUTPUTS with a single color)
-    if(n.output[0]->edges.empty())
-      qDebug(" WTF EMPTY");
-    for(Edge* edge : n.output[0]->edges)
+    auto rt = renderer.renderTargetForOutput(edge);
+    if(rt.renderTarget)
     {
-      auto rt = renderer.renderTargetForOutput(*edge);
-      if(rt.renderTarget)
-      {
-        initPass(rt, renderer, *edge);
-      }
-      else
-      {
-        qDebug("WTF NO RT");
-      }
+      initPass(rt, renderer, edge, res);
     }
   }
 }
 
-void SimpleRenderedISFNode::update(
-    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
+void SimpleRenderedISFNode::removeOutputPass(RenderList& renderer, Edge& edge)
 {
-  m_mrtRenderedThisFrame = false;
-
-  n.standardUBO.passIndex = 0;
-  n.standardUBO.frameIndex++;
-  auto sz = renderer.renderSize(edge);
-  n.standardUBO.renderSize[0] = sz.width();
-  n.standardUBO.renderSize[1] = sz.height();
-
-  // Update audio textures
-  if(!n.m_audio_textures.empty() && !m_audioTex)
+  // Find and erase the pass keyed on this edge (a direct pass, or its blit pass in MRT mode)
+  auto it = ossia::find_if(m_passes, [&](auto& p) { return p.first == &edge; });
+  if(it != m_passes.end())
   {
-    m_audioTex.emplace();
+    it->second.p.release();
+    if(it->second.processUBO)
+      it->second.processUBO->deleteLater();
+    m_passes.erase(it);
   }
 
-  bool audioChanged = false;
-  for(auto& audio : n.m_audio_textures)
+  if(m_hasMRT)
   {
-    if(std::optional<Sampler> sampl
-       = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res))
+    // Release the blit sampler that initMRTBlitPass() registered for this edge
+    auto sit = m_blitSamplersByEdge.find(&edge);
+    if(sit != m_blitSamplersByEdge.end())
     {
-      // Texture changed -> material changed
-      audioChanged = true;
+      delete sit->second;
+      m_blitSamplersByEdge.erase(sit);
+    }
 
-      auto& [rhiSampler, tex] = *sampl;
-      for(auto& [e, pass] : m_passes)
+    // Blit passes are keyed on a real edge, the shared MRT pass on nullptr.
+    // Once no blit pass is left, the shared pass and its target are dropped.
+    bool hasBlitPasses = false;
+    for(auto& [e, pass] : m_passes)
+    {
+      if(e != nullptr)
       {
-        score::gfx::replaceTexture(
-            *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture());
+        hasBlitPasses = true;
+        break;
       }
     }
+    if(!hasBlitPasses)
+    {
+      // Remove the shared MRT pass (the entry whose edge is nullptr)
+      auto mrtIt = ossia::find_if(m_passes, [](auto& p) { return p.first == nullptr; });
+      if(mrtIt != m_passes.end())
+      {
+        mrtIt->second.p.release();
+        if(mrtIt->second.processUBO)
+          mrtIt->second.processUBO->deleteLater();
+        m_passes.erase(mrtIt);
+      }
+      m_mrtRenderTarget.release(); // addOutputPass() re-creates it when needed
+    }
   }
+}
 
-  // Update material
-  if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged))
-  {
-    char* data = n.m_material_data.get();
-    res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
-  }
-
-  // Update all the process UBOs
-  for(auto& [e, pass] : m_passes)
-  {
-    if(pass.processUBO)
-      res.updateDynamicBuffer(
-          pass.processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO);
-  }
+bool SimpleRenderedISFNode::hasOutputPassForEdge(Edge& edge) const
+{ // True when m_passes already holds a pass keyed on `edge`.
+  const auto keyedOnEdge = [&edge](const auto& entry) { return entry.first == &edge; };
+  return m_passes.end() != ossia::find_if(m_passes, keyedOnEdge);
 }
 
-void SimpleRenderedISFNode::release(RenderList& r)
+void SimpleRenderedISFNode::releaseState(RenderList& r)
 {
-  // customRelease
+  if(!m_initialized)
+    return;
+
+  // Release all remaining passes
   {
     for(auto& texture : n.m_audio_textures)
     {
@@ -430,11 +675,11 @@ void SimpleRenderedISFNode::release(RenderList& r)
     // texture is deleted elsewhere
   }
   m_audioSamplers.clear();
-  for(auto sampler : m_blitSamplers)
+  for(auto& [edge, sampler] : m_blitSamplersByEdge)
   {
     delete sampler;
   }
-  m_blitSamplers.clear();
+  m_blitSamplersByEdge.clear();
 
   delete m_materialUBO;
   m_materialUBO = nullptr;
@@ -447,6 +692,145 @@ void SimpleRenderedISFNode::release(RenderList& r)
     m_mrtRenderTarget.release();
     m_hasMRT = false;
   }
+
+  // Release storage resources (owned SSBOs + storage images).
+  m_storage.release();
+
+  if(m_multiViewUBO)
+  {
+    m_multiViewUBO->deleteLater();
+    m_multiViewUBO = nullptr;
+  }
+
+  m_initialized = false;
+}
+
+// Binds the texture produced by the upstream end of `edge`, plus the depth
+// texture of the sink port's render target, to this node's input samplers.
+void SimpleRenderedISFNode::addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  if(edge.sink->type != Types::Image)
+    return;
+
+  // The upstream node must have a renderer registered for this RenderList.
+  auto& upstreamRenderers = edge.source->node->renderedNodes;
+  const auto found = upstreamRenderers.find(&renderer);
+  if(found == upstreamRenderers.end())
+    return;
+  if(auto* tex = found->second->textureForOutput(*edge.source))
+    updateInputTexture(
+        *edge.sink, tex, renderer.renderTargetForInputPort(*edge.sink).depthTexture);
+}
+
+// Detaches the texture fed by `edge`: the sink port's sampler (and its `_depth`
+// companion, if any) is pointed back at the RenderList's empty texture.
+void SimpleRenderedISFNode::removeInputEdge(RenderList& renderer, Edge& edge)
+{
+  if(edge.sink->type != Types::Image)
+    return;
+
+  // Ports declared with DEPTH: true own a second sampler binding. Once the
+  // cable is gone the upstream renderer is often released right away, which
+  // would leave that sampler caching a dangling QRhiTexture*; resetting the
+  // depth side as well keeps a freed VkImageView out of the SRB (otherwise
+  // vkUpdateDescriptorSets / the end-of-frame pipeline barrier crash).
+  auto& placeholder = renderer.emptyTexture();
+  QRhiTexture* depthFallback = nullptr;
+  if((edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
+    depthFallback = &placeholder;
+  updateInputTexture(*edge.sink, &placeholder, depthFallback);
+}
+
+// Full initialisation: shared state first, then one output pass per outgoing edge.
+void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  initState(renderer, res);
+  for(auto* port : n.output)
+    for(Edge* cable : port->edges)
+      addOutputPass(renderer, *cable, res);
+}
+
+// Per-frame update: refreshes the standard UBO values (frame index, render
+// size), the audio textures, the material UBO when something changed, and the
+// upstream buffer bindings and process UBO of every pass.
+void SimpleRenderedISFNode::update(
+    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
+{
+  m_mrtRenderedThisFrame = false;
+
+  n.standardUBO.passIndex = 0;
+  n.standardUBO.frameIndex++;
+  auto sz = renderer.renderSize(edge);
+  n.standardUBO.renderSize[0] = sz.width();
+  n.standardUBO.renderSize[1] = sz.height();
+
+  // Update audio textures
+  if(!n.m_audio_textures.empty() && !m_audioTex)
+    m_audioTex.emplace();
+
+  bool audioChanged = false;
+  std::size_t audio_idx = 0;
+  for(auto& audio : n.m_audio_textures)
+  {
+    if(std::optional<Sampler> sampl
+       = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res))
+    {
+      // Texture changed -> material changed
+      audioChanged = true;
+
+      auto& [rhiSampler, tex, fb_] = *sampl;
+      // A missing texture is replaced by the empty placeholder for the SRBs
+      // *and* for the cache below, so neither ever holds a null texture.
+      auto* liveTex = tex ? tex : &renderer.emptyTexture();
+
+      // Keep m_audioSamplers[i].texture in sync with the live GPU texture so
+      // a later pipeline rebuild (e.g. the rt_changed path in RenderList::render
+      // doing removeOutputPass + addOutputPass) binds it and not a stale one.
+      if(audio_idx < m_audioSamplers.size())
+        m_audioSamplers[audio_idx].texture = liveTex;
+
+      for(auto& [e, pass] : m_passes)
+        if(pass.p.srb)
+          score::gfx::replaceTexture(*pass.p.srb, rhiSampler, liveTex);
+    }
+    ++audio_idx;
+  }
+
+  // Update material
+  if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged))
+  {
+    char* data = n.m_material_data.get();
+    res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data);
+  }
+  materialChanged = false;
+
+  // Reset event ports now that the UBO has captured their pulse value.
+  // If anything fired, force next frame's upload so the reset-to-zero
+  // propagates out through the normally-gated upload path.
+  if(n.resetEventPortsAfterFrame())
+    materialChanged = true;
+
+  // Re-bind upstream buffers (UBOs / read-only SSBOs sourced from upstream
+  // ports). Cables can be added or replaced after init, so this must run
+  // every frame. We pass each pass's SRB so that buffer swaps patch the
+  // descriptor set in place; without this, uniform_input cables connected
+  // post-init never reach the shader and the placeholder UBO stays bound
+  // (zero-filled → degenerate matrices on the GPU).
+  for(auto& [e, pass] : m_passes)
+    bindUpstreamBuffers(renderer, n.input, m_storage, pass.p.srb);
+
+  // Update all the process UBOs
+  for(auto& [e, pass] : m_passes)
+  {
+    if(pass.processUBO)
+      res.updateDynamicBuffer(
+          pass.processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO);
+  }
+}
+
+void SimpleRenderedISFNode::release(RenderList& r)
+{
+  releaseState(r); // release() only forwards: all teardown is done by releaseState()
 }
 
 void SimpleRenderedISFNode::runInitialPasses(
@@ -469,19 +853,27 @@ void SimpleRenderedISFNode::runInitialPasses(
   SCORE_ASSERT(pass.p.srb);
 
   cb.beginPass(
-      pass.renderTarget.renderTarget, Qt::transparent, {1.0f, 0}, updateBatch);
+      pass.renderTarget.renderTarget, Qt::transparent, {0.0f, 0}, updateBatch);
   updateBatch = nullptr;
 
   cb.setGraphicsPipeline(pass.p.pipeline);
   cb.setShaderResources(pass.p.srb);
 
-  auto* tex = pass.renderTarget.texture;
-  cb.setViewport(QRhiViewport(
-      0, 0, tex->pixelSize().width(), tex->pixelSize().height()));
+  auto* tex = pass.renderTarget.texture ? pass.renderTarget.texture
+              : pass.renderTarget.depthTexture;
+  if(tex)
+  {
+    cb.setViewport(QRhiViewport(
+        0, 0, tex->pixelSize().width(), tex->pixelSize().height()));
+  }
 
-  m_mesh->draw(this->m_meshBuffer, cb);
+  drawMeshWithOptionalIndirect(*m_mesh, this->m_meshBuffer, cb);
 
   cb.endPass();
+
+  // Persistent SSBO ping-pong: swap current and previous for next frame.
+  if(pass.p.srb)
+    swapPersistentSSBOs(m_storage, *pass.p.srb);
 }
 
 void SimpleRenderedISFNode::runRenderPass(
@@ -523,10 +915,7 @@ void SimpleRenderedISFNode::runRenderPass(
   auto it = ossia::find_if(this->m_passes, [&](auto& p) { return p.first == &edge; });
   // Maybe the shader could not be created
   if(it == this->m_passes.end())
-  {
-    qDebug(" NO PASS FOUND");
     return;
-  }
 
   auto& pass = it->second;
 
@@ -545,11 +934,17 @@ void SimpleRenderedISFNode::runRenderPass(
     {
       cb.setGraphicsPipeline(pipeline);
       cb.setShaderResources(srb);
-      cb.setViewport(QRhiViewport(
-          0, 0, texture->pixelSize().width(), texture->pixelSize().height()));
+      if(texture)
+      {
+        cb.setViewport(QRhiViewport(
+            0, 0, texture->pixelSize().width(), texture->pixelSize().height()));
+      }
 
-      m_mesh->draw(this->m_meshBuffer, cb);
+      drawMeshWithOptionalIndirect(*m_mesh, this->m_meshBuffer, cb);
     }
+
+    // Persistent SSBO ping-pong for next frame.
+    swapPersistentSSBOs(m_storage, *srb);
   }
 }
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp
index 1a832c3280..539827e3db 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp
@@ -1,9 +1,12 @@
 #pragma once
 
 #include <Gfx/Graph/ISFNode.hpp>
+#include <Gfx/Graph/IsfBindingsBuilder.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderedISFUtils.hpp>
 
+#include <ossia/detail/small_flat_map.hpp>
+
 namespace score::gfx
 {
 // Used for the simple case of a single, non-persistent pass (the most common case)
@@ -14,13 +17,22 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer
 
   virtual ~SimpleRenderedISFNode();
 
-  void updateInputTexture(const Port& input, QRhiTexture* tex) override;
+  void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override;
+  void updateInputSamplerFilter(const Port& input, const RenderTargetSpecs& spec) override;
   QRhiTexture* textureForOutput(const Port& output) override;
 
   void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& r) override;
+  void addOutputPass(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+  void addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeInputEdge(RenderList& renderer, Edge& edge) override;
+
   void runInitialPasses(
       RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
       Edge& edge) override;
@@ -28,9 +40,12 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer
   void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override;
 
 private:
-  void initPass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge);
+  void initPass(
+      const TextureRenderTarget& rt, RenderList& renderer, Edge& edge,
+      QRhiResourceUpdateBatch& res);
   void initMRTPass(RenderList& renderer, QRhiResourceUpdateBatch& res);
   void initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res);
+  void initMRTBlitPass(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge);
 
   std::vector<Sampler> allSamplers() const noexcept;
 
@@ -40,7 +55,7 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer
 
   std::vector<Sampler> m_inputSamplers;
   std::vector<Sampler> m_audioSamplers;
-  std::vector<QRhiSampler*> m_blitSamplers;
+  ossia::small_flat_map<Edge*, QRhiSampler*, 4> m_blitSamplersByEdge;
 
   const Mesh* m_mesh{};
   MeshBuffers m_meshBuffer{};
@@ -54,5 +69,15 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer
   TextureRenderTarget m_mrtRenderTarget;
   bool m_hasMRT{false};
   bool m_mrtRenderedThisFrame{false};
+
+  // Graphics-visible storage buffers / images (see IsfBindingsBuilder).
+  GraphicsStorageResources m_storage;
+
+  // Multiview UBO: N × mat4 view-projection matrices uploaded per frame.
+  QRhiBuffer* m_multiViewUBO{};
+
+  // Cached number of bindings consumed by storage resources (recorded in
+  // initState so that runtime buffer rebinds can reuse the same layout).
+  int m_firstStorageBinding{-1};
 };
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp
index 08dbb3d1bf..57841faad9 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp
@@ -67,10 +67,10 @@ struct TexgenNode : NodeModel
     ~Rendered() { }
 
     QRhiTexture* texture{};
-    void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+    void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override
     {
-      const auto& mesh = renderer.defaultTriangle();
-      defaultMeshInit(renderer, mesh, res);
+      m_mesh = &renderer.defaultTriangle();
+      defaultMeshInit(renderer, *m_mesh, res);
       processUBOInit(renderer);
       m_material.init(renderer, node.input, m_samplers);
       std::tie(m_vertexS, m_fragmentS)
@@ -93,7 +93,8 @@ struct TexgenNode : NodeModel
         sampler->create();
         m_samplers.push_back({sampler, texture});
       }
-      defaultPassesInit(renderer, mesh);
+
+      m_initialized = true;
     }
 
     void update(
@@ -116,8 +117,8 @@ struct TexgenNode : NodeModel
             QRhiTexture::RGBA8, sz, 1, QRhiTexture::Flag{});
         newtex->create();
         for(auto& [edge, pass] : this->m_p)
-          if(pass.srb)
-            score::gfx::replaceTexture(*pass.srb, m_samplers[0].sampler, newtex);
+          if(pass.p.srb)
+            score::gfx::replaceTexture(*pass.p.srb, m_samplers[0].sampler, newtex);
         texture = newtex;
 
         if(oldtex && oldtex != &renderer.emptyTexture())
@@ -139,12 +140,15 @@ struct TexgenNode : NodeModel
       }
     }
 
-    void release(RenderList& r) override
+    void releaseState(RenderList& r) override
     {
-      texture->deleteLater();
-      texture = nullptr;
+      if(texture)
+      {
+        texture->deleteLater();
+        texture = nullptr;
+      }
 
-      defaultRelease(r);
+      GenericNodeRenderer::releaseState(r);
     }
 
     int t = 0;
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp
index b0c5c7f7ed..0b52aae4bc 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp
@@ -112,11 +112,11 @@ class TextNode::Renderer : public GenericNodeRenderer
     m_uploaded = false;
   }
 
-  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
     rerender();
-    const auto& mesh = renderer.defaultQuad();
-    defaultMeshInit(renderer, mesh, res);
+    m_mesh = &renderer.defaultQuad();
+    defaultMeshInit(renderer, *m_mesh, res);
     processUBOInit(renderer);
     m_material.init(renderer, node.input, m_samplers);
     std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders(
@@ -145,7 +145,7 @@ class TextNode::Renderer : public GenericNodeRenderer
       m_samplers.push_back({sampler, tex});
     }
 
-    defaultPassesInit(renderer, mesh);
+    m_initialized = true;
   }
 
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override
@@ -179,7 +179,7 @@ class TextNode::Renderer : public GenericNodeRenderer
     defaultRenderPass(renderer, mesh, cb, edge);
   }
 
-  void release(RenderList& r) override
+  void releaseState(RenderList& r) override
   {
     for(auto tex : m_textures)
     {
@@ -187,7 +187,7 @@ class TextNode::Renderer : public GenericNodeRenderer
     }
     m_textures.clear();
 
-    defaultRelease(r);
+    GenericNodeRenderer::releaseState(r);
   }
 
   QImage m_img;
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.cpp
new file mode 100644
index 0000000000..f6fb9a5249
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.cpp
@@ -0,0 +1,190 @@
+#include <Gfx/Graph/TextureLoader.hpp>
+#include <Gfx/Hashes.hpp>
+
+#include <ossia/detail/hash.hpp>
+
+#include <QByteArray>
+#include <QImage>
+#include <QImageReader>
+
+#include <private/qrhi_p.h>
+
+namespace score::gfx
+{
+
+// -----------------------------------------------------------------------------
+// CPU decode
+// -----------------------------------------------------------------------------
+
+std::optional<DecodedImage> decodeImageFromPath(const QString& path)
+{
+  // Read and decode the file directly, with no CPU-side cache in between.
+  // Gfx::ImageCache used to be reused here, but it is refcounted and
+  // TextureLoader never released its acquisitions, so every distinct path
+  // kept one QImage alive for the lifetime of the program. De-duplication
+  // already happens elsewhere: TextureCache below (per-renderer GPU uploads)
+  // and AssetTable (across outputs, keyed on content hash).
+  DecodedImage result;
+  result.debug_name = path;
+  result.image = QImage(path);
+  if(result.image.isNull())
+    return std::nullopt;
+
+  // QRhi's RGBA8 textures need the canonical RGBA8888 byte layout to sample
+  // correctly, so normalise any other decoded format.
+  if(result.image.format() != QImage::Format_RGBA8888)
+    result.image.convertTo(QImage::Format_RGBA8888);
+
+  return result;
+}
+
+std::optional<DecodedImage> decodeImageFromMemory(
+    const QByteArray& bytes, const QString& mime_hint)
+{
+  // QImage::loadFromData takes the format as a const char* (e.g. "PNG"): strip
+  // the "image/" MIME prefix (case-insensitively) and upper-case the rest.
+  QString s = mime_hint;
+  if(s.startsWith("image/", Qt::CaseInsensitive))
+    s = s.mid(6);
+  const QByteArray fmt = s.toUpper().toLatin1();
+
+  // A hint that names no Qt image format (e.g. "image/x-png") must not reject
+  // decodable bytes: if the hinted load fails, let Qt probe the data header.
+  QImage img;
+  const bool hinted = !fmt.isEmpty() && img.loadFromData(bytes, fmt.constData());
+  if(!hinted && !img.loadFromData(bytes))
+    return std::nullopt;
+
+  DecodedImage out;
+  out.image = std::move(img);
+  if(out.image.format() != QImage::Format_RGBA8888)
+    out.image.convertTo(QImage::Format_RGBA8888);
+  out.debug_name = QStringLiteral("blob:") + mime_hint;
+  return out;
+}
+
+// -----------------------------------------------------------------------------
+// GPU upload
+// -----------------------------------------------------------------------------
+
+QRhiTexture* uploadImageToTexture(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QImage& img, bool srgb,
+    const QString& debug_name)
+{
+  // QRhi copies the QImage bytes as-is: the image format has to match the RGBA8
+  // texture below, so normalise any other layout first. An image that is
+  // already RGBA8888 is only shared (implicit sharing), not copied.
+  const QImage rgba = img.format() == QImage::Format_RGBA8888
+                          ? img
+                          : img.convertToFormat(QImage::Format_RGBA8888);
+  if(rgba.isNull())
+    return nullptr;
+
+  // sRGB is a Flag bit (not a separate format): Qt RHI allocates an RGBA8
+  // texture with sRGB sampling semantics when the flag is present.
+  // MipMapped + UsedWithGenerateMips are required by generateMips() below; with
+  // no mip chain a distant high-res texture aliases into TV-static noise.
+  QRhiTexture::Flags flags
+      = QRhiTexture::MipMapped | QRhiTexture::UsedWithGenerateMips;
+  if(srgb)
+    flags |= QRhiTexture::sRGB;
+  // sampleCount = 1: no MSAA on a sampled texture. Mip count is implicit.
+  auto* tex = rhi.newTexture(QRhiTexture::RGBA8, rgba.size(), 1, flags);
+  if(!tex)
+    return nullptr;
+  if(!debug_name.isEmpty())
+    tex->setName(debug_name.toUtf8());
+  if(!tex->create())
+  {
+    delete tex;
+    return nullptr;
+  }
+  batch.uploadTexture(tex, rgba);
+  // Filter the base level into the mip chain once, so mipmapped sampling works.
+  batch.generateMips(tex);
+  return tex;
+}
+
+// -----------------------------------------------------------------------------
+// One-shot helpers
+// -----------------------------------------------------------------------------
+
+QRhiTexture* loadAndUploadTexture(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb)
+{
+  // File variant: decode `path` on the CPU, then record the GPU upload.
+  // Yields nullptr when the file cannot be decoded or the upload fails.
+  if(const auto img = decodeImageFromPath(path); img.has_value())
+    return uploadImageToTexture(rhi, batch, img->image, srgb, img->debug_name);
+  return nullptr;
+}
+
+QRhiTexture* loadAndUploadTexture(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes,
+    const QString& mime_hint, bool srgb)
+{
+  // In-memory variant: decode the encoded bytes, then record the GPU upload.
+  // Yields nullptr when the bytes cannot be decoded or the upload fails.
+  if(const auto img = decodeImageFromMemory(bytes, mime_hint); img.has_value())
+    return uploadImageToTexture(rhi, batch, img->image, srgb, img->debug_name);
+  return nullptr;
+}
+
+// -----------------------------------------------------------------------------
+// TextureCache
+// -----------------------------------------------------------------------------
+
+std::size_t TextureCache::KeyHash::operator()(const Key& k) const noexcept
+{
+  std::size_t h = hash_qstring(k.origin); // start from the origin string
+  ossia::hash_combine(h, static_cast<uint8_t>(k.srgb)); // fold in the sRGB bit
+  return h;
+}
+
+TextureCache::~TextureCache()
+{
+  clear(); // schedules deleteLater() on every texture still in the cache
+}
+
+void TextureCache::clear()
+{
+  for(const auto& entry : m_textures)
+    if(QRhiTexture* owned = entry.second) // null entries are skipped
+      owned->deleteLater();
+  m_textures.clear(); // forget every pointer that was just scheduled
+}
+
+QRhiTexture* TextureCache::acquireFromPath(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb)
+{
+  if(path.isEmpty())
+    return nullptr;
+  Key key{path, srgb};
+  const auto cached = m_textures.find(key);
+  if(cached != m_textures.end())
+    return cached->second;
+  // Miss: decode + upload. A failure (nullptr) is deliberately not stored, so
+  // the next call retries; the caller treats nullptr as "missing texture".
+  QRhiTexture* const fresh = loadAndUploadTexture(rhi, batch, path, srgb);
+  if(fresh != nullptr)
+    m_textures.emplace(std::move(key), fresh);
+  return fresh;
+}
+
+QRhiTexture* TextureCache::acquireFromMemory(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes,
+    const QString& mime_hint, uint64_t content_hash, bool srgb)
+{
+  // Blobs are keyed on their content hash, rendered as "blob:<hex>".
+  Key key{QStringLiteral("blob:") + QString::number(content_hash, 16), srgb};
+  const auto cached = m_textures.find(key);
+  if(cached != m_textures.end())
+    return cached->second;
+  // Miss: decode + upload; a failure (nullptr) is returned but not stored.
+  auto* const fresh = loadAndUploadTexture(rhi, batch, bytes, mime_hint, srgb);
+  if(fresh != nullptr)
+    m_textures.emplace(std::move(key), fresh);
+  return fresh;
+}
+
+}  // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.hpp
new file mode 100644
index 0000000000..e670b5e80c
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.hpp
@@ -0,0 +1,132 @@
+#pragma once
+#include <score_plugin_gfx_export.h>
+
+#include <QImage>
+#include <QString>
+
+#include <cstdint>
+#include <optional>
+#include <unordered_map>
+
+class QRhi;
+class QRhiTexture;
+class QRhiResourceUpdateBatch;
+class QByteArray;
+
+namespace score::gfx
+{
+
+// =============================================================================
+// CPU-side decode result.
+//
+// The default implementation decodes with QImage and converts to RGBA8888.
+// sRGB-ness is NOT stored here and does not alter the pixel bytes: the caller
+// states at upload time (the `srgb` argument of uploadImageToTexture) whether
+// the bytes are to be interpreted as sRGB when sampled, which sets the
+// QRhiTexture::sRGB flag on an RGBA8 texture rather than another format.
+//
+// Future swap-in candidates: OIIO (HDR/EXR), KTX2 (transcoded BCn), AVIF.
+// =============================================================================
+struct DecodedImage
+{
+  QImage image;        // Decoded pixels, QImage::Format_RGBA8888 (no premul)
+  QString debug_name;  // Source path or "blob:<mime hint>"; for QRhiTexture::setName()
+};
+
+// =============================================================================
+// Decode helpers — synchronous, called on the render thread.
+//
+// Both variants decode directly with QImage; cross-output dedup is handled at
+// the TextureCache (per-renderer GPU side) and AssetTable (content-hash
+// keyed) layers. We don't share a CPU-side cache here — the previous reuse
+// of Gfx::ImageCache leaked every decoded path for the program lifetime.
+// =============================================================================
+
+SCORE_PLUGIN_GFX_EXPORT
+std::optional<DecodedImage> decodeImageFromPath(const QString& path);
+
+SCORE_PLUGIN_GFX_EXPORT
+std::optional<DecodedImage> decodeImageFromMemory(
+    const QByteArray& bytes, const QString& mime_hint);
+
+// =============================================================================
+// GPU upload — pure RHI, no I/O. Allocates a freshly-sized, mipmapped RGBA8
+// QRhiTexture (with the QRhiTexture::sRGB flag when `srgb` is set) and records
+// the upload + mip generation into `batch`. Caller owns the returned pointer
+// (delete via deleteLater()).
+// Returns nullptr if `img` is null or on QRhi allocation failure.
+// =============================================================================
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiTexture* uploadImageToTexture(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QImage& img, bool srgb,
+    const QString& debug_name = {});
+
+// =============================================================================
+// One-shot decode + upload helpers. Convenience for callers that don't need
+// to reuse the decoded CPU bytes.
+// =============================================================================
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiTexture* loadAndUploadTexture(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb);
+
+SCORE_PLUGIN_GFX_EXPORT
+QRhiTexture* loadAndUploadTexture(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes,
+    const QString& mime_hint, bool srgb);
+
+// =============================================================================
+// Per-renderer GPU texture cache.
+//
+// QRhiTexture* is bound to one QRhi instance, so this cache MUST live on the
+// render-side node (e.g. ScenePreprocessorNode), not as a global singleton. Owns
+// the textures it returns; clear() (also runs in the dtor) schedules each via
+// deleteLater().
+//
+// Keys: a file path OR a stable content hash (for embedded glTF/FBX blobs).
+// Two entries with the same origin but different sRGB flags coexist.
+// =============================================================================
+class SCORE_PLUGIN_GFX_EXPORT TextureCache
+{
+public:
+  TextureCache() = default;
+  ~TextureCache();
+  TextureCache(const TextureCache&) = delete;
+  TextureCache& operator=(const TextureCache&) = delete;
+  // Moves leave the source empty. Move-assignment also releases the textures
+  // this cache already owns; a defaulted one would overwrite and leak them.
+  TextureCache(TextureCache&& other) noexcept { m_textures.swap(other.m_textures); }
+  TextureCache& operator=(TextureCache&& other) noexcept
+  {
+    TextureCache old{static_cast<TextureCache&&>(other)}; // `other` is now empty
+    m_textures.swap(old.m_textures); // `old` deleteLater()s what we held
+    return *this;
+  }
+
+  // A hit returns the cached texture; a miss decodes + uploads via `batch`.
+  // Returns nullptr if `path` is empty or the file can't be decoded.
+  QRhiTexture* acquireFromPath(
+      QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb);
+
+  // Same, for embedded blobs. The key is the caller-supplied `content_hash`
+  // (+ `srgb`): equal hashes are served from the cache, bytes are not compared.
+  QRhiTexture* acquireFromMemory(
+      QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes,
+      const QString& mime_hint, uint64_t content_hash, bool srgb);
+
+  // Schedule deleteLater() on every owned texture and drop the map.
+  void clear();
+  std::size_t size() const noexcept { return m_textures.size(); }
+private:
+  struct Key
+  {
+    QString origin;  // file path, or "blob:<hash-hex>" for memory blobs
+    bool srgb{};
+    bool operator==(const Key&) const noexcept = default;
+  };
+  struct KeyHash { std::size_t operator()(const Key& k) const noexcept; };
+  std::unordered_map<Key, QRhiTexture*, KeyHash> m_textures;
+};
+
+}  // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp
index 74000ee39b..f8c5cb4090 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp
@@ -20,6 +20,7 @@ enum class Types : int8_t
   Camera,
   Geometry,
   Buffer,
+  Scene,
 };
 
 enum class Flag : uint32_t
@@ -27,7 +28,32 @@ enum class Flag : uint32_t
   // Grabs texture at the source instead of
   // asking it to render. Used for instance to get cubemap textures.
   GrabsFromSource = (1 << 0),
-  SamplableDepth = (1 << 1)
+  SamplableDepth  = (1 << 1),
+
+  // Sink expects a sampler2DArray (texture carries multiple layers).
+  TextureArray    = (1 << 2),
+
+  // Sink expects imageLoad/imageStore (storage image) rather than sampledTexture.
+  StorageImage    = (1 << 3),
+
+  // Buffer port carries indirect-draw arguments (QRhiDrawIndirectCommand[]).
+  IndirectDraw    = (1 << 4),
+
+  // Image port is a multiview texture array (one layer per view).
+  MultiView       = (1 << 5),
+
+  // Output port produces only depth (no color attachment).
+  DepthOnly       = (1 << 6),
+
+  // Buffer port is bound as a uniform buffer (UBO, std140) rather than as a
+  // storage buffer (SSBO, std430). Used for `uniform_input` from upstream.
+  UniformBuffer   = (1 << 7),
+
+  // Sink expects a sampler3D (texture is a 3D volume).
+  ThreeDimensional = (1 << 8),
+
+  // Sink expects a samplerCube.
+  Cubemap          = (1 << 9),
 };
 
 static constexpr inline Flag operator&(Flag lhs, Flag rhs)
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp
index c103e2e47d..e92268c89f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp
@@ -1,10 +1,17 @@
 #include <Gfx/Graph/RenderList.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/PipelineStateHelpers.hpp>
 #include <Gfx/Graph/ShaderCache.hpp>
 #include <Gfx/Graph/Utils.hpp>
+#include <Gfx/Graph/VertexFallbackDefaults.hpp>
+#include <Gfx/Graph/VertexFallbackPool.hpp>
+
+#include <isf.hpp>
 
 #include <score/tools/Debug.hpp>
 
+#include <QDebug>
+
 namespace score::gfx
 {
 TextureRenderTarget
@@ -27,7 +34,7 @@ createRenderTarget(const RenderState& state, QRhiTexture* tex, int samples, bool
   bool useDepthResolve = false;
   if(samplableDepth && samples > 1)
   {
-#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
     useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil);
 #endif
     if(!useDepthResolve)
@@ -67,7 +74,7 @@ createRenderTarget(const RenderState& state, QRhiTexture* tex, int samples, bool
 
     if(useDepthResolve)
     {
-#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
       // Multisample depth attachment used during rendering; resolves into
       // ret.depthTexture at endPass(). Owned via ret.msDepthTexture so it
       // is released alongside the rest of the RT.
@@ -88,12 +95,18 @@ createRenderTarget(const RenderState& state, QRhiTexture* tex, int samples, bool
   }
   else if(depth)
   {
-    ret.depthRenderBuffer = state.rhi->newRenderBuffer(
-        QRhiRenderBuffer::DepthStencil, tex->pixelSize(), effectiveSamples);
-    ret.depthRenderBuffer->setName("createRenderTarget::ret.depthRenderBuffer");
-    SCORE_ASSERT(ret.depthRenderBuffer->create());
+    // Reverse-Z project rule: intermediate 3D render targets always use
+    // D32F float depth. D24 fixed-point combined with reverse-Z yields
+    // strictly worse precision than standard-Z would, so renderbuffer
+    // depth is no longer an option here. Stencil is dropped (no shader in
+    // the codebase currently uses it — revisit via D32FS8 if needed).
+    ret.depthTexture = state.rhi->newTexture(
+        QRhiTexture::D32F, tex->pixelSize(), effectiveSamples,
+        QRhiTexture::RenderTarget);
+    ret.depthTexture->setName("createRenderTarget::depthTexture (D32F, non-samplable)");
+    SCORE_ASSERT(ret.depthTexture->create());
 
-    desc.setDepthStencilBuffer(ret.depthRenderBuffer);
+    desc.setDepthTexture(ret.depthTexture);
   }
 
   auto renderTarget = state.rhi->newTextureRenderTarget(desc);
@@ -147,7 +160,7 @@ TextureRenderTarget createRenderTarget(
   bool useDepthResolve = false;
   if(depthTex && samples > 1)
   {
-#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
     useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil);
 #endif
     if(!useDepthResolve)
@@ -188,7 +201,7 @@ TextureRenderTarget createRenderTarget(
 #if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
     if(useDepthResolve)
     {
-#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
       // Multisample depth attachment used during rendering, resolves into
       // the caller-supplied depthTex on endPass(). We own msDepthTexture.
       ret.msDepthTexture = state.rhi->newTexture(
@@ -233,6 +246,16 @@ TextureRenderTarget createRenderTarget(
   return ret;
 }
 
+// NOTE on the reinterpret_cast<QRhiShaderResourceBinding::Data*> below (and in
+// replaceSampler / replaceTexture / etc.): QRhiShaderResourceBinding stores its
+// payload in a private nested ::Data whose only public accessor is the const
+// data() method — there is no public mutator. We rebind buffers/samplers/
+// textures in-place by casting the binding to its layout-compatible private
+// Data. This relies on QRhiShaderResourceBinding being a thin wrapper whose
+// first (and only) data member IS that Data struct; that layout has been stable
+// across Qt 6.4..dev, but it is NOT a guaranteed/forward-compatible ABI. If a
+// future Qt reorders QRhiShaderResourceBinding's members this will silently
+// corrupt bindings — revisit if QRhi ever exposes a public mutating accessor.
 void replaceBuffer(
     std::vector<QRhiShaderResourceBinding>& tmp, int binding, QRhiBuffer* newBuffer)
 {
@@ -278,6 +301,16 @@ void replaceSampler(
 void replaceTexture(
     std::vector<QRhiShaderResourceBinding>& tmp, int binding, QRhiTexture* newTexture)
 {
+  // Defensive null-guard — writing a null texture into a
+  // sampledTexture / ImageLoad binding crashes the next
+  // vkUpdateDescriptorSets. Callers that genuinely want to "detach" a
+  // texture should call replaceTexture with an empty-fallback from the
+  // RenderList (renderer.emptyTexture() / …Array() / …Cube() / …3D())
+  // that matches the sampler's kind. When this is reached with null,
+  // leave the existing binding in place so the pass keeps working
+  // with whatever it had last.
+  if(!newTexture)
+    return;
   for(QRhiShaderResourceBinding& b : tmp)
   {
     auto d = reinterpret_cast<QRhiShaderResourceBinding::Data*>(&b);
@@ -300,6 +333,17 @@ void replaceTexture(
   }
 }
 
+// The replace*() overloads on QRhiShaderResourceBindings only ever rewrite
+// the *resources* inside an existing layout (buffer/texture/sampler pointer
+// in the same binding slot). That is the textbook case for QRhi's
+// updateResources() fast path: reuse the native descriptor set layout and
+// pool slot, bump the generation, let the backend rewrite only the changed
+// descriptors. The previous destroy()+create() pattern instead freed the
+// pool slot on every live edit — which is what caused the 64-slot batch
+// pool to blow up under heavy graph churn.
+//
+// See qrhivulkan.cpp:8707 (QVkShaderResourceBindings::updateResources).
+// All five backends (Vulkan/D3D11/D3D12/Metal/GL) implement the virtual.
 void replaceBuffer(QRhiShaderResourceBindings& srb, int binding, QRhiBuffer* newBuffer)
 {
   std::vector<QRhiShaderResourceBinding> tmp;
@@ -307,9 +351,8 @@ void replaceBuffer(QRhiShaderResourceBindings& srb, int binding, QRhiBuffer* new
 
   replaceBuffer(tmp, binding, newBuffer);
 
-  srb.destroy();
   srb.setBindings(tmp.begin(), tmp.end());
-  srb.create();
+  srb.updateResources();
 }
 
 void replaceSampler(
@@ -320,9 +363,8 @@ void replaceSampler(
 
   replaceSampler(tmp, binding, newSampler);
 
-  srb.destroy();
   srb.setBindings(tmp.begin(), tmp.end());
-  srb.create();
+  srb.updateResources();
 }
 
 void replaceTexture(
@@ -333,9 +375,8 @@ void replaceTexture(
 
   replaceTexture(tmp, binding, newTexture);
 
-  srb.destroy();
   srb.setBindings(tmp.begin(), tmp.end());
-  srb.create();
+  srb.updateResources();
 }
 
 void replaceSampler(
@@ -356,9 +397,8 @@ void replaceSampler(
     }
   }
 
-  srb.destroy();
   srb.setBindings(tmp.begin(), tmp.end());
-  srb.create();
+  srb.updateResources();
 }
 
 void replaceSamplerAndTexture(
@@ -381,16 +421,21 @@ void replaceSamplerAndTexture(
     }
   }
 
-  srb.destroy();
   srb.setBindings(tmp.begin(), tmp.end());
-  srb.create();
+  srb.updateResources();
 }
 
 void replaceTexture(
     QRhiShaderResourceBindings& srb, QRhiSampler* sampler, QRhiTexture* newTexture)
 {
+  // Defensive null-guard: see the other replaceTexture overload. Null
+  // leaves the current binding intact so subsequent setShaderResources
+  // calls don't hit vkUpdateDescriptorSets with VK_NULL_HANDLE.
+  if(!newTexture)
+    return;
   std::vector<QRhiShaderResourceBinding> tmp;
   tmp.assign(srb.cbeginBindings(), srb.cendBindings());
+  int matches = 0;
   for(QRhiShaderResourceBinding& b : tmp)
   {
     auto d = reinterpret_cast<QRhiShaderResourceBinding::Data*>(&b);
@@ -400,13 +445,15 @@ void replaceTexture(
       if(d->u.stex.texSamplers[0].sampler == sampler)
       {
         d->u.stex.texSamplers[0].tex = newTexture;
+        matches++;
       }
     }
   }
+  if(matches == 0)
+    return;
 
-  srb.destroy();
   srb.setBindings(tmp.begin(), tmp.end());
-  srb.create();
+  srb.updateResources();
 }
 
 void replaceTexture(
@@ -428,9 +475,46 @@ void replaceTexture(
       }
     }
   }
-  srb.destroy();
   srb.setBindings(bindings.begin(), bindings.end());
-  srb.create();
+  srb.updateResources();
+}
+
+// Unified geometry-attribute lookup, used by raw raster and CSF alike.
+// Matches the request (name + optional semantic key) to an upstream
+// ossia::geometry::attribute via a 3-stage cascade:
+//
+//   stage 1 — resolve `semantic_key` (defaults to `name`) via
+//             name_to_semantic. If it maps to a known semantic, look that
+//             up on the geometry.
+//   stage 2 — fall back to a custom-attribute lookup by `name`.
+//   stage 3 — display_name match. Catches the case where the user said
+//             { NAME: "position", SEMANTIC: "custom" } but only the real
+//             position attribute (semantic=position) exists upstream — we
+//             still want to bind to it instead of failing.
+const ossia::geometry::attribute* findGeometryAttribute(
+    const ossia::geometry& geom, std::string_view name, std::string_view semantic_key)
+{
+  using attribute_ptr = const ossia::geometry::attribute*;
+
+  // Stage 1: the semantic key (the name itself when no key is given) maps to a
+  // known semantic that the geometry provides.
+  const auto sem
+      = ossia::name_to_semantic(semantic_key.empty() ? name : semantic_key);
+  if(sem != ossia::attribute_semantic::custom)
+  {
+    if(attribute_ptr by_semantic = geom.find(sem))
+      return by_semantic;
+  }
+
+  // Stage 2: lookup by `name` on the geometry.
+  if(attribute_ptr by_name = geom.find(name))
+    return by_name;
+
+  // Stage 3: first attribute whose display name equals `name`.
+  for(const auto& attr : geom.attributes)
+    if(ossia::geometry::display_name(attr) == name)
+      return &attr;
+  return nullptr;
+}
 
 bool remapPipelineVertexInputs(
@@ -445,28 +529,12 @@ bool remapPipelineVertexInputs(
 
   for(const auto& shader_var : shader_inputs)
   {
-    // Resolve shader variable name to semantic
     const std::string_view var_name(shader_var.name.constData(), shader_var.name.size());
-    auto sem = ossia::name_to_semantic(var_name);
-
-    // Find matching geometry attribute: by semantic, then custom name, then display name
-    const ossia::geometry::attribute* match = nullptr;
-    if(sem != ossia::attribute_semantic::custom)
-      match = geom.find(sem);
-    if(!match)
-      match = geom.find(var_name);
-    if(!match)
-    {
-      // Fallback: match shader variable name against attribute display names
-      for(const auto& a : geom.attributes)
-      {
-        if(ossia::geometry::display_name(a) == var_name)
-        {
-          match = &a;
-          break;
-        }
-      }
-    }
+    // Same lookup CSF uses — the explicit-SEMANTIC override is plumbed
+    // separately by callers that have access to the descriptor (see the
+    // overload below). Here, only the GLSL var name is available, so the
+    // semantic key defaults to it.
+    const auto* match = findGeometryAttribute(geom, var_name, var_name);
 
     if(!match)
       return false;
@@ -487,6 +555,202 @@ bool remapPipelineVertexInputs(
   return true;
 }
 
+bool remapPipelineVertexInputs(
+    QRhiGraphicsPipeline& pip, const QShader& vertexShader,
+    const ossia::geometry& geom, const isf::descriptor& desc)
+{
+  // No vertex shader inputs: nothing to remap, the layout stays as it is.
+  const auto& shader_inputs = vertexShader.description().inputVariables();
+  if(shader_inputs.empty())
+    return true;
+
+  // NAME → SEMANTIC table from the descriptor's VERTEX_INPUTS (non-empty
+  // SEMANTIC only); any other shader input uses its own name as the key.
+  ossia::small_flat_map<std::string_view, std::string_view, 16> semantic_of;
+  for(const auto& input : desc.vertex_inputs)
+    if(!input.semantic.empty())
+      semantic_of[input.name] = input.semantic;
+
+  QVarLengthArray<QRhiVertexInputAttribute> attributes;
+  for(const auto& var : shader_inputs)
+  {
+    const std::string_view var_name(var.name.constData(), var.name.size());
+    const auto found = semantic_of.find(var_name);
+    const std::string_view key = (found != semantic_of.end()) ? found->second : var_name;
+
+    // One unmatched input aborts the remap before the pipeline is touched.
+    const auto* attr = findGeometryAttribute(geom, var_name, key);
+    if(!attr)
+      return false;
+
+    attributes.append(QRhiVertexInputAttribute(
+        attr->binding, var.location,
+        static_cast<QRhiVertexInputAttribute::Format>(attr->format),
+        attr->byte_offset));
+  }
+
+  QRhiVertexInputLayout layout;
+  const auto& previous = pip.vertexInputLayout();
+  layout.setBindings(previous.cbeginBindings(), previous.cendBindings());
+  layout.setAttributes(attributes.begin(), attributes.end());
+  pip.setVertexInputLayout(layout);
+  return true;
+}
+
+namespace
+{
+
+// Translates the parser's attribute_type into the lowercase GLSL type
+// name handed to resolveVertexFallback. Float, Vec2, Vec3 and Vec4 are
+// the only enumerators with a name; any other enumerator produces an
+// empty view, which the remapPipelineVertexInputs overload taking a
+// VertexFallbackPool logs as an unsupported optional input before
+// returning false.
+std::string_view declTypeFromAttributeType(isf::attribute_type t) noexcept
+{
+  using kind = isf::attribute_type;
+
+  if(t == kind::Float) return "float";
+  if(t == kind::Vec2)  return "vec2";
+  if(t == kind::Vec3)  return "vec3";
+  if(t == kind::Vec4)  return "vec4";
+  // Every remaining enumerator has no fallback-eligible GLSL name.
+  return {};
+}
+
+} // namespace
+
+bool remapPipelineVertexInputs(
+    QRhiGraphicsPipeline& pip, const QShader& vertexShader,
+    const ossia::geometry& geom, const isf::descriptor& desc,
+    QRhi& rhi, VertexFallbackPool& pool, QRhiResourceUpdateBatch& batch,
+    FallbackBindingPlan& outPlan)
+{
+  outPlan.clear(); // the plan is rebuilt from scratch on every call
+
+  const auto& shader_inputs = vertexShader.description().inputVariables();
+  if(shader_inputs.empty())
+    return true;
+
+  // Build a fast NAME → descriptor-entry map so every shader input can
+  // cheaply look up its REQUIRED / DEFAULT / SEMANTIC metadata. Shader
+  // reflection order is driver-dependent; we don't rely on it matching
+  // descriptor declaration order.
+  ossia::small_flat_map<std::string_view, const isf::vertex_input*, 16> descByName;
+  for(const auto& vi : desc.vertex_inputs)
+    descByName[vi.name] = &vi;
+
+  // Start from whatever bindings the pipeline already has (the mesh's
+  // per-vertex + per-instance buffers). Fallback slots get appended at
+  // the end; their binding_index in the extended vector is the index
+  // the draw-path then binds the fallback buffer at.
+  QVarLengthArray<QRhiVertexInputBinding> bindings;
+  {
+    const auto& prev = pip.vertexInputLayout();
+    for(auto it = prev.cbeginBindings(); it != prev.cendBindings(); ++it)
+      bindings.append(*it);
+  }
+
+  QVarLengthArray<QRhiVertexInputAttribute> remappedAttrs;
+  for(const auto& shader_var : shader_inputs)
+  {
+    const std::string_view var_name(
+        shader_var.name.constData(), shader_var.name.size());
+
+    // Resolve the semantic key the same way the descriptor overload does —
+    // SEMANTIC field wins when set, else NAME is used.
+    std::string_view sem_key = var_name;
+    auto descIt = descByName.find(var_name);
+    const isf::vertex_input* descEntry
+        = (descIt != descByName.end()) ? descIt->second : nullptr;
+    if(descEntry && !descEntry->semantic.empty())
+      sem_key = descEntry->semantic;
+
+    if(const auto* match = findGeometryAttribute(geom, var_name, sem_key))
+    {
+      remappedAttrs.append(QRhiVertexInputAttribute(
+          match->binding, shader_var.location,
+          static_cast<QRhiVertexInputAttribute::Format>(match->format),
+          match->byte_offset));
+      continue;
+    }
+
+    // Miss. Strict mode (no descriptor entry or REQUIRED=true) fails.
+    if(!descEntry || descEntry->required)
+    {
+      qDebug() << "remapPipelineVertexInputs: required VERTEX_INPUT '"
+               << QString::fromUtf8(var_name.data(), (int)var_name.size())
+               << "' has no matching attribute on upstream geometry";
+      return false; // NOTE: outPlan may already hold slots added for earlier inputs
+    }
+
+    // Optional path — synthesise a fallback buffer. Two failure modes
+    // still reject the pipeline build:
+    //   - declared GLSL TYPE is unsupported (mat4 / integer / sampler)
+    //   - the semantic has no whitelist neutral AND the shader did not
+    //     supply DEFAULT in its JSON header
+    const std::string_view decl_type = declTypeFromAttributeType(descEntry->type);
+    if(decl_type.empty())
+    {
+      qDebug() << "remapPipelineVertexInputs: optional VERTEX_INPUT '"
+               << QString::fromUtf8(var_name.data(), (int)var_name.size())
+               << "' uses a type (mat4 / integer / sampler) that is not"
+                  " supported by the v1 fallback path; bind a real"
+                  " attribute or declare it REQUIRED: true";
+      return false;
+    }
+
+    const auto sem = ossia::name_to_semantic(sem_key);
+    auto spec = resolveVertexFallback(sem, decl_type, descEntry->default_val);
+    if(!spec)
+    {
+      qDebug() << "remapPipelineVertexInputs: optional VERTEX_INPUT '"
+               << QString::fromUtf8(var_name.data(), (int)var_name.size())
+               << "' (semantic '"
+               << QString::fromUtf8(sem_key.data(), (int)sem_key.size())
+               << "') has no whitelist default and no explicit DEFAULT"
+                  " was provided in the JSON header";
+      return false;
+    }
+
+    const auto fallbackEntry = pool.acquire(rhi, batch, *spec);
+    if(!fallbackEntry.buffer)
+    {
+      qDebug() << "remapPipelineVertexInputs: failed to allocate fallback"
+                  " buffer for VERTEX_INPUT '"
+               << QString::fromUtf8(var_name.data(), (int)var_name.size())
+               << "'";
+      return false;
+    }
+
+    // Append a PerInstance step_rate=1 binding to the layout, pointing
+    // at a fresh binding index. Semantically: "one instance's worth of
+    // this attribute is packed into a single-element buffer, broadcast
+    // to every vertex and every instance of the draw".
+    const int new_binding_index = bindings.size();
+    bindings.append(QRhiVertexInputBinding(
+        fallbackEntry.stride,
+        QRhiVertexInputBinding::PerInstance,
+        /*stepRate=*/1));
+
+    remappedAttrs.append(QRhiVertexInputAttribute(
+        new_binding_index, shader_var.location,
+        static_cast<QRhiVertexInputAttribute::Format>(fallbackEntry.format),
+        /*offset=*/0));
+
+    outPlan.slots.push_back(
+        FallbackBindingPlan::Slot{
+            .binding_index = new_binding_index,
+            .buffer = fallbackEntry.buffer});
+  }
+
+  QRhiVertexInputLayout inputLayout;
+  inputLayout.setBindings(bindings.begin(), bindings.end());
+  inputLayout.setAttributes(remappedAttrs.begin(), remappedAttrs.end());
+  pip.setVertexInputLayout(inputLayout);
+  return true;
+}
+
 Pipeline buildPipeline(
     const RenderList& renderer, const Mesh& mesh, const QShader& vertexS,
     const QShader& fragmentS, const TextureRenderTarget& rt,
@@ -603,23 +867,40 @@ QRhiShaderResourceBindings* createDefaultBindings(
     bindings.push_back(materialBinding);
   }
 
-  // Bind samplers
+  // Bind samplers. Null texture sources → substitute with the view-type-matched
+  // empty texture carried by `Sampler::fallback` (2D / Array / Cube / 3D).
+  // This keeps the SRB valid so the pipeline does not crash during
+  // vkUpdateDescriptorSets when an optional shader input has no upstream
+  // producer — the pass will simply sample the default fallback and render a
+  // neutral value (opaque black / transparent) for that slot. Required inputs
+  // that truly need content are the shader author's responsibility; the
+  // invariant here is "missing ⇒ render something safe, never crash".
+  //
+  // If `sampler.fallback` is null, the slot intent is assumed sampler2D
+  // (the 99 % case) and we use `RenderList::emptyTexture()`. Call sites
+  // that create Samplers for sampler3D / samplerCube / sampler2DArray
+  // slots MUST populate `fallback` with the typed empty texture — otherwise
+  // Vulkan will still reject the binding with a view-type mismatch when
+  // the 2D fallback kicks in.
   int binding = 3;
   for(auto sampler : samplers)
   {
-    assert(sampler.texture);
     auto actual_texture = sampler.texture;
 
-    // For cases where we do multi-pass rendering, set "this pass"'s input texture
-    // to an empty texture instead as we can't output to an input texture
-    if(actual_texture == rt.texture)
-      actual_texture = &renderer.emptyTexture();
+    // Multi-pass feedback short: can't sample the RT we're writing to.
+    if(actual_texture && actual_texture == rt.texture)
+      actual_texture = nullptr;
+
+    if(!actual_texture)
+      actual_texture = sampler.fallback ? sampler.fallback
+                                        : &renderer.emptyTexture();
 
-    bindings.push_back(QRhiShaderResourceBinding::sampledTexture(
-        binding,
-        QRhiShaderResourceBinding::VertexStage
-            | QRhiShaderResourceBinding::FragmentStage,
-        actual_texture, sampler.sampler));
+    bindings.push_back(
+        QRhiShaderResourceBinding::sampledTexture(
+            binding,
+            QRhiShaderResourceBinding::VertexStage
+                | QRhiShaderResourceBinding::FragmentStage,
+            actual_texture, sampler.sampler));
     binding++;
   }
 
@@ -644,27 +925,136 @@ Pipeline buildPipeline(
   return buildPipeline(renderer, mesh, vertexS, fragmentS, rt, bindings);
 }
 
+Pipeline buildPipelineWithState(
+    const RenderList& renderer, const Mesh& mesh, const QShader& vertexS,
+    const QShader& fragmentS, const TextureRenderTarget& rt, QRhiBuffer* processUBO,
+    QRhiBuffer* materialUBO, std::span<const Sampler> samplers,
+    std::span<QRhiShaderResourceBinding> extraBindings,
+    const isf::pipeline_state& state,
+    int multiViewCount,
+    bool useShadingRate)
+{
+  auto& rhi = *renderer.state.rhi;
+  auto srb = createDefaultBindings(
+      renderer, rt, processUBO, materialUBO, samplers, extraBindings);
+
+  auto ps = rhi.newGraphicsPipeline();
+  SCORE_ASSERT(ps); // validate the pointer before its first dereference
+  ps->setName("buildPipelineWithState::ps");
+
+  // Plan 09 S6: VRS opt-in. Only applies when the backend supports
+  // variable-rate shading (cap set in ScreenNode::populateCaps). The
+  // actual shading-rate map or per-draw rate is set on the render
+  // target / command buffer; the pipeline just needs the flag.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
+  if(useShadingRate && renderer.state.caps.variableRateShading)
+  {
+    ps->setFlags(ps->flags() | QRhiGraphicsPipeline::UsesShadingRate);
+  }
+#endif
+
+  const bool depthAvailable
+      = (rt.depthTexture != nullptr) || (rt.depthRenderBuffer != nullptr)
+        || (rt.msDepthTexture != nullptr);
+  const bool wantsDepthByDefault = renderer.anyNodeRequiresDepth();
+
+  // Sample count handling (same as buildPipeline()).
+  const int rtSamplesQueried = rt.sampleCount();
+  const int pipelineSamples
+      = (rtSamplesQueried > 0) ? rtSamplesQueried : renderer.samples();
+  ps->setSampleCount(pipelineSamples);
+
+  mesh.preparePipeline(*ps);
+
+  // Seed legacy premul-alpha blend on every color attachment so that shaders
+  // which declare a partial PIPELINE_STATE (e.g. only DEPTH_TEST) don't
+  // silently lose the historical default blend mode. applyPipelineState
+  // overrides per-attachment blends only when the shader sets BLEND.
+  {
+    QRhiGraphicsPipeline::TargetBlend premulAlphaBlend;
+    premulAlphaBlend.enable = true;
+    premulAlphaBlend.srcColor = QRhiGraphicsPipeline::BlendFactor::SrcAlpha;
+    premulAlphaBlend.dstColor = QRhiGraphicsPipeline::BlendFactor::OneMinusSrcAlpha;
+    premulAlphaBlend.srcAlpha = QRhiGraphicsPipeline::BlendFactor::SrcAlpha;
+    premulAlphaBlend.dstAlpha = QRhiGraphicsPipeline::BlendFactor::OneMinusSrcAlpha;
+    const int n = std::max(1, rt.colorAttachmentCount());
+    QVarLengthArray<QRhiGraphicsPipeline::TargetBlend, 4> blends;
+    blends.reserve(n);
+    for(int i = 0; i < n; ++i)
+      blends.push_back(premulAlphaBlend);
+    ps->setTargetBlends(blends.begin(), blends.end());
+  }
+
+  // Apply pipeline_state: depth, cull, front-face, blend (per-attachment),
+  // stencil, polygon mode, line width. Only fields explicitly set in `state`
+  // override the seeded defaults above + mesh.preparePipeline()'s setup.
+  applyPipelineState(
+      *ps, state, rt.colorAttachmentCount(), depthAvailable, wantsDepthByDefault);
+
+  // Semantic vertex input remapping (same as buildPipeline()).
+  if(auto* geom = mesh.semanticGeometry())
+  {
+    if(!remapPipelineVertexInputs(*ps, vertexS, *geom))
+    {
+      qDebug() << "Warning! Shader requires attributes not present in mesh";
+      delete ps;
+      return {nullptr, srb};
+    }
+  }
+
+  ps->setShaderStages(
+      {{QRhiShaderStage::Vertex, vertexS}, {QRhiShaderStage::Fragment, fragmentS}});
+  ps->setShaderResourceBindings(srb);
+
+  SCORE_ASSERT(rt.renderPass);
+  ps->setRenderPassDescriptor(rt.renderPass);
+
+  // Multiview: on Vulkan/GL the multiViewCount is picked up from the render
+  // pass descriptor's color attachment (see createMultiViewRenderTarget), but
+  // D3D12 ViewInstancing and Metal vertex amplification read it from the
+  // pipeline itself via QRhiGraphicsPipeline::multiViewCount(). So we must set
+  // it explicitly here for those backends to produce correct multiview output.
+#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+  if(multiViewCount > 1 && renderer.state.caps.multiview)
+    ps->setMultiViewCount(multiViewCount);
+#else
+  (void)multiViewCount;
+#endif
+
+  if(!ps->create())
+  {
+    qDebug() << "Warning! Pipeline not created";
+    delete ps;
+    ps = nullptr;
+  }
+  return {ps, srb};
+}
+
 std::pair<QShader, QShader> makeShaders(const RenderState& v, QString vert, QString frag)
 {
   auto [vertexS, vertexError] = ShaderCache::get(v, vert.toUtf8(), QShader::VertexStage);
   if(!vertexError.isEmpty())
   {
-    qDebug() << vertexError;
-    qDebug() << vert.toStdString().data();
+    qWarning() << "Vertex shader bake failed:" << vertexError;
+    qWarning().noquote() << vert;
   }
 
   auto [fragmentS, fragmentError]
       = ShaderCache::get(v, frag.toUtf8(), QShader::FragmentStage);
   if(!fragmentError.isEmpty())
   {
-    qDebug() << fragmentError;
-    qDebug() << frag.toStdString().data();
+    qWarning() << "Fragment shader bake failed:" << fragmentError;
+    qWarning().noquote() << frag;
   }
 
-  // qDebug().noquote() << vert.toUtf8().constData();
-  if(!vertexS.isValid())
+  // QShaderBaker is configured with setPerTargetCompilation(true), so a
+  // failure on the only requested target leaves errorMessage() non-empty
+  // even when the QShader itself is "valid" via some intermediate variant.
+  // Treat any non-empty error as fatal so backend-specific bake failures
+  // (e.g. SPIRV-Cross HLSL refusing gl_NumWorkGroups) are not silent.
+  if(!vertexError.isEmpty() || !vertexS.isValid())
     throw std::runtime_error("invalid vertex shader");
-  if(!fragmentS.isValid())
+  if(!fragmentError.isEmpty() || !fragmentS.isValid())
     throw std::runtime_error("invalid fragment shader");
 
   return {vertexS, fragmentS};
@@ -676,9 +1066,12 @@ QShader makeCompute(const RenderState& v, QString compute)
   auto [computeS, computeError]
       = ShaderCache::get(v, compute.toUtf8(), QShader::ComputeStage);
   if(!computeError.isEmpty())
-    qDebug() << computeError;
+  {
+    qWarning() << "Compute shader bake failed:" << computeError;
+    qWarning().noquote() << compute;
+  }
 
-  if(!computeS.isValid())
+  if(!computeError.isEmpty() || !computeS.isValid())
     throw std::runtime_error("invalid compute shader");
   return computeS;
 }
@@ -901,11 +1294,33 @@ computeScaleForTexcoordSizing(ScaleMode mode, QSizeF renderSize, QSizeF textureS
 }
 
 std::vector<Sampler> initInputSamplers(
-    const score::gfx::Node& node, RenderList& renderer, const std::vector<Port*>& ports)
+    const score::gfx::Node& node, RenderList& renderer, const std::vector<Port*>& ports,
+    const isf::descriptor* desc)
 {
   std::vector<Sampler> samplers;
   QRhi& rhi = *renderer.state.rhi;
 
+  // Per-port sampler-config lookup. The descriptor's `inputs` list is in
+  // 1:1 order with the Port array constructed by ISFNode's visitor, so
+  // we can walk it in lockstep and capture each image_input's
+  // sampler_config. Used by the GrabsFromSource branch below to honor
+  // shader-declared WRAP/FILTER on array / 3D textures (without this,
+  // those hardcoded to ClampToEdge — which broke any glTF whose UVs
+  // went outside [0,1]).
+  std::vector<const isf::sampler_config*> port_sampler_cfg(ports.size(), nullptr);
+  if(desc)
+  {
+    const std::size_t N = std::min(ports.size(), desc->inputs.size());
+    for(std::size_t i = 0; i < N; ++i)
+    {
+      const auto& inp = desc->inputs[i];
+      if(auto* im = ossia::get_if<isf::image_input>(&inp.data))
+        port_sampler_cfg[i] = &im->sampler;
+      else if(auto* cm = ossia::get_if<isf::cubemap_input>(&inp.data))
+        port_sampler_cfg[i] = &cm->sampler;
+    }
+  }
+
   int cur_port = 0;
   for(Port* in : ports)
   {
@@ -935,22 +1350,65 @@ std::vector<Sampler> initInputSamplers(
             }
           }
 
+          // Pick a view-type-compatible placeholder when the upstream hasn't
+          // produced a texture yet. Binding a 2D view to a sampler3D /
+          // samplerCube / sampler2DArray shader input triggers
+          // VUID-vkCmdDraw-viewType-07752 at every draw until a real texture
+          // flows in (and forever if no edge ever connects).
+          QRhiTexture* fallback = nullptr;
+          if((in->flags & Flag::Cubemap) == Flag::Cubemap)
+            fallback = &renderer.emptyTextureCube();
+          else if((in->flags & Flag::ThreeDimensional) == Flag::ThreeDimensional)
+            fallback = &renderer.emptyTexture3D();
+          else if((in->flags & Flag::TextureArray) == Flag::TextureArray)
+            fallback = &renderer.emptyTextureArray();
+          else
+            fallback = &renderer.emptyTexture();
           if(!srcTex)
-            srcTex = &renderer.emptyTexture();
-
-          auto sampler = rhi.newSampler(
-              QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::Linear,
-              QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
-          sampler->setName("initInputSamplers::cubemap_sampler");
-          SCORE_ASSERT(sampler->create());
+            srcTex = fallback;
+
+          // Honour the shader-declared sampler config when present
+          // (WRAP / FILTER / MIPMAP_MODE / COMPARE / …). Falls back to
+          // the historical Linear+ClampToEdge sampler when the
+          // descriptor wasn't passed or the input had no sampler block.
+          QRhiSampler* sampler = nullptr;
+          if(cur_port < (int)port_sampler_cfg.size() && port_sampler_cfg[cur_port])
+          {
+            sampler = score::gfx::makeSampler(rhi, *port_sampler_cfg[cur_port]);
+          }
+          else
+          {
+            sampler = rhi.newSampler(
+                QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::Linear,
+                QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
+            SCORE_ASSERT(sampler->create());
+          }
+          sampler->setName("initInputSamplers::grabs_sampler");
 
-          samplers.push_back({sampler, srcTex});
+          samplers.push_back({sampler, srcTex, fallback});
         }
         else
         {
           // Look up the pre-created render target from the RenderList
           auto rt = renderer.renderTargetForInputPort(*in);
-          auto* texture = rt.texture ? rt.texture : &renderer.emptyTexture();
+          // View-type-matched fallback when the render target has no
+          // texture yet (no upstream producer wired). Same reasoning as
+          // the GrabsFromSource branch above: binding a sampler2D view
+          // into a sampler2DArray / samplerCube / sampler3D shader slot
+          // triggers Vulkan validation errors (VUID-…-viewType-07752)
+          // every frame and in some drivers crashes outright. Pick the
+          // empty texture whose view kind matches the shader's
+          // declared sampler type.
+          QRhiTexture* fallback = nullptr;
+          if((in->flags & Flag::Cubemap) == Flag::Cubemap)
+            fallback = &renderer.emptyTextureCube();
+          else if((in->flags & Flag::ThreeDimensional) == Flag::ThreeDimensional)
+            fallback = &renderer.emptyTexture3D();
+          else if((in->flags & Flag::TextureArray) == Flag::TextureArray)
+            fallback = &renderer.emptyTextureArray();
+          else
+            fallback = &renderer.emptyTexture();
+          QRhiTexture* texture = rt.texture ? rt.texture : fallback;
 
           auto spec = node.resolveRenderTargetSpecs(cur_port, renderer);
           auto sampler = rhi.newSampler(
@@ -959,7 +1417,7 @@ std::vector<Sampler> initInputSamplers(
           sampler->setName("initInputSamplers::sampler");
           SCORE_ASSERT(sampler->create());
 
-          samplers.push_back({sampler, texture});
+          samplers.push_back({sampler, texture, fallback});
 
           // If this port has sampleable depth, add depth sampler
           if((in->flags & Flag::SamplableDepth) == Flag::SamplableDepth)
@@ -971,7 +1429,7 @@ std::vector<Sampler> initInputSamplers(
             SCORE_ASSERT(depthSampler->create());
 
             auto* depthTex = rt.depthTexture ? rt.depthTexture : &renderer.emptyTexture();
-            samplers.push_back({depthSampler, depthTex});
+            samplers.push_back({depthSampler, depthTex, &renderer.emptyTexture()});
           }
         }
         break;
@@ -984,4 +1442,502 @@ std::vector<Sampler> initInputSamplers(
   }
   return samplers;
 }
+
+// ---------------------------------------------------------------------------
+// New render-target overloads (depth-only, layered, multiview)
+// ---------------------------------------------------------------------------
+
+TextureRenderTarget createDepthOnlyRenderTarget(
+    const RenderState& state, QSize sz, int samples, bool samplableDepth,
+    QRhiTexture::Format depthFmt)
+{
+  TextureRenderTarget ret;
+  ret.texture = nullptr;
+  ret.arrayLayers = 1;
+
+  // Depth resolve for MSAA sampleable depth — matches the main overload.
+  int effectiveSamples = samples;
+  bool useDepthResolve = false;
+  if(samplableDepth && samples > 1)
+  {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+    useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil);
+#endif
+    if(!useDepthResolve)
+    {
+      qWarning() << "createDepthOnlyRenderTarget: samplable depth + samples="
+                 << samples
+                 << "unsupported on this backend; degrading to samples=1.";
+      effectiveSamples = 1;
+    }
+  }
+
+  // Allocate the sampleable depth texture (what downstream shaders sample).
+  if(samplableDepth)
+  {
+    ret.depthTexture = state.rhi->newTexture(
+        depthFmt, sz, 1, QRhiTexture::RenderTarget);
+    ret.depthTexture->setName("createDepthOnlyRenderTarget::depthTexture");
+    SCORE_ASSERT(ret.depthTexture->create());
+
+    if(useDepthResolve)
+    {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+      ret.msDepthTexture = state.rhi->newTexture(
+          depthFmt, sz, effectiveSamples, QRhiTexture::RenderTarget);
+      ret.msDepthTexture->setName("createDepthOnlyRenderTarget::msDepthTexture");
+      SCORE_ASSERT(ret.msDepthTexture->create());
+#endif
+    }
+  }
+  else
+  {
+    ret.depthRenderBuffer = state.rhi->newRenderBuffer(
+        QRhiRenderBuffer::DepthStencil, sz, effectiveSamples);
+    ret.depthRenderBuffer->setName("createDepthOnlyRenderTarget::depthRB");
+    SCORE_ASSERT(ret.depthRenderBuffer->create());
+  }
+
+  // Some backends (notably GL ES) REQUIRE a color attachment — allocate a
+  // dummy color texture that never gets written to. The depth-only RT
+  // stores it in dummyColorTexture (owned, released with the RT).
+  //
+  // The dummy must have the same pixel size as the depth attachment: QRhi
+  // takes a texture render target's pixel size from color attachment 0, so
+  // a 1×1 dummy would shrink the whole depth pass to a single pixel.
+  ret.dummyColorTexture = state.rhi->newTexture(
+      QRhiTexture::RGBA8, sz, effectiveSamples, QRhiTexture::RenderTarget);
+  ret.dummyColorTexture->setName("createDepthOnlyRenderTarget::dummyColor");
+  SCORE_ASSERT(ret.dummyColorTexture->create());
+
+  QRhiTextureRenderTargetDescription desc;
+  {
+    QRhiColorAttachment color0(ret.dummyColorTexture);
+    desc.setColorAttachments({color0});
+  }
+
+  if(samplableDepth)
+  {
+    if(useDepthResolve)
+    {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+      desc.setDepthTexture(ret.msDepthTexture);
+      desc.setDepthResolveTexture(ret.depthTexture);
+#else
+      desc.setDepthTexture(ret.depthTexture);
+#endif
+    }
+    else
+    {
+      desc.setDepthTexture(ret.depthTexture);
+    }
+  }
+  else
+  {
+    desc.setDepthStencilBuffer(ret.depthRenderBuffer);
+  }
+
+  auto* renderTarget = state.rhi->newTextureRenderTarget(desc);
+  SCORE_ASSERT(renderTarget); // validate before the first dereference
+  renderTarget->setName("createDepthOnlyRenderTarget::rt");
+
+  auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+  SCORE_ASSERT(renderPass); // validate before the first dereference
+  renderPass->setName("createDepthOnlyRenderTarget::rp");
+
+  renderTarget->setRenderPassDescriptor(renderPass);
+  SCORE_ASSERT(renderTarget->create());
+
+  ret.renderTarget = renderTarget;
+  ret.renderPass = renderPass;
+  return ret;
+}
+
+TextureRenderTarget createLayeredRenderTarget(
+    const RenderState& state, QRhiTexture* colorTextureArray, int renderLayer,
+    QRhiTexture* depthTex, int samples)
+{
+  TextureRenderTarget ret;
+  SCORE_ASSERT(colorTextureArray);
+  SCORE_ASSERT(renderLayer >= 0);
+
+  ret.texture = colorTextureArray;
+  ret.arrayLayers = std::max(colorTextureArray->arraySize(), 1);
+  ret.renderLayer = renderLayer;
+
+  QRhiTextureRenderTargetDescription desc;
+  {
+    QRhiColorAttachment color0(colorTextureArray);
+    color0.setLayer(renderLayer);
+    desc.setColorAttachments({color0});
+  }
+
+  if(depthTex)
+  {
+    ret.depthTexture = depthTex;
+    // For layered rendering with a depth *array* texture, we'd need to set
+    // the layer too. We expect a single shared 2D depth texture in most
+    // cases, which is fine.
+    desc.setDepthTexture(depthTex);
+  }
+
+  auto* renderTarget = state.rhi->newTextureRenderTarget(desc);
+  SCORE_ASSERT(renderTarget); // validate before the first dereference
+  renderTarget->setName("createLayeredRenderTarget::rt");
+
+  auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+  SCORE_ASSERT(renderPass); // validate before the first dereference
+  renderPass->setName("createLayeredRenderTarget::rp");
+
+  renderTarget->setRenderPassDescriptor(renderPass);
+  SCORE_ASSERT(renderTarget->create());
+
+  ret.renderTarget = renderTarget;
+  ret.renderPass = renderPass;
+  (void)samples;
+  return ret;
+}
+
+TextureRenderTarget createMultiViewRenderTarget(
+    const RenderState& state, QRhiTexture* colorTextureArray, int multiViewCount,
+    QRhiTexture* depthTextureArray, int samples)
+{
+  TextureRenderTarget ret;
+  SCORE_ASSERT(colorTextureArray);
+  SCORE_ASSERT(multiViewCount >= 2);
+
+  ret.texture = colorTextureArray;
+  ret.arrayLayers = std::max(colorTextureArray->arraySize(), multiViewCount);
+  ret.multiViewCount = multiViewCount;
+
+  QRhiTextureRenderTargetDescription desc;
+  {
+    QRhiColorAttachment color0(colorTextureArray);
+    // Render to layers [0..multiViewCount-1] via gl_ViewIndex.
+    color0.setLayer(0);
+#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+    color0.setMultiViewCount(multiViewCount);
+#endif
+    desc.setColorAttachments({color0});
+  }
+
+  if(depthTextureArray)
+  {
+    ret.depthTexture = depthTextureArray;
+    desc.setDepthTexture(depthTextureArray);
+  }
+
+  auto* renderTarget = state.rhi->newTextureRenderTarget(desc);
+  SCORE_ASSERT(renderTarget); // validate before the first dereference
+  renderTarget->setName("createMultiViewRenderTarget::rt");
+
+  auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+  SCORE_ASSERT(renderPass); // validate before the first dereference
+  renderPass->setName("createMultiViewRenderTarget::rp");
+
+  renderTarget->setRenderPassDescriptor(renderPass);
+  SCORE_ASSERT(renderTarget->create());
+
+  ret.renderTarget = renderTarget;
+  ret.renderPass = renderPass;
+  (void)samples;
+  return ret;
+}
+
+TextureRenderTarget createDepthOnlyRenderTarget(
+    const RenderState& state, QRhiTexture* externalDepthTexture, int samples,
+    bool samplableDepth)
+{
+  // Like createDepthOnlyRenderTarget(sz, ...) but builds the RT AROUND a
+  // caller-supplied depth texture instead of allocating (and the old buggy
+  // call site then immediately deleting) an internal one. The supplied
+  // texture may be a plain 2D depth texture or a TextureArray (layered /
+  // shadow-cascade depth) — in both cases QRhi attaches layer 0 by default
+  // for a depth-only pass, which is what we want here.
+  //
+  // Ownership: `externalDepthTexture` becomes `ret.depthTexture` and is
+  // released with the RT (TextureRenderTarget::release()), matching the
+  // ownership the previous (broken) code implied.
+  TextureRenderTarget ret;
+  SCORE_ASSERT(externalDepthTexture);
+  ret.texture = nullptr;
+  ret.arrayLayers = std::max(externalDepthTexture->arraySize(), 1);
+
+  // Depth resolve for MSAA sampleable depth — matches the sz overload.
+  int effectiveSamples = samples;
+  bool useDepthResolve = false;
+  if(samplableDepth && samples > 1)
+  {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+    useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil);
+#endif
+    if(!useDepthResolve)
+    {
+      qWarning() << "createDepthOnlyRenderTarget(external): samplable depth + samples="
+                 << samples
+                 << "unsupported on this backend; degrading to samples=1.";
+      effectiveSamples = 1;
+    }
+  }
+
+  ret.depthTexture = externalDepthTexture;
+
+  if(useDepthResolve)
+  {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+    ret.msDepthTexture = state.rhi->newTexture(
+        externalDepthTexture->format(), externalDepthTexture->pixelSize(),
+        effectiveSamples, QRhiTexture::RenderTarget);
+    ret.msDepthTexture->setName(
+        "createDepthOnlyRenderTarget(external)::msDepthTexture");
+    SCORE_ASSERT(ret.msDepthTexture->create());
+#endif
+  }
+
+  // Some backends (notably GL ES) REQUIRE a color attachment. The dummy is
+  // sized like the depth texture, since color attachment 0 sets the RT size.
+  ret.dummyColorTexture = state.rhi->newTexture(QRhiTexture::RGBA8,
+      externalDepthTexture->pixelSize(), effectiveSamples, QRhiTexture::RenderTarget);
+  ret.dummyColorTexture->setName(
+      "createDepthOnlyRenderTarget(external)::dummyColor");
+  SCORE_ASSERT(ret.dummyColorTexture->create());
+
+  QRhiTextureRenderTargetDescription desc;
+  {
+    QRhiColorAttachment color0(ret.dummyColorTexture);
+    desc.setColorAttachments({color0});
+  }
+
+  if(useDepthResolve)
+  {
+#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
+    desc.setDepthTexture(ret.msDepthTexture);
+    desc.setDepthResolveTexture(ret.depthTexture);
+#else
+    desc.setDepthTexture(ret.depthTexture);
+#endif
+  }
+  else
+  {
+    desc.setDepthTexture(ret.depthTexture);
+  }
+
+  auto* renderTarget = state.rhi->newTextureRenderTarget(desc);
+  SCORE_ASSERT(renderTarget); // validate before the first dereference
+  renderTarget->setName("createDepthOnlyRenderTarget(external)::rt");
+
+  auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+  SCORE_ASSERT(renderPass); // validate before the first dereference
+  renderPass->setName("createDepthOnlyRenderTarget(external)::rp");
+
+  renderTarget->setRenderPassDescriptor(renderPass);
+  SCORE_ASSERT(renderTarget->create());
+
+  ret.renderTarget = renderTarget;
+  ret.renderPass = renderPass;
+  return ret;
+}
+
+TextureRenderTarget createLayeredRenderTarget(
+    const RenderState& state, std::span<QRhiTexture* const> colorTextures,
+    int renderLayer, QRhiTexture* depthTex, int samples)
+{
+  // Multi-attachment (MRT) layered variant: attaches ALL color textures to
+  // the render pass so the pipeline blend-state count (driven by
+  // rt.colorAttachmentCount()) agrees with the actual attachment count.
+  // Attaching only color[0] while the pipeline declares N blend targets is a
+  // Vulkan pipeline-create validation error AND silently drops outputs 1..N.
+  TextureRenderTarget ret;
+  SCORE_ASSERT(!colorTextures.empty());
+  SCORE_ASSERT(colorTextures[0]);
+  SCORE_ASSERT(renderLayer >= 0);
+
+  ret.texture = colorTextures[0];
+  for(std::size_t i = 1; i < colorTextures.size(); i++)
+    ret.additionalColorTextures.push_back(colorTextures[i]);
+  ret.arrayLayers = std::max(colorTextures[0]->arraySize(), 1);
+  ret.renderLayer = renderLayer;
+
+  QList<QRhiColorAttachment> attachments;
+  for(auto* tex : colorTextures)
+  {
+    SCORE_ASSERT(tex); // every entry is dereferenced below, not only [0]
+    QRhiColorAttachment att(tex);
+    // Layered textures render to `renderLayer`; plain 2D ones keep layer 0.
+    if(tex->arraySize() > 1)
+      att.setLayer(renderLayer);
+    attachments.append(att);
+  }
+
+  QRhiTextureRenderTargetDescription desc;
+  desc.setColorAttachments(attachments.begin(), attachments.end());
+
+  if(depthTex)
+  {
+    ret.depthTexture = depthTex;
+    desc.setDepthTexture(depthTex);
+  }
+
+  auto* renderTarget = state.rhi->newTextureRenderTarget(desc);
+  SCORE_ASSERT(renderTarget); // check before the first dereference
+  renderTarget->setName("createLayeredRenderTarget(MRT)::rt");
+
+  auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+  SCORE_ASSERT(renderPass); // check before the first dereference
+  renderPass->setName("createLayeredRenderTarget(MRT)::rp");
+
+  renderTarget->setRenderPassDescriptor(renderPass);
+  SCORE_ASSERT(renderTarget->create());
+
+  ret.renderTarget = renderTarget;
+  ret.renderPass = renderPass;
+  (void)samples; // `samples` is currently unused by this overload
+  return ret;
+}
+
+TextureRenderTarget createMultiViewRenderTarget(
+    const RenderState& state, std::span<QRhiTexture* const> colorTextures,
+    int multiViewCount, QRhiTexture* depthTextureArray, int samples)
+{
+  // Multi-attachment (MRT) multiview variant: attaches ALL color textures
+  // (each a TextureArray with >= multiViewCount layers) with per-attachment
+  // setMultiViewCount, so attachments == pipeline blend targets. See the
+  // layered overload above for why attaching only color[0] is a bug.
+  TextureRenderTarget ret;
+  SCORE_ASSERT(!colorTextures.empty());
+  SCORE_ASSERT(colorTextures[0]);
+  SCORE_ASSERT(multiViewCount >= 2);
+
+  ret.texture = colorTextures[0];
+  for(std::size_t i = 1; i < colorTextures.size(); i++)
+    ret.additionalColorTextures.push_back(colorTextures[i]);
+  ret.arrayLayers = std::max(colorTextures[0]->arraySize(), multiViewCount);
+  ret.multiViewCount = multiViewCount;
+
+  QList<QRhiColorAttachment> attachments;
+  for(auto* tex : colorTextures)
+  {
+    SCORE_ASSERT(tex); // reject null entries past index 0 as well
+    QRhiColorAttachment att(tex);
+    att.setLayer(0); // base layer; gl_ViewIndex selects [0..multiViewCount-1]
+#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+    att.setMultiViewCount(multiViewCount);
+#endif
+    attachments.append(att);
+  }
+
+  QRhiTextureRenderTargetDescription desc;
+  desc.setColorAttachments(attachments.begin(), attachments.end());
+
+  if(depthTextureArray)
+  {
+    ret.depthTexture = depthTextureArray;
+    desc.setDepthTexture(depthTextureArray);
+  }
+
+  auto* renderTarget = state.rhi->newTextureRenderTarget(desc);
+  SCORE_ASSERT(renderTarget); // check before the first dereference
+  renderTarget->setName("createMultiViewRenderTarget(MRT)::rt");
+
+  auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor();
+  SCORE_ASSERT(renderPass); // check before the first dereference
+  renderPass->setName("createMultiViewRenderTarget(MRT)::rp");
+
+  renderTarget->setRenderPassDescriptor(renderPass);
+  SCORE_ASSERT(renderTarget->create());
+
+  ret.renderTarget = renderTarget;
+  ret.renderPass = renderPass;
+  (void)samples; // `samples` is currently unused by this overload
+  return ret;
+}
+
+QRhiTexture::Format parseOutputFormat(
+    const std::string& fmt, QRhiTexture::Format fallback) noexcept
+{
+  // Table of recognised (lower-case) names; `fmt` is matched against it
+  // case-insensitively and anything unlisted resolves to `fallback`.
+  static constexpr struct { const char* name; QRhiTexture::Format format; } known[] = {
+      {"rgba8", QRhiTexture::RGBA8},     {"bgra8", QRhiTexture::BGRA8},
+      {"r8", QRhiTexture::R8},           {"rg8", QRhiTexture::RG8},
+      {"r16", QRhiTexture::R16},         {"rg16", QRhiTexture::RG16},
+      {"r16f", QRhiTexture::R16F},       {"r32f", QRhiTexture::R32F},
+      {"rgba16f", QRhiTexture::RGBA16F}, {"rgba32f", QRhiTexture::RGBA32F},
+      {"d16", QRhiTexture::D16},         {"d24", QRhiTexture::D24},
+      {"d24s8", QRhiTexture::D24S8},     {"d32f", QRhiTexture::D32F},
+  };
+  std::string lowered = fmt;
+  for(char& ch : lowered)
+    ch = (char)std::tolower((unsigned char)ch);
+  for(const auto& entry : known)
+    if(lowered == entry.name)
+      return entry.format;
+  return fallback;
+}
+
+// ---------------- makeSampler -----------------------------------------------
+namespace
+{
+// Case-insensitive filter lookup; empty or unrecognised strings yield `def`.
+static QRhiSampler::Filter parseFilter(const std::string& s, QRhiSampler::Filter def)
+{
+  std::string v = s;
+  for(auto& c : v) c = (char)std::tolower((unsigned char)c); // negative char is UB
+  if(v == "nearest") return QRhiSampler::Nearest;
+  if(v == "linear")  return QRhiSampler::Linear;
+  if(v == "none")    return QRhiSampler::None;
+  return def;
+}
+// Case-insensitive wrap-mode lookup ('-' reads as '_'); empty/unknown strings yield `def`.
+static QRhiSampler::AddressMode parseAddress(const std::string& s, QRhiSampler::AddressMode def)
+{
+  std::string v = s;
+  for(auto& c : v) // unsigned char cast: tolower() on a negative char is UB
+    c = (c == '-') ? '_' : (char)std::tolower((unsigned char)c);
+  if(v == "repeat")                                return QRhiSampler::Repeat;
+  if(v == "clamp" || v == "clamp_to_edge")         return QRhiSampler::ClampToEdge;
+  if(v == "mirror" || v == "mirrored_repeat")      return QRhiSampler::Mirror;
+  //#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
+  //  if(v == "mirror_once" || v == "mirror_clamp_to_edge")
+  //    return QRhiSampler::MirrorOnce;
+  //#endif
+  return def;
+}
+// Case-insensitive compare-op lookup ('-' reads as '_'); empty/unknown strings yield Never.
+static QRhiSampler::CompareOp parseCompare(const std::string& s)
+{
+  std::string v = s;
+  for(auto& c : v) // unsigned char cast: tolower() on a negative char is UB
+    c = (c == '-') ? '_' : (char)std::tolower((unsigned char)c);
+  if(v == "never")                                    return QRhiSampler::Never;
+  if(v == "less")                                     return QRhiSampler::Less;
+  if(v == "equal")                                    return QRhiSampler::Equal;
+  if(v == "less_equal"   || v == "lequal")            return QRhiSampler::LessOrEqual;
+  if(v == "greater")                                  return QRhiSampler::Greater;
+  if(v == "not_equal"    || v == "neq")               return QRhiSampler::NotEqual;
+  if(v == "greater_equal"|| v == "gequal")            return QRhiSampler::GreaterOrEqual;
+  if(v == "always")                                   return QRhiSampler::Always;
+  return QRhiSampler::Never;
+}
+}
+
+QRhiSampler* makeSampler(QRhi& rhi, const isf::sampler_config& cfg)
+{
+  // QRhiSampler::None is only valid as a mipmap mode: a "none" min/mag
+  // filter falls back to the default (linear) instead of reaching QRhi.
+  const auto minMag = [](QRhiSampler::Filter f) {
+    return f == QRhiSampler::None ? QRhiSampler::Linear : f;
+  };
+  const auto base = minMag(parseFilter(cfg.filter, QRhiSampler::Linear));
+  const auto minF = minMag(parseFilter(cfg.min_filter, base));
+  const auto magF = minMag(parseFilter(cfg.mag_filter, base));
+  const auto mipF = parseFilter(cfg.mipmap_mode, QRhiSampler::None);
+  // Per-axis wrap modes default to the common `wrap`, itself clamp-to-edge.
+  const auto baseWrap = parseAddress(cfg.wrap, QRhiSampler::ClampToEdge);
+  auto* s = rhi.newSampler(
+      magF, minF, mipF, parseAddress(cfg.wrap_s, baseWrap),
+      parseAddress(cfg.wrap_t, baseWrap), parseAddress(cfg.wrap_r, baseWrap));
+  s->setTextureCompareOp(parseCompare(cfg.compare));
+  s->create();
+  return s;
+}
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp
index 55b43ec9e3..caa2770eff 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp
@@ -5,6 +5,7 @@
 #include <Gfx/Graph/RenderState.hpp>
 #include <Gfx/Graph/Scale.hpp>
 #include <Gfx/Graph/Uniforms.hpp>
+#include <Gfx/Graph/VertexFallbackPlan.hpp>
 
 #include <ossia/detail/hash_map.hpp>
 #include <ossia/detail/small_flat_map.hpp>
@@ -13,20 +14,37 @@
 
 #include <span>
 
+namespace isf
+{
+struct descriptor;
+}
+
 namespace score::gfx
 {
 class Node;
 class NodeModel;
 struct Port;
+class VertexFallbackPool;
 struct Edge;
 class RenderList;
 /**
  * @brief Stores a sampler and the texture currently associated with it.
+ *
+ * `fallback` is the view-type-matched empty texture to bind when `texture`
+ * becomes null (no upstream, feedback-loop short, disconnect race). It MUST
+ * be one of `RenderList::emptyTexture() / emptyTexture3D() / emptyTextureCube()
+ * / emptyTextureArray()` so the bound view type matches the shader's
+ * sampler declaration. Leaving this null is only safe for plain sampler2D
+ * slots — a samplerCube / sampler3D / sampler2DArray slot with a null
+ * `fallback` will trip Vulkan viewType validation or, if the fallback
+ * path upstream also produced null, crash with a VK_NULL_HANDLE descriptor
+ * write.
  */
 struct Sampler
 {
   QRhiSampler* sampler{};
   QRhiTexture* texture{};
+  QRhiTexture* fallback{};
 };
 
 /**
@@ -45,6 +63,13 @@ struct AudioTexture
     FFT,
     Histogram
   } mode{};
+
+  // Optional sampler config. Empty strings keep legacy defaults
+  // (linear / clamp_to_edge). Populated by ISFNode from the parsed
+  // audio_input::sampler (FILTER / WRAP). Useful for FFT reads where
+  // NEAREST filtering avoids smearing adjacent bins.
+  std::string filter;
+  std::string wrap;
 };
 
 /**
@@ -107,10 +132,12 @@ struct Pipeline
 
   void release()
   {
-    delete pipeline;
+    if(pipeline)
+      pipeline->deleteLater();
     pipeline = nullptr;
 
-    delete srb;
+    if(srb)
+      srb->deleteLater();
     srb = nullptr;
   }
 };
@@ -120,67 +147,122 @@ struct Pipeline
  */
 struct TextureRenderTarget
 {
+  // The first five members must keep this order: out-of-tree addons
+  // aggregate-initialize {texture, colorRenderBuffer, depthRenderBuffer,
+  // renderPass, renderTarget}.
   QRhiTexture* texture{};                              // Primary color attachment (location 0)
-  std::vector<QRhiTexture*> additionalColorTextures;   // MRT: locations 1..N
   QRhiRenderBuffer* colorRenderBuffer{};
   QRhiRenderBuffer* depthRenderBuffer{};
-  QRhiTexture* depthTexture{};                         // Sampleable depth (alternative to depthRenderBuffer)
-  QRhiTexture* msDepthTexture{};                       // MSAA depth attachment when depthTexture is the resolve target
   QRhiRenderPassDescriptor* renderPass{};
   QRhiRenderTarget* renderTarget{};
 
-  operator bool() const noexcept { return texture != nullptr; }
+  std::vector<QRhiTexture*> additionalColorTextures;   // MRT: locations 1..N
+  QRhiTexture* depthTexture{};                         // Sampleable depth (alternative to depthRenderBuffer)
+  QRhiTexture* msDepthTexture{};                       // MSAA depth attachment when depthTexture is the resolve target
+
+  // A 1×1 color texture allocated when the backend requires a color attachment
+  // but the user only wants depth-only rendering. Owned by this RT.
+  QRhiTexture* dummyColorTexture{};
+
+  // Number of array layers on `texture` (1 = non-layered, >1 = texture array).
+  int arrayLayers{1};
+
+  // Multiview view count (0/1 = disabled).
+  int multiViewCount{0};
+
+  // Which layer of `texture`/`additionalColorTextures` this RT renders to.
+  // -1 = not applicable (non-layered, or MultiView handles it automatically).
+  int renderLayer{-1};
+
+  operator bool() const noexcept { return texture != nullptr || dummyColorTexture != nullptr || depthTexture != nullptr; }
 
   int colorAttachmentCount() const noexcept
   {
-    return texture ? 1 + (int)additionalColorTextures.size() : 0;
+    if(texture)
+      return 1 + (int)additionalColorTextures.size();
+    if(dummyColorTexture)
+      return 1;
+    return 0;
   }
 
   // Returns the actual MSAA sample count of this render target, or -1 if it
-  // cannot be determined from the stored fields (e.g. when only renderPass is
-  // set, as for placeholders that target a swap chain). Callers must treat
-  // -1 as "unknown — fall back to the renderlist's global sample count".
-  // This value is the authoritative input to QRhiGraphicsPipeline::setSampleCount()
-  // when known, since an RT may have been degraded (samplable-depth + MSAA
-  // without depth-resolve support).
+  // cannot be determined from the stored fields. Callers must treat -1 as
+  // "unknown — fall back to the renderlist's global sample count".
+  //
+  // Lookup priority:
+  //   1. colorRenderBuffer (owned MSAA attachment — always authoritative).
+  //   2. texture (single-sample resolve target OR non-MSAA render target).
+  //   3. msDepthTexture (MSAA depth attachment when depth resolve is used).
+  //   4. depthTexture (depth-only RTs).
+  //   5. renderTarget is deliberately NOT queried. An RT that owns its
+  //      attachments has already returned above; what remains is a "bare"
+  //      RT that only carries renderTarget + renderPass (e.g. a swap-chain
+  //      wrapper returned by QRhiSwapChain::currentFrameRenderTarget()).
+  //      Swap-chain render-target objects lazily write their sampleCount
+  //      only when createOrResize() runs — any read before that returns
+  //      the default 1, which would silently mismatch a multi-sample
+  //      renderPassDescriptor and produce
+  //      VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853.
+  //   6. Otherwise return -1 so the caller uses RenderList::samples(), which
+  //      IS authoritative for externally-managed swap-chain RTs (it drove
+  //      the swap-chain sample count in the first place).
   int sampleCount() const noexcept
   {
-    if(renderTarget)
-      return renderTarget->sampleCount();
     if(colorRenderBuffer)
       return colorRenderBuffer->sampleCount();
     if(texture)
       return texture->sampleCount();
+    if(msDepthTexture)
+      return msDepthTexture->sampleCount();
+    if(depthTexture)
+      return depthTexture->sampleCount();
+    // renderTarget alone without any owned attachment = swap-chain wrapper.
+    // Its sampleCount is unreliable pre-createOrResize; fall through.
     return -1;
   }
 
   void release()
   {
-    if(texture)
+    if(texture || dummyColorTexture || depthTexture)
     {
-      delete texture;
+      // Use deleteLater() for all GPU resources: they may still be referenced
+      // by the frame being recorded. Inside beginFrame()/endFrame(),
+      // QRhiResource::deleteLater() defers destruction until the frame is
+      // submitted by endFrame(); outside a frame it deletes immediately.
+      if(texture)
+        texture->deleteLater();
       texture = nullptr;
 
+      if(dummyColorTexture)
+        dummyColorTexture->deleteLater();
+      dummyColorTexture = nullptr;
+
       for(auto* t : additionalColorTextures)
-        delete t;
+        if(t) t->deleteLater(); // `delete` tolerated null entries; keep that
       additionalColorTextures.clear();
 
-      delete colorRenderBuffer;
+      if(colorRenderBuffer)
+        colorRenderBuffer->deleteLater();
       colorRenderBuffer = nullptr;
 
-      delete depthRenderBuffer;
+      if(depthRenderBuffer)
+        depthRenderBuffer->deleteLater();
       depthRenderBuffer = nullptr;
 
-      delete depthTexture;
+      if(depthTexture)
+        depthTexture->deleteLater();
       depthTexture = nullptr;
 
-      delete msDepthTexture;
+      if(msDepthTexture)
+        msDepthTexture->deleteLater();
       msDepthTexture = nullptr;
 
-      delete renderPass;
+      if(renderPass)
+        renderPass->deleteLater();
       renderPass = nullptr;
 
-      delete renderTarget;
+      if(renderTarget)
+        renderTarget->deleteLater();
       renderTarget = nullptr;
     }
   }
@@ -225,6 +307,106 @@ TextureRenderTarget createRenderTarget(
     QRhiTexture* depthTexture,
     int samples);
 
+/**
+ * @brief Create a depth-only render target.
+ *
+ * Allocates a sampleable depth texture (samplableDepth=true) or a depth
+ * renderbuffer. If the backend rejects color-less render targets, a 1x1
+ * RGBA8 dummy color texture is allocated and stored in the
+ * TextureRenderTarget::dummyColorTexture field (owned by the RT).
+ *
+ * The resulting TextureRenderTarget has:
+ *   - `depthTexture` or `depthRenderBuffer` set (never both)
+ *   - `texture` == nullptr (depth-only semantics)
+ *   - `dummyColorTexture` may be non-null on some backends
+ */
+SCORE_PLUGIN_GFX_EXPORT
+TextureRenderTarget createDepthOnlyRenderTarget(
+    const RenderState& state, QSize sz, int samples, bool samplableDepth = true,
+    QRhiTexture::Format depthFmt = QRhiTexture::D32F);
+
+/**
+ * @brief Create a depth-only render target around an EXTERNAL depth texture.
+ *
+ * Builds the RT around `externalDepthTexture` (caller-allocated, already
+ * created) instead of allocating its own. Use this when the depth texture is
+ * named/owned by the node (so textureForOutput() can return it) — it avoids
+ * the previous bug where the RT was built around an internal texture that was
+ * then immediately deleted while still referenced by the render pass.
+ *
+ * `externalDepthTexture` may be a plain 2D depth texture or a TextureArray
+ * (layered / shadow-cascade depth). It becomes `ret.depthTexture` and is
+ * released with the RT.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+TextureRenderTarget createDepthOnlyRenderTarget(
+    const RenderState& state, QRhiTexture* externalDepthTexture, int samples,
+    bool samplableDepth = true);
+
+/**
+ * @brief Create a render target that targets a single layer of a texture array.
+ *
+ * colorTextureArray must have been created with QRhiTexture::TextureArray
+ * and at least (renderLayer + 1) layers.
+ *
+ * depthTexture may be a regular 2D texture (shared across layers) or nullptr
+ * to skip depth (use a renderbuffer instead via createRenderTarget overloads).
+ */
+SCORE_PLUGIN_GFX_EXPORT
+TextureRenderTarget createLayeredRenderTarget(
+    const RenderState& state, QRhiTexture* colorTextureArray, int renderLayer,
+    QRhiTexture* depthTexture, int samples);
+
+/**
+ * @brief Multi-attachment (MRT) layered render target.
+ *
+ * Same as the single-texture overload but attaches ALL `colorTextures` to the
+ * render pass (locations 0..N-1), so the number of attachments matches the
+ * pipeline blend-state count (rt.colorAttachmentCount()). Each layered color
+ * texture renders to `renderLayer`; plain 2D textures keep layer 0.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+TextureRenderTarget createLayeredRenderTarget(
+    const RenderState& state, std::span<QRhiTexture* const> colorTextures,
+    int renderLayer, QRhiTexture* depthTexture, int samples);
+
+/**
+ * @brief Create a multiview render target (single RT drawing N views at once).
+ *
+ * colorTextureArray must be a TextureArray with at least multiViewCount layers.
+ * depthTextureArray may be nullptr for no depth, or a TextureArray with the
+ * same layer count.
+ *
+ * Requires state.caps.multiview == true — caller must check.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+TextureRenderTarget createMultiViewRenderTarget(
+    const RenderState& state, QRhiTexture* colorTextureArray, int multiViewCount,
+    QRhiTexture* depthTextureArray, int samples);
+
+/**
+ * @brief Multi-attachment (MRT) multiview render target.
+ *
+ * Same as the single-texture overload but attaches ALL `colorTextures` (each a
+ * TextureArray with >= multiViewCount layers) with per-attachment multiview, so
+ * attachments == pipeline blend targets. Requires state.caps.multiview == true.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+TextureRenderTarget createMultiViewRenderTarget(
+    const RenderState& state, std::span<QRhiTexture* const> colorTextures,
+    int multiViewCount, QRhiTexture* depthTextureArray, int samples);
+
+/**
+ * @brief Map an ISF/CSF FORMAT string to a QRhiTexture::Format.
+ *
+ * Supported: rgba8, bgra8, r8, rg8, r16, rg16, r16f, r32f, rgba16f, rgba32f,
+ * d16, d24, d24s8, d32f. Unknown / empty strings fall back to the caller's
+ * default. Lookup is case-insensitive.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+QRhiTexture::Format parseOutputFormat(
+    const std::string& fmt, QRhiTexture::Format fallback) noexcept;
+
 SCORE_PLUGIN_GFX_EXPORT
 void replaceBuffer(QRhiShaderResourceBindings&, int binding, QRhiBuffer* newBuffer);
 SCORE_PLUGIN_GFX_EXPORT
@@ -279,19 +461,75 @@ QRhiShaderResourceBindings* createDefaultBindings(
     QRhiBuffer* materialUBO, std::span<const Sampler> samplers,
     std::span<QRhiShaderResourceBinding> additionalBindings = {});
 
+/**
+ * @brief Match a (name, semantic) request to an upstream geometry attribute.
+ *
+ * Three-stage cascade shared by all shader modes:
+ *   1. semantic_key → name_to_semantic → if known, geom.find(semantic).
+ *   2. Custom-attribute lookup by `name`.
+ *   3. display_name == name fallback (so { NAME: "position", SEMANTIC:
+ *      "custom" } still finds the real position attribute when no custom
+ *      one shadows it).
+ * If `semantic_key` is empty, `name` is used as the semantic key.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+const ossia::geometry::attribute* findGeometryAttribute(
+    const ossia::geometry& geom, std::string_view name, std::string_view semantic_key);
+
 /**
  * @brief Remap a pipeline's vertex input layout using semantic matching.
  *
- * For each shader input variable, resolves its name to an attribute semantic,
- * finds the matching attribute in the geometry, then creates a vertex input
- * attribute with binding/format/offset from the geometry and location from
- * the shader. Returns true on success, false if a required attribute is missing.
+ * Reflects the compiled vertex shader to find each `in` variable, then for
+ * each one runs findGeometryAttribute(name, name) — useful when no isf
+ * descriptor is around (legacy callers). Returns true on success, false if
+ * a required attribute can't be matched.
  */
 SCORE_PLUGIN_GFX_EXPORT
 bool remapPipelineVertexInputs(
     QRhiGraphicsPipeline& pip, const QShader& vertexShader,
     const ossia::geometry& geom);
 
+/**
+ * @brief Same as above, but honours explicit SEMANTIC on each VERTEX_INPUTS
+ * entry from the isf descriptor when present.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+bool remapPipelineVertexInputs(
+    QRhiGraphicsPipeline& pip, const QShader& vertexShader,
+    const ossia::geometry& geom, const isf::descriptor& desc);
+
+// FallbackBindingPlan now lives in its own header so both Utils.hpp and
+// CustomMesh.hpp can depend on it without creating an include cycle
+// (Utils.hpp depends on Mesh.hpp, which transitively reaches CustomMesh
+// consumers). See <Gfx/Graph/VertexFallbackPlan.hpp>.
+
+/**
+ * @brief Fallback-aware overload: the strict-matching behaviour of the
+ *        overload above, extended so VERTEX_INPUTS entries with
+ *        "REQUIRED": false silently resolve to a shared identity buffer
+ *        from the pool when their semantic is absent upstream.
+ *
+ * @p pool     per-RenderList shared fallback buffer pool
+ * @p batch    any uploads for freshly-allocated fallback buffers are
+ *             recorded here
+ * @p outPlan  filled with the bindings the caller must merge into the
+ *             draw's QRhiCommandBuffer::VertexInput array. Cleared on
+ *             entry.
+ *
+ * Returns false (and logs which input failed) if:
+ *   - a REQUIRED=true input has no matching upstream attribute, OR
+ *   - a REQUIRED=false input has no matching upstream attribute AND the
+ *     declared GLSL TYPE is unsupported (mat4 / integer / sampler) OR
+ *     the resolved semantic is not in the whitelist AND no explicit
+ *     DEFAULT was supplied.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+bool remapPipelineVertexInputs(
+    QRhiGraphicsPipeline& pip, const QShader& vertexShader,
+    const ossia::geometry& geom, const isf::descriptor& desc,
+    QRhi& rhi, VertexFallbackPool& pool, QRhiResourceUpdateBatch& batch,
+    FallbackBindingPlan& outPlan);
+
 /**
  * @brief Create a render pipeline following the score conventions for shaders and materials.
  */
@@ -302,6 +540,86 @@ Pipeline buildPipeline(
     QRhiBuffer* materialUBO, std::span<const Sampler> samplers,
     std::span<QRhiShaderResourceBinding> additionalBindings = {});
 
+/**
+ * @brief Lower-level buildPipeline variant: bring your own SRB.
+ *
+ * The returned Pipeline::srb equals the srb you passed — no ownership
+ * transfer. Useful when the caller wants to share a pipeline across
+ * multiple Passes that each have their own SRB (layout-compatible with
+ * this one per QRhi contract); the pipeline's stored SRB is only used
+ * for layout extraction at create() time and never dereferenced at draw
+ * time.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+Pipeline buildPipeline(
+    const RenderList& renderer, const Mesh& mesh, const QShader& vertexS,
+    const QShader& fragmentS, const TextureRenderTarget& rt,
+    QRhiShaderResourceBindings* srb);
+
+// Forward declarations — definitions in PipelineStateHelpers.hpp, IsfBindingsBuilder.hpp
+} // namespace score::gfx
+
+namespace isf
+{
+struct sampler_config;
+}
+
+namespace score::gfx
+{
+/**
+ * @brief Build a QRhiSampler from an isf::sampler_config.
+ *
+ * Fields left empty/unset in the config are filled with ossia defaults
+ * (linear filtering, no mipmaps, clamp-to-edge). When the config sets a
+ * comparison op other than "never", the returned sampler is a shadow
+ * comparison sampler.
+ *
+ * The returned sampler is created (create() was called) and has no name
+ * assigned; callers should setName() before or after create() as needed.
+ * Ownership follows the standard QRhi convention — callers delete it.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+QRhiSampler* makeSampler(QRhi& rhi, const isf::sampler_config& cfg);
+} // namespace score::gfx
+
+namespace isf
+{
+struct pipeline_state;
+}
+
+namespace score::gfx
+{
+struct GraphicsStorageResources;
+
+/**
+ * @brief Create a render pipeline applying pipeline_state from an ISF descriptor.
+ *
+ * This overload replaces the legacy hardcoded `setDepthTest(true)/setDepthWrite(true)`
+ * on RawRaster and the `anyNodeRequiresDepth()` fallback on ISF with a unified
+ * path driven by `state`. When `state` is empty (all fields nullopt), behaviour
+ * matches the legacy variant exactly for backwards compatibility.
+ *
+ * `extraBindings` is typically the result of IsfBindingsBuilder::buildExtraBindings().
+ * `multiViewCount` >= 2 activates multiview rendering (requires state.caps.multiview).
+ *
+ * Plan 09 S6: when `useShadingRate == true` AND
+ * `renderer.state.caps.variableRateShading == true`, the pipeline
+ * gets `QRhiGraphicsPipeline::UsesShadingRate`. The shading-rate
+ * texture / per-draw rate itself is supplied elsewhere (via the
+ * render-target attachment's `setShadingRateMap` or the command-
+ * buffer's `setShadingRate`). Presets opt in; silent no-op when the
+ * backend doesn't support VRS.
+ */
+SCORE_PLUGIN_GFX_EXPORT
+Pipeline buildPipelineWithState(
+    const RenderList& renderer, const Mesh& mesh, const QShader& vertexS,
+    const QShader& fragmentS, const TextureRenderTarget& rt, QRhiBuffer* processUBO,
+    QRhiBuffer* materialUBO, std::span<const Sampler> samplers,
+    std::span<QRhiShaderResourceBinding> extraBindings,
+    const isf::pipeline_state& state,
+    int multiViewCount = 0,
+    bool useShadingRate = false);
+
 /**
  * @brief Get a pair of compiled vertex / fragment shaders from GLSL 4.5 sources.
  *
@@ -434,5 +752,6 @@ inline void uploadStaticBufferWithStoredData(
 
 SCORE_PLUGIN_GFX_EXPORT
 std::vector<Sampler> initInputSamplers(
-    const score::gfx::Node& node, RenderList& renderer, const std::vector<Port*>& ports);
+    const score::gfx::Node& node, RenderList& renderer, const std::vector<Port*>& ports,
+    const isf::descriptor* desc = nullptr);
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.cpp
new file mode 100644
index 0000000000..6e3d294c4b
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.cpp
@@ -0,0 +1,228 @@
+#include <Gfx/Graph/VertexFallbackDefaults.hpp>
+
+#include <ossia/detail/hash.hpp>
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+
+namespace score::gfx
+{
+namespace
+{
+
+// Number of float components declared by a GLSL TYPE string.
+// "float" -> 1, "vec2" -> 2, "vec3" -> 3, "vec4" -> 4; every other type
+// (mat4, integer types, ...) yields 0. The v1 fallback path is strict on
+// purpose: the PerInstance step_rate=1 broadcast semantics we ship don't
+// generalise cleanly to integer IDs or mat4 (location-bump issue).
+int float_components_of(std::string_view decl_type) noexcept
+{
+  if(decl_type == "float")
+    return 1;
+  // "vecN" with N in 2..4: the component count is the trailing digit.
+  const bool is_vec = decl_type.size() == 4 && decl_type.substr(0, 3) == "vec"
+                      && decl_type[3] >= '2' && decl_type[3] <= '4';
+  return is_vec ? decl_type[3] - '0' : 0;
+}
+
+// Translate a float component count into the corresponding
+// ossia::geometry::attribute float format. v1 only emits float formats;
+// any count other than 1, 2 or 3 resolves to float4.
+int format_for_components(int n) noexcept
+{
+  using F = ossia::geometry::attribute;
+  if(n == 1)
+    return F::float1;
+  if(n == 2)
+    return F::float2;
+  if(n == 3)
+    return F::float3;
+  return F::float4;
+}
+
+// Stage up to four values of `src` (missing ones stay zero), then copy the
+// first `n` floats to offset 0 of spec.bytes and set stride / format to match.
+void pack_floats(VertexFallbackSpec& spec, int n,
+                 std::initializer_list<float> src) noexcept
+{
+  float staged[4] = {};
+  for(std::size_t i = 0; i < src.size() && i < 4; ++i)
+    staged[i] = src.begin()[i];
+  std::memcpy(spec.bytes.data(), staged, (size_t)n * sizeof(float));
+  spec.stride_bytes = (uint32_t)(n * sizeof(float));
+  spec.format = format_for_components(n);
+}
+
+// Canonical whitelist of neutral values, keyed by attribute semantic.
+// If `sem` is whitelisted, packs its neutral into `spec` as `n` floats
+// (via pack_floats) and returns true; otherwise leaves `spec` untouched
+// and returns false — such semantics need an explicit user DEFAULT.
+// Keep this in sync with the table in
+// docs/reference-manual/processes/library/render-pipeline.md.
+bool fill_whitelist(VertexFallbackSpec& spec,
+                    ossia::attribute_semantic sem, int n) noexcept
+{
+  using S = ossia::attribute_semantic;
+  switch(sem)
+  {
+    // Core geometry
+    case S::position:         pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::normal:           pack_floats(spec, n, {0.f, 0.f, 1.f, 0.f}); return true;
+    case S::tangent:          pack_floats(spec, n, {1.f, 0.f, 0.f, 1.f}); return true;
+    case S::bitangent:        pack_floats(spec, n, {0.f, 1.f, 0.f, 0.f}); return true;
+
+    // UVs
+    case S::texcoord0: case S::texcoord1: case S::texcoord2: case S::texcoord3:
+    case S::texcoord4: case S::texcoord5: case S::texcoord6: case S::texcoord7:
+      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // Vertex colors — multiplicative identity is white.
+    case S::color0: case S::color1: case S::color2: case S::color3:
+      pack_floats(spec, n, {1.f, 1.f, 1.f, 1.f}); return true;
+
+    // Per-instance broadcast colors — same multiplicative identity as
+    // their per-vertex counterparts. Drives the unified-MDI shader's
+    // base × inst_color modulation: when no per-instance binding is
+    // present (Sponza, plain glTF), every fragment reads white and the
+    // effective scaling collapses to per-vertex × material only.
+    case S::instance_color0: case S::instance_color1:
+    case S::instance_color2: case S::instance_color3:
+      pack_floats(spec, n, {1.f, 1.f, 1.f, 1.f}); return true;
+
+    // Per-instance custom — application-specific user data. Zero is the
+    // benign default for "ignore me unless wired".
+    case S::instance_custom0: case S::instance_custom1:
+    case S::instance_custom2: case S::instance_custom3:
+      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // instance_draw_id intentionally omitted — uint-typed VERTEX_INPUTs
+    // aren't supported by the float-only v1 fallback path. Unified-MDI
+    // shaders that read it must set REQUIRED: true (and the
+    // ScenePreprocessor publishes the per-instance draw_id buffer).
+
+    // Transform / instancing. The enum at rotation..translation
+    // (values 600..607) is now collision-free with the morph deltas
+    // (500..504), so every transform semantic has an unambiguous
+    // neutral. transform_matrix (mat4) is still intentionally absent:
+    // mat4 VERTEX_INPUTS need distinct per-column vertex-input
+    // bindings which the v1 fallback path (single PerInstance buffer,
+    // single float{1..4} format) cannot express. Users can declare
+    // four vec4 columns and reassemble in GLSL, or keep
+    // transform_matrix REQUIRED: true.
+    case S::rotation:         pack_floats(spec, n, {0.f, 0.f, 0.f, 1.f}); return true;
+    case S::rotation_extra:   pack_floats(spec, n, {0.f, 0.f, 0.f, 1.f}); return true;
+    case S::scale:            pack_floats(spec, n, {1.f, 1.f, 1.f, 1.f}); return true;
+    case S::uniform_scale:    pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true;
+    case S::up:               pack_floats(spec, n, {0.f, 1.f, 0.f, 0.f}); return true;
+    case S::pivot:            pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::translation:      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // Morph deltas — zero delta means "no morph contribution", which is
+    // exactly the right neutral for an absent morph target. All five
+    // are safe to include now that the collisions are gone.
+    case S::morph_position:
+    case S::morph_normal:
+    case S::morph_tangent:
+    case S::morph_texcoord:
+    case S::morph_color:
+      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // Particle dynamics — at-rest defaults.
+    case S::velocity:
+    case S::acceleration:
+    case S::force:
+    case S::angular_velocity:
+      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::mass:             pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true;
+    case S::age:              pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::lifetime:         pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::drag:             pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // Rendering hints. NOTE(review): emissive_strength defaults to 0, not glTF's 1 — confirm intended.
+    case S::sprite_size:      pack_floats(spec, n, {1.f, 1.f, 0.f, 0.f}); return true;
+    case S::sprite_rotation:  pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::sprite_facing:    pack_floats(spec, n, {0.f, 0.f, 1.f, 0.f}); return true;
+    case S::width:            pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true;
+    case S::opacity:          pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true;
+    case S::emissive:         pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::emissive_strength: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // Material / PBR
+    case S::roughness:        pack_floats(spec, n, {0.5f, 0.f, 0.f, 0.f}); return true;
+    case S::metallic:         pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::ambient_occlusion: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true;
+    case S::specular:         pack_floats(spec, n, {0.5f, 0.f, 0.f, 0.f}); return true;
+    case S::subsurface:
+    case S::clearcoat:
+    case S::clearcoat_roughness:
+    case S::anisotropy:
+    case S::transmission:
+    case S::thickness:
+      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::anisotropy_direction: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true;
+    case S::ior:              pack_floats(spec, n, {1.5f, 0.f, 0.f, 0.f}); return true;
+
+    // UI / effect slots
+    case S::selection:        pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+    case S::fx0: case S::fx1: case S::fx2: case S::fx3:
+    case S::fx4: case S::fx5: case S::fx6: case S::fx7:
+      pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true;
+
+    // Everything else: NOT whitelisted. Forces the caller to require an
+    // explicit DEFAULT (motion-history semantics, skinning indices /
+    // weights, integer IDs, volumetric / splat data — cases where a
+    // wrong "neutral" is silently wrong).
+    default:
+      return false;
+  }
+}
+
+} // namespace
+
+std::optional<VertexFallbackSpec> resolveVertexFallback(
+    ossia::attribute_semantic semantic,
+    std::string_view decl_type,
+    const std::vector<double>& user_default) noexcept
+{
+  const int n = float_components_of(decl_type);
+  if(n <= 0)
+    return std::nullopt;   // unsupported type (mat4, integer, sampler, ...)
+  VertexFallbackSpec spec{};
+  if(user_default.empty())  // no user DEFAULT: whitelisted neutral or nothing
+  {
+    if(fill_whitelist(spec, semantic, n))
+      return spec;
+    return std::nullopt;
+  }
+  // User DEFAULT wins: pack at most n values, zero-pad the rest. Values are
+  // clamped to the finite float range: out-of-range double -> float is UB.
+  constexpr double lim = std::numeric_limits<float>::max();
+  float tmp[4] = {0.f, 0.f, 0.f, 0.f};
+  const std::size_t k = std::min<std::size_t>(user_default.size(), (std::size_t)n);
+  for(std::size_t i = 0; i < k; ++i)
+  {
+    const double v = user_default[i];
+    tmp[i] = (float)(v > lim ? lim : (v < -lim ? -lim : v));
+  }
+  std::memcpy(spec.bytes.data(), tmp, (size_t)n * sizeof(float));
+  spec.stride_bytes = (uint32_t)(n * sizeof(float));
+  spec.format = format_for_components(n);
+  return spec;
+}
+
+uint64_t hashVertexFallback(const VertexFallbackSpec& spec) noexcept
+{
+  // Only the first stride_bytes bytes are payload; cap at the array size so
+  // an oversized stride can never read past spec.bytes.
+  const auto capacity = (uint32_t)spec.bytes.size();
+  const uint32_t active = spec.stride_bytes < capacity ? spec.stride_bytes : capacity;
+  // Seed with the format, fold in the stride, then the payload hash, so two
+  // specs with identical bytes but different formats / strides don't alias.
+  uint64_t seed = ossia::hash_trivial(spec.format);
+  ossia::hash_combine(seed, spec.stride_bytes);
+  ossia::hash_combine(seed, ossia::hash_bytes(spec.bytes.data(), active));
+  return seed;
+}
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.hpp
new file mode 100644
index 0000000000..713883fca3
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.hpp
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <score_plugin_gfx_export.h>
+
+#include <array>
+#include <cstdint>
+#include <optional>
+#include <string_view>
+#include <vector>
+
+namespace score::gfx
+{
+
+// Packed neutral value for an optional VERTEX_INPUT whose upstream
+// attribute is absent. The renderer uploads these `stride_bytes` bytes
+// into a PerInstance step_rate=1 buffer of exactly one element and binds
+// it at the shader input's slot. Stride and format are driven by the
+// GLSL TYPE the shader declared — not the semantic's canonical width.
+struct VertexFallbackSpec
+{
+  // Values from the anonymous enum in ossia::geometry::attribute — we
+  // store as int to sidestep the "decltype on non-static member"
+  // boilerplate; callers cast back at the QRhi boundary the same way
+  // RenderedCSFNode.cpp already does.
+  int format{};
+  uint32_t stride_bytes{};  // number of payload bytes at the start of `bytes`
+  // First `stride_bytes` bytes are the payload (native float bytes in
+  // v1). 64 bytes accommodate mat4 if mat4 VERTEX_INPUTS ever land
+  // (they don't today — the parser's location-bump is not mat4-aware).
+  std::array<uint8_t, 64> bytes{};
+};
+
+// Resolve a fallback for a shader-declared optional VERTEX_INPUT.
+//
+//   `semantic`       the resolved ossia semantic (from SEMANTIC field if
+//                    set, else from NAME via ossia::name_to_semantic).
+//                    Pass attribute_semantic::custom for unknown names.
+//   `decl_type`      the GLSL TYPE the shader declared, lowercased
+//                    ("float", "vec2", "vec3", "vec4"). mat4 / integer
+//                    types are unsupported in v1 — returns nullopt.
+//   `user_default`   the DEFAULT[] array from the JSON header (may be
+//                    empty). When non-empty, overrides the semantic
+//                    whitelist: numbers are packed into the payload in
+//                    declaration order, then truncated / zero-padded to
+//                    fit the declared type width.
+//
+// Returns `std::nullopt` when neither a user DEFAULT nor a whitelisted
+// semantic default applies — the caller is expected to fail the pipeline
+// build with a clear error referencing the input name.
+SCORE_PLUGIN_GFX_EXPORT std::optional<VertexFallbackSpec> resolveVertexFallback(
+    ossia::attribute_semantic semantic,
+    std::string_view decl_type,
+    const std::vector<double>& user_default) noexcept;
+
+// Hash of a fallback spec: its format, stride and active payload bytes.
+// Used as part of the VertexFallbackPool key so two shaders declaring the
+// same semantic and TYPE with different DEFAULT arrays don't share a buffer.
+SCORE_PLUGIN_GFX_EXPORT uint64_t
+hashVertexFallback(const VertexFallbackSpec& spec) noexcept;
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPlan.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPlan.hpp
new file mode 100644
index 0000000000..c161654e5d
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPlan.hpp
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <vector>
+
+class QRhiBuffer;
+
+namespace score::gfx
+{
+
+// Draw-time bindings the renderer must merge into its vertex-input
+// array to satisfy "REQUIRED: false" VERTEX_INPUTS whose upstream
+// geometry did not provide a matching attribute.
+//
+// Emitted by the fallback-aware remapPipelineVertexInputs overload and
+// consumed by RenderedRawRasterPipelineNode at draw time. Each Slot has
+// a `binding_index` — the slot in the pipeline's vertex-input binding
+// array that was appended during pipeline build — and a QRhiBuffer* the
+// runtime binds at that index when issuing the draw.
+//
+// The plan is safe to hold across frames: the buffer handles come from
+// the VertexFallbackPool which lives alongside the RenderList.
+//
+// This struct lives in its own header so consumers (CustomMesh, the
+// renderer) can depend on it without pulling the full Utils.hpp /
+// VertexFallbackPool.hpp graph in via Mesh.hpp.
+struct FallbackBindingPlan
+{
+  struct Slot
+  {
+    int binding_index{};  // index into the pipeline's vertex-input binding array
+    QRhiBuffer* buffer{};  // buffer to bind at that index; not deleted by this struct
+  };
+  std::vector<Slot> slots;
+
+  bool empty() const noexcept { return slots.empty(); }
+  void clear() noexcept { slots.clear(); }
+};
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.cpp
new file mode 100644
index 0000000000..2ac18fc085
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.cpp
@@ -0,0 +1,67 @@
+#include <Gfx/Graph/VertexFallbackPool.hpp>
+
+#include <private/qrhi_p.h>
+
+namespace score::gfx
+{
+
+VertexFallbackPool::~VertexFallbackPool()
+{
+  // RenderList owns us and must have called release() before
+  // tearing down the QRhi. Anything still in the map at destruction
+  // time is leaked on purpose: we can't safely delete QRhiBuffer* here
+  // without knowing the QRhi is still alive. No assertion is made on
+  // the map being empty either — a hard assert would fire during
+  // abnormal (e.g. OOM) teardown, so a quiet leak is preferred for
+  // robustness.
+}
+
+VertexFallbackPool::Entry VertexFallbackPool::acquire(
+    QRhi& rhi, QRhiResourceUpdateBatch& batch,
+    const VertexFallbackSpec& spec)
+{
+  // Reject specs with an empty payload or one larger than the inline
+  // storage: the upload below reads `stride_bytes` bytes from `spec.bytes`.
+  if(spec.stride_bytes == 0 || spec.stride_bytes > spec.bytes.size())
+    return Entry{};
+
+  Key k{
+      .format = spec.format,
+      .stride = spec.stride_bytes,
+      .payload_hash = hashVertexFallback(spec)};
+  if(auto it = m_entries.find(k); it != m_entries.end())
+    return it->second;   // already allocated and uploaded
+
+  // Allocate a single QRhiBuffer sized to exactly one element, with the
+  // Immutable usage hint: it is uploaded once and never rewritten.
+  auto* buf = rhi.newBuffer(
+      QRhiBuffer::Immutable, QRhiBuffer::VertexBuffer, spec.stride_bytes);
+  buf->setName(QByteArrayLiteral("score.vertex_fallback"));
+  if(!buf->create())
+  {
+    // Allocation failed. Return a null Entry and cache nothing; the
+    // caller is expected to report this as a pipeline-build failure.
+    delete buf;
+    return Entry{};
+  }
+
+  batch.uploadStaticBuffer(buf, 0, spec.stride_bytes, spec.bytes.data());
+  Entry e{.buffer = buf, .stride = spec.stride_bytes, .format = spec.format};
+  m_entries.emplace(k, e);
+  return e;
+}
+
+void VertexFallbackPool::release()
+{
+  // Call deleteLater() on each pooled buffer, then drop all entries.
+  for(auto& kv : m_entries)
+  {
+    QRhiBuffer*& pooled = kv.second.buffer;
+    if(pooled)
+      pooled->deleteLater();
+    pooled = nullptr;
+  }
+  m_entries.clear();
+}
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.hpp
new file mode 100644
index 0000000000..ef71d3af98
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.hpp
@@ -0,0 +1,89 @@
+#pragma once
+
+#include <Gfx/Graph/VertexFallbackDefaults.hpp>
+
+#include <ossia/detail/hash_map.hpp>
+
+#include <score_plugin_gfx_export.h>
+
+#include <cstdint>
+
+class QRhi;
+class QRhiBuffer;
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+
+// Shared pool of tiny (4–16 byte) PerInstance step_rate=1 vertex
+// buffers used to satisfy "REQUIRED: false" VERTEX_INPUTS whose
+// upstream geometry does not provide a matching attribute.
+//
+// Lifetime-owned by the RenderList (same scope as GpuResourceRegistry).
+// Lookup key includes the format, stride, and a hash of the payload so
+// different DEFAULT values on the same semantic don't share a buffer.
+// A typical session touches ~5–10 distinct buckets; total footprint is
+// sub-kilobyte.
+//
+// Not thread-safe: designed for single-threaded render-thread access.
+class SCORE_PLUGIN_GFX_EXPORT VertexFallbackPool
+{
+public:
+  struct Entry
+  {
+    QRhiBuffer* buffer{};   // VertexBuffer | Immutable, `stride` bytes; nullptr if acquire() failed
+    uint32_t stride{};      // matches spec.stride_bytes
+    int format{};           // matches spec.format (ossia::geometry::attribute::format)
+  };
+
+  VertexFallbackPool() = default;
+  ~VertexFallbackPool();
+
+  VertexFallbackPool(const VertexFallbackPool&) = delete;
+  VertexFallbackPool& operator=(const VertexFallbackPool&) = delete;
+
+  // Returns (and lazily creates) the shared buffer matching `spec`.
+  // The first call per key allocates a QRhiBuffer and records an
+  // upload on `batch`; subsequent calls return the cached buffer and
+  // do not touch `batch`. If the buffer cannot be created, a default
+  // Entry (buffer == nullptr) is returned and nothing is cached.
+  // `rhi` and `batch` must be valid. The returned buffer is valid
+  // until release() is called.
+  Entry acquire(QRhi& rhi, QRhiResourceUpdateBatch& batch,
+                const VertexFallbackSpec& spec);
+
+  // Calls deleteLater() on every cached buffer and clears the pool.
+  // Called by the owning RenderList on teardown.
+  void release();
+
+  // Number of cached buffers. Diagnostic only.
+  std::size_t size() const noexcept { return m_entries.size(); }
+
+private:
+  struct Key
+  {
+    int format{};
+    uint32_t stride{};
+    uint64_t payload_hash{};  // hashVertexFallback(spec), set in acquire()
+
+    bool operator==(const Key& o) const noexcept
+    {
+      return format == o.format && stride == o.stride
+             && payload_hash == o.payload_hash;
+    }
+  };
+  struct KeyHash
+  {
+    std::size_t operator()(const Key& k) const noexcept
+    {
+      // Cheap mix — keys are already high-entropy via payload_hash.
+      return (std::size_t)(k.payload_hash
+                           ^ ((uint64_t)k.format << 32)
+                           ^ (uint64_t)k.stride);
+    }
+  };
+
+  ossia::hash_map<Key, Entry, KeyHash> m_entries;
+};
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp
index fb886132bc..1fce43b822 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp
@@ -75,6 +75,8 @@ void VideoNodeRenderer::setupGpuDecoder(RenderList& r)
     m_p.clear();
   }
 
+  m_shaders = {};
+
   createGpuDecoder();
 
   createPipelines(r);
@@ -84,11 +86,11 @@ void VideoNodeRenderer::createPipelines(RenderList& r)
 {
   if(m_gpu)
   {
-    auto shaders = m_gpu->init(r);
+    m_shaders = m_gpu->init(r);
     SCORE_ASSERT(m_p.empty());
     score::gfx::defaultPassesInit(
-        m_p, this->node().output[0]->edges, r, r.defaultQuad(), shaders.first,
-        shaders.second, m_processUBO, m_materialUBO, m_gpu->samplers);
+        m_p, this->node().output[0]->edges, r, r.defaultQuad(), m_shaders.first,
+        m_shaders.second, m_processUBO, m_materialUBO, m_gpu->samplers);
   }
 }
 
@@ -113,7 +115,7 @@ void VideoNodeRenderer::checkFormat(RenderList& r, AVPixelFormat fmt, int w, int
   }
 }
 
-void VideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+void VideoNodeRenderer::initState(RenderList& renderer, QRhiResourceUpdateBatch& res)
 {
   auto& rhi = *renderer.state.rhi;
 
@@ -136,8 +138,88 @@ void VideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
   if(!m_gpu)
     createGpuDecoder();
 
-  createPipelines(renderer);
+  // Cache the shaders from the GPU decoder (also creates its samplers/textures)
+  if(m_gpu)
+    m_shaders = m_gpu->init(renderer);
+
   m_recomputeScale = true;
+  m_initialized = true;
+}
+
+void VideoNodeRenderer::addOutputPass(
+    RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res)
+{
+  // Nothing to build without a GPU decoder and both of its cached shaders.
+  if(!m_gpu || !m_shaders.first.isValid() || !m_shaders.second.isValid())
+    return;
+
+  // Skip edges for which the render list yields no render target.
+  auto target = renderer.renderTargetForOutput(edge);
+  if(!target.renderTarget)
+    return;
+
+  auto built = score::gfx::buildPipeline(
+      renderer, renderer.defaultQuad(), m_shaders.first, m_shaders.second, target,
+      m_processUBO, m_materialUBO, m_gpu->samplers);
+  if(built.pipeline)
+    m_p.emplace_back(&edge, Pass{target, built, nullptr});
+}
+
+void VideoNodeRenderer::removeOutputPass(RenderList& renderer, Edge& edge)
+{
+  // Find the pass registered for this edge; nothing to do if there is none.
+  const auto feeds_edge = [target = &edge](const auto& p) { return p.first == target; };
+  auto found = ossia::find_if(m_p, feeds_edge);
+  if(found == m_p.end())
+    return;
+  found->second.release();
+  m_p.erase(found);
+}
+
+bool VideoNodeRenderer::hasOutputPassForEdge(Edge& edge) const
+{
+  const auto none = m_p.end();  // find_if returns this when no pass is keyed on `edge`
+  return none != ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; });
+}
+
+void VideoNodeRenderer::releaseState(RenderList& r)
+{
+  if(!m_initialized)
+    return;
+  // Past this point initState() has run (it is what sets m_initialized).
+  if(m_gpu)
+    m_gpu->release(r);
+
+  delete m_processUBO;
+  m_processUBO = nullptr;
+
+  delete m_materialUBO;
+  m_materialUBO = nullptr;
+  // Release every per-edge pass before dropping the list.
+  for(auto& p : m_p)
+    p.second.release();
+  m_p.clear();
+
+  m_meshBuffer = {};
+  m_shaders = {};
+  // Give back the frame currently held, if any.
+  if(m_currentFrame)
+  {
+    m_currentFrame->use_count--;
+    m_currentFrame.reset();
+  }
+
+  m_initialized = false;
+}
+
+void VideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  // Shared state first, then one output pass per edge leaving the
+  // first output port.
+  initState(renderer, res);
+  auto& outgoing = this->node().output[0]->edges;
+  for(Edge* e : outgoing)
+    addOutputPass(renderer, *e, res);
+}
 
 void VideoNodeRenderer::runRenderPass(
@@ -235,25 +317,6 @@ void VideoNodeRenderer::displayFrame(
 
 void VideoNodeRenderer::release(RenderList& r)
 {
-  if(m_gpu)
-    m_gpu->release(r);
-
-  delete m_processUBO;
-  m_processUBO = nullptr;
-
-  delete m_materialUBO;
-  m_materialUBO = nullptr;
-
-  for(auto& p : m_p)
-    p.second.release();
-  m_p.clear();
-
-  m_meshBuffer = {};
-
-  if(m_currentFrame)
-  {
-    m_currentFrame->use_count--;
-    m_currentFrame.reset();
-  }
+  releaseState(r);
 }
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp
index 298760a934..1c58114eeb 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp
@@ -32,6 +32,13 @@ class VideoNodeRenderer : public NodeRenderer
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
   void release(RenderList& r) override;
 
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
+  void releaseState(RenderList& renderer) override;
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override;
+  void removeOutputPass(RenderList& renderer, Edge& edge) override;
+  bool hasOutputPassForEdge(Edge& edge) const override;
+
 private:
   void createPipelines(RenderList& r);
   void displayFrame(AVFrame& frame, RenderList& renderer, QRhiResourceUpdateBatch& res);
@@ -55,6 +62,7 @@ class VideoNodeRenderer : public NodeRenderer
   };
 
   std::unique_ptr<GPUVideoDecoder> m_gpu;
+  std::pair<QShader, QShader> m_shaders;
 
   Video::ImageFormat m_frameFormat{};
   score::gfx::ScaleMode m_currentScaleMode{};
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp
index 3a0004800a..72a185a213 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp
@@ -7,6 +7,13 @@
 #include <qvulkanfunctions.h>
 #include <vulkan/vulkan.h>
 
+#if __has_include(<vulkan/vulkan_win32.h>)
+#include <vulkan/vulkan.h>
+#ifdef Q_OS_WIN
+#include <vulkan/vulkan_win32.h>
+#endif
+#endif
+
 #include <cstring>
 #include <string>
 #include <vector>
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp
index 0e59153621..59b6653b27 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp
@@ -216,7 +216,7 @@ void Window::render()
 
     auto buf = m_swapChain->currentFrameCommandBuffer();
     auto batch = state->rhi->nextResourceUpdateBatch();
-    buf->beginPass(m_swapChain->currentFrameRenderTarget(), Qt::black, {1.0f, 0}, batch);
+    buf->beginPass(m_swapChain->currentFrameRenderTarget(), Qt::black, {0.0f, 0}, batch);
     buf->endPass();
 
     state->rhi->endFrame(m_swapChain, {});
@@ -248,12 +248,12 @@ void Window::exposeEvent(QExposeEvent* ev)
     resizeSwapChain();
   }
 
-  if(m_hasSwapChain && !m_swapChain)
-  {
-    qDebug("exposeEvent: m_hasSwapChain && !m_swapChain");
-    m_hasSwapChain = false;
-  }
-
+  // The (m_hasSwapChain, m_swapChain) pair is kept consistent at the
+  // teardown sites in ScreenNode (~ScreenNode, destroyOutput) and
+  // MultiWindowNode (releaseWindowSwapChain, destroyOutput): the flag is
+  // cleared and the alias is nulled BEFORE the QRhiSwapChain is released,
+  // so we can never observe (m_hasSwapChain == true && m_swapChain ==
+  // nullptr) here. See diagnostic 047.
   const QSize surfaceSize = m_hasSwapChain ? m_swapChain->surfacePixelSize() : QSize();
 
   if((!isExposed() || (m_hasSwapChain && surfaceSize.isEmpty())) && m_running)
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp
index 3519255543..b592ae6a13 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp
@@ -9,8 +9,8 @@ GPUVideoDecoder::~GPUVideoDecoder() { }
 
 void GPUVideoDecoder::release(RenderList&)
 {
-  for(auto [sampler, tex] : samplers)
-    tex->deleteLater();
+  for(auto& s : samplers)
+    if(s.texture) s.texture->deleteLater();
 
   for(auto sampler : samplers)
   {
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp
index 6819dcd4fc..e7fba53804 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp
@@ -19,7 +19,10 @@ extern "C" {
 
 #if defined(SCORE_HAS_D3D11_HWCONTEXT)
 
+// clang-format off
+#include <windows.h>
 #include <d3d11.h>
+// clang-format on
 
 namespace score::gfx
 {
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp
index 762fa85095..0c5fda135b 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp
@@ -19,7 +19,10 @@ extern "C" {
 
 #if defined(SCORE_HAS_D3D12_HWCONTEXT)
 
+// clang-format off
+#include <windows.h>
 #include <d3d12.h>
+// clang-format on
 
 namespace score::gfx
 {
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp
index a63acb101e..8ca17bbf1e 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp
@@ -403,13 +403,19 @@ vec3 agx(vec3 color) {
 }
 
 vec3 agxEotf(vec3 color) {
-  // AgX -> sRGB/BT.709 linear
+  // AgX outset (inverse of inset). The output of agxDefaultContrastApprox
+  // is in AgX's pseudo-sRGB-2.2-gamma space; we apply outset then the
+  // 2.2 EOTF to land in linear sRGB. Reference: iolite minimal AgX,
+  // https://iolite-engine.com/blog_posts/minimal_agx_implementation
   const mat3 agxInvTransform = mat3(
      1.19687900512017,  -0.0528968517574562, -0.0529716355144438,
     -0.0980208811401368,  1.15190312990417,  -0.0980434501171241,
     -0.0990297440797205, -0.0989611768448433,  1.15107367264116
   );
-  return agxInvTransform * color;
+  vec3 v = agxInvTransform * color;
+  // Without this gamma the output is display-non-linear but the caller
+  // treats it as linear -> shadows crushed, contrast over-steep.
+  return pow(max(v, vec3(0.0)), vec3(2.2));
 }
 
 vec3 tonemap(vec3 color) {
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp
index 17b37ef558..40b1f39faf 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp
@@ -145,7 +145,7 @@ struct I420Encoder : GPUVideoEncoder
 
   void execPlane(QRhi& rhi, QRhiCommandBuffer& cb, PlaneResources& plane, int w, int h)
   {
-    cb.beginPass(plane.rt, Qt::black, {1.0f, 0});
+    cb.beginPass(plane.rt, Qt::black, {0.0f, 0});
     cb.setGraphicsPipeline(plane.pipeline);
     cb.setShaderResources(plane.srb);
     cb.setViewport(QRhiViewport(0, 0, w, h));
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp
index 644087a10f..6cb97dca31 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp
@@ -159,7 +159,7 @@ struct NV12Encoder : GPUVideoEncoder
   void exec(QRhi& rhi, QRhiCommandBuffer& cb) override
   {
     // Pass 1: Y plane (full resolution)
-    cb.beginPass(m_yRT, Qt::black, {1.0f, 0});
+    cb.beginPass(m_yRT, Qt::black, {0.0f, 0});
     cb.setGraphicsPipeline(m_yPipeline);
     cb.setShaderResources(m_ySRB);
     cb.setViewport(QRhiViewport(0, 0, m_width, m_height));
@@ -170,7 +170,7 @@ struct NV12Encoder : GPUVideoEncoder
     cb.endPass(yReadbackBatch);
 
     // Pass 2: UV plane (half resolution)
-    cb.beginPass(m_uvRT, Qt::black, {1.0f, 0});
+    cb.beginPass(m_uvRT, Qt::black, {0.0f, 0});
     cb.setGraphicsPipeline(m_uvPipeline);
     cb.setShaderResources(m_uvSRB);
     cb.setViewport(QRhiViewport(0, 0, m_width / 2, m_height / 2));
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp
index bf3d2994b1..f57f123803 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp
@@ -116,7 +116,7 @@ struct UYVYEncoder : GPUVideoEncoder
 
   void exec(QRhi& rhi, QRhiCommandBuffer& cb) override
   {
-    cb.beginPass(m_renderTarget, Qt::black, {1.0f, 0});
+    cb.beginPass(m_renderTarget, Qt::black, {0.0f, 0});
     cb.setGraphicsPipeline(m_pipeline);
     cb.setShaderResources(m_srb);
     cb.setViewport(QRhiViewport(0, 0, m_width / 2, m_height));
diff --git a/src/plugins/score-plugin-gfx/Gfx/Hashes.hpp b/src/plugins/score-plugin-gfx/Gfx/Hashes.hpp
new file mode 100644
index 0000000000..92e9f8587a
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Hashes.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+// Qt-aware adapters over ossia::hash (rapidhash). Centralises the
+// QString / QByteArray hashing pattern so cache keys across the gfx
+// pipeline produce the same stable values without each call site
+// re-deriving the trick of hashing the raw character buffer.
+//
+// All hashes here delegate to ossia::hash_bytes, which dispatches
+// to the appropriate rapidhash tier (Nano / Micro / full) based on
+// size. Use these — not qHash, not std::hash<QString> — for any
+// in-memory cache key in this plugin.
+
+#include <ossia/detail/hash.hpp>
+
+#include <QByteArray>
+#include <QString>
+
+#include <cstddef>
+#include <cstdint>
+
+namespace score::gfx
+{
+
+inline uint64_t hash_qstring(const QString& s) noexcept
+{
+  const auto byte_count = static_cast<std::size_t>(s.size()) * sizeof(QChar);
+  return ossia::hash_bytes(s.constData(), byte_count); // raw QChar buffer
+}
+
+inline uint64_t hash_qbytearray(const QByteArray& b) noexcept
+{
+  return ossia::hash_bytes(b.constData(), static_cast<std::size_t>(b.size()));
+}
+
+} // namespace score::gfx
diff --git a/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp b/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp
index d0f9b4cee6..2f793889c8 100644
--- a/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp
@@ -16,6 +16,8 @@
 
 #include <isf.hpp>
 
+#include <algorithm>
+
 namespace Gfx
 {
 struct ISFHelpers
@@ -79,6 +81,10 @@ struct ISFHelpers
       const isf::input& input;
       const int i;
       T& self;
+      // Outlet id allocator for write-access storage / image inputs. Starts at
+      // a high base so it never collides with inlet ids (input index `i`), the
+      // default "Texture Out" outlet (id 1), or the MRT outlet base (10000).
+      int& outlet_id;
 
       Process::Inlet* operator()(const float_input& v)
       {
@@ -168,8 +174,18 @@ struct ISFHelpers
           alternatives.emplace_back("2", 2);
         }
 
+        // ComboBox::init expects the VALUE to be initially selected, not an
+        // index, while libisf's `v.def` is the INDEX into values for enum
+        // mode — passing it raw made `DEFAULT: <value>` silently fall back
+        // to alternatives[0]. Clamp the index and forward that alternative's
+        // value (same fix as CSF/Process.cpp + GeometryFilter/Process.cpp).
+        // Owned copy, not a reference: `alternatives` is moved from below.
+        const std::size_t def_idx
+            = std::min<std::size_t>(v.def, alternatives.size() - 1);
+        ossia::value init_value = alternatives[def_idx].second;
+
         auto port = new Process::ComboBox(
-            std::move(alternatives), (int)v.def, nm, Id<Process::Port>(i), &self);
+            std::move(alternatives), std::move(init_value), nm, Id<Process::Port>(i), &self);
 
         if(auto it = previous_values.find(nm);
            it != previous_values.end()
@@ -340,23 +356,134 @@ struct ISFHelpers
       }
       
       // CSF-specific input handlers
-      Process::Inlet* operator()(const storage_input& v) { return nullptr; }
-      Process::Inlet* operator()(const texture_input& v) { return nullptr; }
-      Process::Inlet* operator()(const csf_image_input& v) { return nullptr; }
+      Process::Inlet* operator()(const storage_input& v)
+      {
+        // Port direction follows the access qualifier, mirroring the renderer
+        // (isf_input_port_vis in ISFNode.cpp). Creating a read inlet for every
+        // storage_input gave write buffers a phantom TextureInlet (shifting
+        // all later ports under positional routing) and never exposed the
+        // TextureOutlet the renderer actually produces.
+        const QString port_name = QString::fromStdString(input.name);
+        const bool is_read_only = (v.access == "read_only");
+        if(is_read_only)
+        {
+          // Read inlet: landing point for an upstream Buffer-producing node
+          // (ScenePreprocessor's scene_* auxes, ExtractBuffer2 outputs, ...).
+          // Aux-named storage_inputs are also auto-bound by name in the
+          // RawRaster renderer, so wiring this inlet explicitly is optional.
+          auto inlet = new Gfx::TextureInlet(port_name, Id<Process::Port>(i), &self);
+          self.m_inlets.push_back(inlet);
+          return inlet;
+        }
+
+        // write_only / read_write: expose the produced SSBO as a Buffer
+        // OUTPUT port so downstream nodes can connect to it.
+        auto outlet = new Gfx::TextureOutlet(
+            port_name, Id<Process::Port>(outlet_id++), &self);
+        self.m_outlets.push_back(outlet);
+
+        // A "size" control is synthesized only when the layout's last member
+        // is a flexible array — SAME condition as CSF/Process.cpp setupCSF,
+        // the renderer, and the generated GLSL.
+        const bool has_flexible_tail
+            = !v.layout.empty()
+              && v.layout.back().type.find("[]") != std::string::npos;
+        if(!has_flexible_tail)
+          return nullptr;
+
+        auto size_inlet = new Process::IntSpinBox{
+            1, 536870911, 1024, port_name + " size", Id<Process::Port>(i),
+            &self};
+        self.m_inlets.push_back(size_inlet);
+        self.controlAdded(size_inlet->id());
+        return size_inlet;
+      }
+      Process::Inlet* operator()(const uniform_input& v)
+      {
+        // A uniform_input is fed by an upstream Buffer port (ScenePreprocessor
+        // camera/env aux buffers, ExtractBuffer2 outputs, etc.), so the model
+        // needs a TextureInlet — score's Process-layer inlet for SSBO /
+        // texture / UBO data flow — for the cable to land on; otherwise
+        // Score.inlet(proc, i) returns null.
+        const Id<Process::Port> port_id(i);
+        auto inlet = new Gfx::TextureInlet(QString::fromStdString(input.name), port_id, &self);
+        self.m_inlets.push_back(inlet);
+        return inlet;
+      }
+      Process::Inlet* operator()(const texture_input& v)
+      {
+        // Every texture_input gets an Image input port in the renderer
+        // (isf_input_port_vis). Returning nullptr here used to drop the inlet
+        // and shift all subsequent ports — the same off-by-one drift as the
+        // storage / csf_image cases.
+        const QString label = QString::fromStdString(input.name);
+        auto inlet = new Gfx::TextureInlet(label, Id<Process::Port>(i), &self);
+        self.m_inlets.push_back(inlet);
+        return inlet;
+      }
+      Process::Inlet* operator()(const csf_image_input& v)
+      {
+        // Same direction rule as the renderer: write_only / read_write images
+        // become an output port for the produced storage image, read_only
+        // images become an input port for an upstream texture cable. Always
+        // creating an inlet gave write images a phantom inlet (port shift)
+        // and left no outlet for downstream connection.
+        const QString label = QString::fromStdString(input.name);
+        if(v.access != "read_only")
+        {
+          auto outlet
+              = new Gfx::TextureOutlet(label, Id<Process::Port>(outlet_id++), &self);
+          self.m_outlets.push_back(outlet);
+          return nullptr;
+        }
+        auto inlet = new Gfx::TextureInlet(label, Id<Process::Port>(i), &self);
+        self.m_inlets.push_back(inlet);
+        return inlet;
+      }
       Process::Inlet* operator()(const geometry_input& v) { return nullptr; }
     };
 
+    // Outlet ids for write-access storage / image inputs. Base 20000 keeps
+    // them clear of inlet ids (input index), the default outlet (id 1) and the
+    // MRT base (10000), and lets the MRT block below tell them apart.
+    static constexpr int storage_outlet_base = 20000;
+    int outlet_id = storage_outlet_base;
+
     for(const isf::input& input : desc.inputs)
     {
-      ossia::visit(input_vis{previous_values, input, i, self}, input.data);
+      ossia::visit(input_vis{previous_values, input, i, self, outlet_id}, input.data);
       i++;
     }
 
-    // MRT: recreate outlets from OUTPUTS declarations
+    // The renderer (isf_input_port_vis) pushes write-storage / write-image
+    // OUTPUT ports first (in input order), then the color / MRT outputs. The
+    // model's outlets must follow the same order for positional routing. The
+    // default "Texture Out" outlet was created by the constructor *before* this
+    // loop, so it currently sits ahead of any storage outlets — pull the
+    // storage outlets (ids >= storage_outlet_base) to the front to match.
+    {
+      std::stable_partition(
+          self.m_outlets.begin(), self.m_outlets.end(),
+          [](Process::Outlet* o) { return o->id().val() >= storage_outlet_base; });
+    }
+
+    // MRT: recreate the color outlets from OUTPUTS declarations. Preserve the
+    // storage / image write outlets (ids >= storage_outlet_base); only the
+    // color / default outlets are replaced.
     if(!desc.outputs.empty())
     {
-      qDeleteAll(self.m_outlets);
-      self.m_outlets.clear();
+      for(auto it = self.m_outlets.begin(); it != self.m_outlets.end();)
+      {
+        if((*it)->id().val() < storage_outlet_base)
+        {
+          delete *it;
+          it = self.m_outlets.erase(it);
+        }
+        else
+        {
+          ++it;
+        }
+      }
 
       int outId = 10000; // High base to avoid ID collisions with inlets
       for(const auto& out : desc.outputs)
diff --git a/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp
index 92b1220bbf..ae9d4d1cb1 100644
--- a/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp
@@ -83,7 +83,7 @@ void InvertYRenderer::finishFrame(
     score::gfx::RenderList& renderer, QRhiCommandBuffer& cb,
     QRhiResourceUpdateBatch*& res)
 {
-  cb.beginPass(m_renderTarget.renderTarget, Qt::black, {1.0f, 0}, res);
+  cb.beginPass(m_renderTarget.renderTarget, Qt::black, {0.0f, 0}, res);
   res = nullptr;
   {
     const auto sz = renderer.state.renderSize;
@@ -170,7 +170,7 @@ void ScaledRenderer::runRenderPass(score::gfx::RenderList &, QRhiCommandBuffer &
 
 void ScaledRenderer::finishFrame(score::gfx::RenderList &renderer, QRhiCommandBuffer &cb, QRhiResourceUpdateBatch *&res)
 {
-  cb.beginPass(m_renderTarget.renderTarget, Qt::black, {1.0f, 0}, res);
+  cb.beginPass(m_renderTarget.renderTarget, Qt::black, {0.0f, 0}, res);
   res = nullptr;
   {
     const auto sz = renderer.state.outputSize;
diff --git a/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp
index 1bf8e44729..a56737e626 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp
@@ -153,18 +153,15 @@ score::gfx::RenderList* LibavEncoderNode::renderer() const
 
 void LibavEncoderNode::createOutput(score::gfx::OutputConfiguration conf)
 {
-  m_renderState = std::make_shared<score::gfx::RenderState>();
-
-  m_renderState->surface = QRhiGles2InitParams::newFallbackSurface();
-  QRhiGles2InitParams params;
-  params.fallbackSurface = m_renderState->surface;
-  score::GLCapabilities caps;
-  caps.setupFormat(params.format);
-  m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, &params, {});
-  m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
+  m_renderState = score::gfx::createRenderState(
+      conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr);
+  if(!m_renderState || !m_renderState->rhi)
+  {
+    qWarning() << "LibavEncoderNode: failed to create QRhi";
+    m_renderState.reset();
+    return;
+  }
   m_renderState->outputSize = m_renderState->renderSize;
-  m_renderState->api = score::gfx::GraphicsApi::OpenGL;
-  m_renderState->version = caps.qShaderVersion;
 
   auto rhi = m_renderState->rhi;
   m_texture = rhi->newTexture(
@@ -226,6 +223,11 @@ void LibavEncoderNode::destroyOutput()
 
   if(m_renderState)
   {
+    // Persist-across-rebuild contract: registry survives RL teardown,
+    // so we tear down its QRhi resources here BEFORE
+    // RenderState::destroy() (called below) frees the device.
+    releaseRegistry();
+
     delete m_renderTarget;
     m_renderTarget = nullptr;
     delete m_renderState->renderPassDescriptor;
@@ -234,10 +236,10 @@ void LibavEncoderNode::destroyOutput()
     m_depthStencil = nullptr;
     delete m_texture;
     m_texture = nullptr;
-    delete m_renderState->rhi;
-    m_renderState->rhi = nullptr;
-    delete m_renderState->surface;
-    m_renderState->surface = nullptr;
+    // RenderState::destroy() flushes the pipeline cache via preRhiDestroy
+    // and then deletes rhi + surface. Doing the deletes manually (the
+    // previous approach) bypassed the cache flush.
+    m_renderState->destroy();
     m_renderState.reset();
   }
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp
index e6ffc374cc..2593b9a31f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp
@@ -6,6 +6,7 @@
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RenderState.hpp>
 #include <Gfx/InvertYRenderer.hpp>
 
 #include <score/gfx/OpenGL.hpp>
@@ -191,18 +192,15 @@ void Sh4ltOutputNode::createOutput(score::gfx::OutputConfiguration conf)
           sh4lt::ShType::default_group()),
       m_settings.width * m_settings.height * 4, m_logger);
   m_frame_dur = 1e9 / m_settings.rate;
-  m_renderState = std::make_shared<score::gfx::RenderState>();
-
-  m_renderState->surface = QRhiGles2InitParams::newFallbackSurface();
-  QRhiGles2InitParams params;
-  params.fallbackSurface = m_renderState->surface;
-  score::GLCapabilities caps;
-  caps.setupFormat(params.format);
-  m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, &params, {});
-  m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
+  m_renderState = score::gfx::createRenderState(
+      conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr);
+  if(!m_renderState || !m_renderState->rhi)
+  {
+    qWarning() << "Sh4ltOutputNode: failed to create QRhi";
+    m_renderState.reset();
+    return;
+  }
   m_renderState->outputSize = m_renderState->renderSize;
-  m_renderState->api = score::gfx::GraphicsApi::OpenGL;
-  m_renderState->version = caps.qShaderVersion;
 
   auto rhi = m_renderState->rhi;
   m_texture = rhi->newTexture(
@@ -221,6 +219,26 @@ void Sh4ltOutputNode::createOutput(score::gfx::OutputConfiguration conf)
 void Sh4ltOutputNode::destroyOutput()
 {
   m_writer.reset();
+
+  if(!m_renderState) // createOutput never ran, or bailed out after a failed QRhi creation
+    return;
+
+  // Persist-across-rebuild contract: registry survives RL teardown,
+  // so we tear down its QRhi resources here BEFORE
+  // RenderState::destroy() (called below) frees the device.
+  releaseRegistry();
+
+  delete m_renderTarget; // resources created in createOutput go first...
+  m_renderTarget = nullptr;
+
+  delete m_renderState->renderPassDescriptor;
+  m_renderState->renderPassDescriptor = nullptr;
+
+  delete m_texture;
+  m_texture = nullptr;
+
+  m_renderState->destroy(); // ...and only then is the render state itself destroyed
+  m_renderState.reset();    // drop our reference so a later call returns early
 }
 
 std::shared_ptr<score::gfx::RenderState> Sh4ltOutputNode::renderState() const
diff --git a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp
index 5759c0d6bc..04497b3b64 100644
--- a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp
@@ -7,11 +7,13 @@
 #include <score/application/ApplicationContext.hpp>
 
 #include <ossia/detail/flat_map.hpp>
+#include <ossia/detail/hash_map.hpp>
 
 #include <QDir>
 #include <QFile>
 #include <QFileInfo>
 #include <QRegularExpression>
+
 namespace Gfx
 {
 
@@ -20,15 +22,35 @@ namespace
 
 QStringList shaderIncludePaths()
 {
-  // Resolve includes ; for now we have one hardcoded library...
   QStringList shaderIncludePath;
 
-  // FIXME refactor that !
+  // Default path: the library packages dir so users' own GLSL snippets
+  // drop in without ceremony. Additional search roots are expected to be
+  // supplied via a user-facing include-paths GUI (not yet wired up) —
+  // no static registration mechanism lives here anymore.
   auto& lib_settings = score::AppContext().settings<Library::Settings::Model>();
+  const QString lib_path = lib_settings.getPackagesPath();
+  if(QDir{}.exists(lib_path))
   {
-    QString lib_path = lib_settings.getPackagesPath();
-    if(QDir{}.exists(lib_path))
-      shaderIncludePath.append(lib_path);
+    shaderIncludePath.append(lib_path);
+
+    // Also register every first-level subdirectory of `packages/` so
+    // shader libraries shipping as standalone packages (openpbr/,
+    // lygia/, MaterialX/, …) can be `#include`d by their bare header
+    // name from any user shader without the consumer having to know
+    // the install layout. Internal cross-includes inside a library
+    // keep working via the origin-dir-first lookup in
+    // tryResolveQuoted.
+    //
+    // Collision policy: if two libraries ship the same header
+    // basename, the one earlier in QDir iteration order wins. In
+    // practice shader libs prefix their headers (`openpbr_*.h`) so
+    // collisions are vanishingly unlikely.
+    QDir packagesDir{lib_path};
+    const auto subdirs = packagesDir.entryList(
+        QDir::Dirs | QDir::NoDotAndDotDot, QDir::Name);
+    for(const auto& sub : subdirs)
+      shaderIncludePath.append(packagesDir.filePath(sub));
   }
 
   return shaderIncludePath;
@@ -113,53 +135,233 @@ void updateToGlsl45(ShaderSource& program)
   program.fragment.remove("highp ");
 }
 
-static bool resolveGLSLIncludes(
-    QByteArray& data, const QStringList& includes, QString rootPath, int iterations);
-
-static std::optional<QByteArray> resolveFile_relative(
-    const QString& name, const QStringList& includes, const QString& rootPath,
-    int iterations)
+// Resolver state shared across recursive include expansion.
+//
+// `searchPaths` holds roots applied to both quoted and bracketed
+// includes. `originDir` is the directory the current buffer was loaded
+// from; it becomes the first place quoted includes are looked up and is
+// pushed/popped as we descend into included files so relative headers
+// resolve against their own sibling dir, not the top-level shader's.
+// `visited` holds canonicalised paths already expanded in the current
+// chain — revisiting one is a cycle.
+struct IncludeContext
+{
+  QStringList searchPaths;           // roots tried for quoted and bracketed includes
+  QString originDir;                 // dir of the buffer being expanded; empty for in-memory source
+  ossia::hash_set<QString> visited;  // canonical paths on the active include chain
+  int depth = 0;                     // current nesting level, bumped around each recursion
+  int maxDepth = 16;                 // expandFile fails once depth reaches this limit
+  QString error;             // first fatal error encountered
+  QStringList missing;       // unresolved headers, for diagnostics
+};
+
+static void removeIncludesInComments(QByteArray& data);
+static QByteArray resolveIncludes(QByteArray data, IncludeContext& ctx);
+
+static std::optional<QString> tryResolveQuoted(
+    const QString& header, const IncludeContext& ctx)
 {
-  QFile f{rootPath + "/" + name};
-  if(f.open(QIODevice::ReadOnly))
+  // Quoted: origin dir first, then search paths; only regular files match.
+  if(!ctx.originDir.isEmpty())
   {
-    QByteArray res = f.readAll();
-    if(resolveGLSLIncludes(res, includes, QFileInfo{f}.absolutePath(), iterations))
-      return res;
-    return std::nullopt;
+    const QString candidate = ctx.originDir + QLatin1Char('/') + header;
+    if(QFileInfo{candidate}.isFile()) // exists() would also accept a directory
+      return QFileInfo{candidate}.canonicalFilePath();
   }
-  return {};
+  for(const auto& path : ctx.searchPaths)
+  {
+    const QString candidate = path + QLatin1Char('/') + header;
+    if(QFileInfo{candidate}.isFile())
+      return QFileInfo{candidate}.canonicalFilePath();
+  }
+  return std::nullopt;
 }
 
-static std::optional<QByteArray>
-resolveFile_in_paths(const QString& name, const QStringList& includes, int iterations)
+static std::optional<QString> tryResolveBracketed(
+    const QString& header, const IncludeContext& ctx)
 {
-  for(auto& path : includes)
+  // Bracketed: search paths only (no origin-dir lookup); regular files only.
+  for(const auto& path : ctx.searchPaths)
   {
-    if(auto res = resolveFile_relative(name, includes, path, iterations))
-      return res;
+    const QString candidate = path + QLatin1Char('/') + header;
+    if(QFileInfo{candidate}.isFile()) // a same-named directory must stay "not found" (non-fatal)
+      return QFileInfo{candidate}.canonicalFilePath();
   }
   return std::nullopt;
 }
 
-static std::optional<QByteArray> resolveFile_quotes(
-    const QString& name, const QStringList& includes, const QString& rootPath,
-    int iterations)
+// Read one resolved include file, expand its own includes recursively, and
+// wrap the result in `#line` markers so glslang diagnostics name the
+// included file. On depth-limit, cycle or unreadable-file failure, stores a
+// message in ctx.error and returns an empty byte array (caller must abort).
+static QByteArray expandFile(
+    const QString& canonicalPath, IncludeContext& ctx, int parentLine,
+    const QString& parentPath)
 {
-  if(auto res = resolveFile_relative(name, includes, rootPath, iterations))
-    return res;
-  if(auto res = resolveFile_in_paths(name, includes, iterations))
-    return res;
-  return std::nullopt;
+  if(ctx.depth >= ctx.maxDepth)
+  {
+    ctx.error = QStringLiteral("Shader include depth limit (%1) exceeded at '%2'")
+                    .arg(ctx.maxDepth)
+                    .arg(canonicalPath);
+    return {};
+  }
+
+  // A path that is already on the active chain is including itself.
+  if(ctx.visited.contains(canonicalPath))
+  {
+    ctx.error = QStringLiteral("Shader include cycle detected: '%1' re-entered")
+                    .arg(canonicalPath);
+    return {};
+  }
+
+  QFile file{canonicalPath};
+  if(!file.open(QIODevice::ReadOnly))
+  {
+    ctx.error
+        = QStringLiteral("Shader include: failed to read '%1'").arg(canonicalPath);
+    return {};
+  }
+
+  // Enter the file: its own directory becomes the origin for quoted
+  // includes, and it joins the active chain for cycle detection. The
+  // previous state is restored as soon as the recursion returns.
+  const QString outerOriginDir = ctx.originDir;
+  ctx.originDir = QFileInfo{canonicalPath}.absolutePath();
+  ctx.visited.insert(canonicalPath);
+  ++ctx.depth;
+
+  const QByteArray inner = resolveIncludes(file.readAll(), ctx);
+
+  --ctx.depth;
+  ctx.visited.erase(canonicalPath);
+  ctx.originDir = outerOriginDir;
+
+  if(!ctx.error.isEmpty())
+    return {};
+
+  // Opening marker: the first line of the included file is line 1. File
+  // names are passed as string tokens — glslang accepts that form.
+  QByteArray framed;
+  framed.reserve(inner.size() + 256);
+  framed.append("#line 1 \"").append(canonicalPath.toUtf8()).append("\"\n");
+  framed.append(inner);
+  if(!framed.endsWith('\n'))
+    framed.append('\n');
+
+  // Closing marker: resume the parent at the line after its #include.
+  framed.append("#line ")
+      .append(QByteArray::number(parentLine + 1))
+      .append(" \"")
+      .append(parentPath.toUtf8())
+      .append("\"\n");
+  return framed;
 }
 
-static std::optional<QByteArray> resolveFile_brackets(
-    const QString& name, const QStringList& includes, const QString& rootPath,
-    int iterations)
+// Single-pass textual expansion: walks `data` line by line, replacing each
+// resolvable `#include` with the expanded, #line-framed body of its target.
+// Comments are neutralised first so `#include` inside // or /* is ignored.
+// On a fatal error, sets ctx.error and returns an empty byte array.
+static QByteArray resolveIncludes(QByteArray data, IncludeContext& ctx)
 {
-  if(auto res = resolveFile_in_paths(name, includes, iterations))
-    return res;
-  return std::nullopt;
+  removeIncludesInComments(data);
+
+  // Anchor to start-of-line (optional leading whitespace only) so an
+  // `#include "..."` substring inside an #error string or a string-
+  // literal payload doesn't get misidentified as a directive. The
+  // openpbr headers exercise this: `#error "... Add #include
+  // <glm/glm.hpp> ..."` would otherwise be picked up as an include of
+  // <glm/glm.hpp> even though no actual GLSL include is needed.
+  static const QRegularExpression quoted{
+      R"_(^\s*#include\s*"([^"]+)")_",
+      QRegularExpression::MultilineOption};
+  static const QRegularExpression bracket{
+      R"_(^\s*#include\s*<([^>]+)>)_",
+      QRegularExpression::MultilineOption};
+
+  QByteArray out;
+  out.reserve(data.size());
+
+  // Tag used in the `#line` marker that returns to this buffer after an
+  // include. Only the directory is known (ctx.originDir), not the file
+  // name, so the marker names the directory; in-memory / unknown roots
+  // (empty originDir) are tagged "<shader>".
+  const QString parentPath
+      = ctx.originDir.isEmpty() ? QStringLiteral("<shader>") : ctx.originDir;
+
+  int cursor = 0; // byte offset of the start of the current line in `data`
+  int line = 1;   // 1-based number of the current line in `data`
+  while(cursor < data.size())
+  {
+    const int eol = data.indexOf('\n', cursor);
+    const int lineEnd = eol == -1 ? data.size() : eol; // last line may lack a newline
+    const QByteArray lineBytes = data.mid(cursor, lineEnd - cursor);
+
+    // Only scan lines that look like include directives at all.
+    const int hashIdx = lineBytes.indexOf('#');
+    if(hashIdx != -1 && lineBytes.indexOf("include", hashIdx) != -1)
+    {
+      const QString lineStr = QString::fromUtf8(lineBytes);
+      if(auto m = quoted.match(lineStr); m.hasMatch())
+      {
+        const QString header = m.captured(1);
+        if(auto resolved = tryResolveQuoted(header, ctx))
+        {
+          QByteArray body = expandFile(*resolved, ctx, line, parentPath);
+          if(!ctx.error.isEmpty())
+            return {};
+          out.append(body);
+          cursor = lineEnd + (eol == -1 ? 0 : 1); // step past the newline, if any
+          line++;
+          continue;
+        }
+        ctx.missing.push_back(header);
+        ctx.error = QStringLiteral(
+                        "Shader include not found: \"%1\" (searched: %2)")
+                        .arg(header)
+                        .arg(ctx.originDir.isEmpty()
+                                 ? ctx.searchPaths.join(", ")
+                                 : (ctx.originDir + QStringLiteral(", ")
+                                    + ctx.searchPaths.join(", ")));
+        return {};
+      }
+      if(auto m = bracket.match(lineStr); m.hasMatch())
+      {
+        const QString header = m.captured(1);
+        if(auto resolved = tryResolveBracketed(header, ctx))
+        {
+          QByteArray body = expandFile(*resolved, ctx, line, parentPath);
+          if(!ctx.error.isEmpty())
+            return {};
+          out.append(body);
+          cursor = lineEnd + (eol == -1 ? 0 : 1); // step past the newline, if any
+          line++;
+          continue;
+        }
+        // Bracketed include not found: NON-fatal. Emit the line verbatim
+        // and let the downstream preprocessor (glslang/QShaderBaker)
+        // handle gating. This is what makes openpbr work without an
+        // `#if`-aware resolver: openpbr_interop.h pulls in
+        // `openpbr_interop_cpp.h` (gated by `#if defined(__cplusplus)`),
+        // which itself includes `<cstdint>` / `<cassert>`. We don't
+        // honour the `#if`, so we textually inline the C++ branch's
+        // contents — but glslang DOES honour the `#if`, sees that
+        // `__cplusplus` is undefined for shader compilation, and skips
+        // the entire C++ branch (including the orphan `<cstdint>`
+        // line) at preprocess time. Tracking in `missing` keeps the
+        // diagnostic visible if the user wants to debug.
+        ctx.missing.push_back(header);
+        // fall through to the verbatim-line append below
+      }
+    }
+
+    out.append(lineBytes); // not a resolvable directive: copy the line through unchanged
+    if(eol != -1)
+      out.append('\n');
+    cursor = lineEnd + (eol == -1 ? 0 : 1);
+    line++;
+  }
+
+  return out;
 }
 
 static void removeIncludesInComments(QByteArray& data)
@@ -245,59 +447,6 @@ static void removeIncludesInComments(QByteArray& data)
   }
 }
 
-static bool resolveGLSLIncludes(
-    QByteArray& data, const QStringList& includes, QString rootPath, int iterations)
-{
-  removeIncludesInComments(data);
-
-  iterations++;
-  if(iterations > 1000)
-  {
-    qDebug() << "More than 1000 iterations, shader include loop likely. Stopping.";
-    return false;
-  }
-  int idx = data.indexOf("#include");
-  if(idx == -1)
-    return true;
-
-  int end_line = data.indexOf('\n', idx);
-  int len = end_line - idx;
-  static QRegularExpression quoted_include{R"_(#include\s*"(.*)")_"};
-  auto cap = quoted_include.match(data.mid(idx, len)).capturedTexts();
-  if(cap.size() == 2)
-  {
-    if(auto f = resolveFile_quotes(cap[1], includes, rootPath, iterations))
-    {
-      data.replace(idx, len, *f);
-    }
-    else
-    {
-      qDebug().noquote() << "Could not resolve: " << cap[0]
-                         << " while processing shader";
-      return false;
-    }
-  }
-  else
-  {
-    static QRegularExpression bracket_include{R"_(#include\s*<(.*)>)_"};
-    auto cap = bracket_include.match(data.mid(idx, len)).capturedTexts();
-    if(cap.size() == 2)
-    {
-      if(auto f = resolveFile_brackets(cap[1], includes, rootPath, iterations))
-      {
-        data.replace(idx, len, *f);
-      }
-      else
-      {
-        qDebug().noquote() << "Could not resolve: " << cap[0]
-                           << " while processing shader";
-        return false;
-      }
-    }
-  }
-
-  return resolveGLSLIncludes(data, includes, rootPath, iterations);
-}
 }
 
 ProgramCache& ProgramCache::instance() noexcept
@@ -307,19 +456,42 @@ ProgramCache& ProgramCache::instance() noexcept
 }
 
 std::pair<std::optional<ProcessedProgram>, QString>
-ProgramCache::get(const ShaderSource& program) noexcept
+ProgramCache::get(const ShaderSource& program, const QString& originPath) noexcept
 {
-  auto it = programs.find(program);
+  // Derive the origin dir once — it's both the cache-key disambiguator
+  // (two shaders with identical text but different origin dirs resolve
+  // different sibling includes and must not collide) and the first
+  // search root for quoted #include resolution.
+  const QString originDir
+      = originPath.isEmpty() ? QString{} : QFileInfo{originPath}.absolutePath();
+  const ProgramCacheKey cacheKey{program, originDir};
+
+  auto it = programs.find(cacheKey);
   if(it != programs.end())
     return {it->second, QString{}};
 
   try
   {
-    // Resolve includes
-    QByteArray source_frag = program.fragment.toUtf8();
-    QByteArray source_vert = program.vertex.toUtf8();
-    resolveGLSLIncludes(source_frag, shaderIncludePaths(), {}, 0);
-    resolveGLSLIncludes(source_vert, shaderIncludePaths(), {}, 0);
+    // Resolve includes. Empty originDir → in-memory source, falls back
+    // to the search paths only.
+    IncludeContext ctx;
+    ctx.searchPaths = shaderIncludePaths();
+    ctx.originDir = originDir;
+
+    QByteArray source_frag = resolveIncludes(program.fragment.toUtf8(), ctx);
+    if(!ctx.error.isEmpty())
+      return {std::nullopt, QStringLiteral("Fragment: ") + ctx.error};
+
+    // Reset per-file state (visited chain, depth, errors); keep search
+    // paths and origin dir across the two shader stages.
+    ctx.visited.clear();
+    ctx.depth = 0;
+    ctx.error.clear();
+    ctx.missing.clear();
+
+    QByteArray source_vert = resolveIncludes(program.vertex.toUtf8(), ctx);
+    if(!ctx.error.isEmpty())
+      return {std::nullopt, QStringLiteral("Vertex: ") + ctx.error};
 
     switch(program.type)
     {
@@ -387,7 +559,7 @@ ProgramCache::get(const ShaderSource& program) noexcept
 
           if(vertexS.isValid() && fragmentS.isValid())
           {
-            programs[program] = processed;
+            programs[cacheKey] = processed;
             return {processed, {}};
           }
         }
@@ -415,18 +587,30 @@ ShaderSource
 programFromISFFragmentShaderPath(const QString& fsFilename, QByteArray fsData)
 {
   // ISF works by storing a vertex shader next to the fragment shader.
-  QString vertexName = fsFilename;
-  vertexName.replace(".frag", ".vert");
-  vertexName.replace(".fs", ".vs");
+  // Score recognises both the long (.frag/.vert) and short (.fs/.vs)
+  // extension conventions; pairings are tried independently of the FS
+  // file's own naming so a `foo.frag` next to `foo.vs` (or `foo.fs` next
+  // to `foo.vert`) also resolves. Without this, the .vs sibling is
+  // silently ignored and the descriptor falls back to the ISF default
+  // vertex shader — which doesn't know about user-declared
+  // VERTEX_INPUTS, so the consumer renders nothing.
+  const QString candidates[] = {
+      QString(fsFilename).replace(".frag", ".vert").replace(".fs", ".vs"),
+      QString(fsFilename).replace(".frag", ".vs"),
+      QString(fsFilename).replace(".fs", ".vert"),
+  };
 
   // If empty: will be using the ISF's default
   QByteArray vertexData;
-  if(vertexName != fsFilename)
+  for(const QString& vertexName : candidates)
   {
+    if(vertexName == fsFilename)
+      continue;
     if(QFile vertexFile{vertexName};
        vertexFile.exists() && vertexFile.open(QIODevice::ReadOnly))
     {
       vertexData = vertexFile.readAll();
+      break;
     }
   }
 
@@ -469,4 +653,18 @@ programFromVSAVertexShaderPath(const QString& vertexFilename, QByteArray vertexD
 
   return {ShaderSource::ProgramType::VertexShaderArt, vertexData, ""};
 }
+
+std::pair<QByteArray, QString>
+preprocessShaderIncludes(QByteArray source, const QString& originPath) noexcept
+{
+  IncludeContext ctx;
+  ctx.searchPaths = shaderIncludePaths();
+  if(!originPath.isEmpty())
+    ctx.originDir = QFileInfo{originPath}.absolutePath();
+
+  QByteArray expanded = resolveIncludes(std::move(source), ctx);
+  if(!ctx.error.isEmpty())
+    return {{}, ctx.error};
+  return {std::move(expanded), {}};
+}
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp
index 2bdcaaf6a6..c9c847ffb3 100644
--- a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp
@@ -1,8 +1,10 @@
 #pragma once
 #include <Gfx/Graph/RenderState.hpp>
+#include <Gfx/Hashes.hpp>
 
 #include <score/tools/Debug.hpp>
 
+#include <ossia/detail/hash.hpp>
 #include <ossia/detail/hash_map.hpp>
 
 #include <QDebug>
@@ -93,7 +95,12 @@ struct SCORE_PLUGIN_GFX_EXPORT ShaderSource
   }
   friend bool operator==(const ShaderSource& lhs, const ShaderSource& rhs) noexcept
   {
-    return lhs.vertex == rhs.vertex && lhs.fragment == rhs.fragment;
+    // `type` MUST be part of equality: std::hash<ShaderSource> seeds with
+    // `type`, so two sources differing only by type hash differently. If ==
+    // ignored type they'd be "equal but unequal-hash", breaking the
+    // unordered-container invariant for ProgramCache / ProgramCacheKey.
+    return lhs.type == rhs.type && lhs.vertex == rhs.vertex
+           && lhs.fragment == rhs.fragment;
   }
   friend bool operator!=(const ShaderSource& lhs, const ShaderSource& rhs) noexcept
   {
@@ -117,6 +124,16 @@ struct SCORE_PLUGIN_GFX_EXPORT ShaderSource
 ShaderSource programFromISFFragmentShaderPath(const QString& fsFilename, QByteArray fsData);
 ShaderSource
 programFromVSAVertexShaderPath(const QString& vertexFilename, QByteArray vertexData);
+
+// Textual `#include` resolution for a single GLSL buffer. Used by
+// callers that want include support without going through the full
+// ProgramCache ISF pipeline — compute shaders are the current use case.
+// Returns {expanded source, empty string} on success and {empty buffer,
+// error message} on failure (missing header, include cycle, depth limit,
+// …). Test the error string, not the buffer, to detect failure.
+SCORE_PLUGIN_GFX_EXPORT
+std::pair<QByteArray, QString>
+preprocessShaderIncludes(QByteArray source, const QString& originPath = {}) noexcept;
 }
 
 namespace std
@@ -126,14 +143,11 @@ struct hash<Gfx::ShaderSource>
 {
   std::size_t operator()(const Gfx::ShaderSource& program) const noexcept
   {
-    constexpr const QtPrivate::QHashCombine combine{
-#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0)
-        0
-#endif
-    };
-    std::size_t seed{};
-    seed = combine(seed, program.vertex);
-    seed = combine(seed, program.fragment);
+    // rapidhash via the gfx Qt-aware adapters; same primitive that
+    // produces content_hash values throughout the gfx pipeline.
+    std::size_t seed{(std::size_t)program.type};
+    ossia::hash_combine(seed, score::gfx::hash_qstring(program.vertex));
+    ossia::hash_combine(seed, score::gfx::hash_qstring(program.fragment));
     return seed;
   }
 };
@@ -146,13 +160,52 @@ struct ProcessedProgram : ShaderSource
   isf::descriptor descriptor;
 };
 
+// Cache key. `originDir` is the *absolute directory* the shader was
+// loaded from (QFileInfo::absolutePath() of the caller-supplied origin
+// path; symlinks are NOT resolved). Keying on both means two models
+// loading the same source text from different directories don't collide
+// — include resolution against each shader's own sibling dir stays correct.
+struct ProgramCacheKey
+{
+  ShaderSource source;
+  QString originDir;
+
+  friend bool
+  operator==(const ProgramCacheKey& a, const ProgramCacheKey& b) noexcept
+  {
+    return a.source == b.source && a.originDir == b.originDir;
+  }
+};
+}
+
+namespace std
+{
+template <>
+struct hash<Gfx::ProgramCacheKey>
+{
+  std::size_t operator()(const Gfx::ProgramCacheKey& k) const noexcept
+  {
+    std::size_t seed = std::hash<Gfx::ShaderSource>{}(k.source);
+    ossia::hash_combine(seed, score::gfx::hash_qstring(k.originDir));
+    return seed;
+  }
+};
+}
+
+namespace Gfx
+{
 struct SCORE_PLUGIN_GFX_EXPORT ProgramCache
 {
   static ProgramCache& instance() noexcept;
+
+  // `originPath` is the absolute path of the shader file the source was
+  // loaded from, used as the base for quoted `#include "..."` resolution
+  // and as part of the cache key. Empty when the source is in-memory
+  // with no associated file.
   std::pair<std::optional<ProcessedProgram>, QString>
-  get(const ShaderSource& program) noexcept;
+  get(const ShaderSource& program, const QString& originPath = {}) noexcept;
 
-  ossia::hash_map<ShaderSource, ProcessedProgram> programs;
+  ossia::hash_map<ProgramCacheKey, ProcessedProgram> programs;
 };
 
 }
diff --git a/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp
index f57712fc35..26f4574a93 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp
@@ -6,6 +6,7 @@
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RenderState.hpp>
 #include <Gfx/InvertYRenderer.hpp>
 
 #include <score/gfx/OpenGL.hpp>
@@ -180,18 +181,15 @@ void ShmdataOutputNode::createOutput(score::gfx::OutputConfiguration conf)
           m_settings.height, int(m_settings.rate)),
       &m_logger);
   // clang-format on
-  m_renderState = std::make_shared<score::gfx::RenderState>();
-
-  m_renderState->surface = QRhiGles2InitParams::newFallbackSurface();
-  QRhiGles2InitParams params;
-  params.fallbackSurface = m_renderState->surface;
-  score::GLCapabilities caps;
-  caps.setupFormat(params.format);
-  m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, &params, {});
-  m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
+  m_renderState = score::gfx::createRenderState(
+      conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr);
+  if(!m_renderState || !m_renderState->rhi)
+  {
+    qWarning() << "ShmdataOutputNode: failed to create QRhi";
+    m_renderState.reset();
+    return;
+  }
   m_renderState->outputSize = m_renderState->renderSize;
-  m_renderState->api = score::gfx::GraphicsApi::OpenGL;
-  m_renderState->version = caps.qShaderVersion;
 
   auto rhi = m_renderState->rhi;
   m_texture = rhi->newTexture(
@@ -210,6 +208,26 @@ void ShmdataOutputNode::createOutput(score::gfx::OutputConfiguration conf)
 void ShmdataOutputNode::destroyOutput()
 {
   m_writer.reset();
+
+  if(!m_renderState)
+    return;
+
+  // Persist-across-rebuild contract: the registry survives RenderList
+  // teardown, so we tear down its QRhi resources here BEFORE
+  // RenderState::destroy() (called below) frees the device.
+  releaseRegistry();
+
+  delete m_renderTarget;
+  m_renderTarget = nullptr;
+
+  delete m_renderState->renderPassDescriptor;
+  m_renderState->renderPassDescriptor = nullptr;
+
+  delete m_texture;
+  m_texture = nullptr;
+
+  m_renderState->destroy();
+  m_renderState.reset();
 }
 
 std::shared_ptr<score::gfx::RenderState> ShmdataOutputNode::renderState() const
diff --git a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp
index e9593bb417..824c06bac3 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp
@@ -4,9 +4,12 @@
 #include <Gfx/GfxExecContext.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/Utils.hpp>
 #include <Gfx/Graph/decoders/GPUVideoDecoder.hpp>
 #include <Gfx/Graph/decoders/RGBA.hpp>
 
+#include <ossia/detail/algorithms.hpp>
+
 #include <score/gfx/OpenGL.hpp>
 #include <score/gfx/QRhiGles2.hpp>
 
@@ -23,8 +26,11 @@
 #include <private/qrhid3d11_p.h>
 #include <private/qrhid3d12_p.h>
 
+// clang-format off
 // D3D11On12 for D3D12 interop
+#include <windows.h>
 #include <d3d11on12.h>
+// clang-format on
 
 // Vulkan interop
 #if __has_include(<private/qrhivulkan_p.h>) && defined(QT_FEATURE_vulkan) && __has_include(<vulkan/vulkan.h>)
@@ -43,6 +49,68 @@
 
 namespace Gfx::Spout
 {
+namespace
+{
+// Cached snapshot of what we last observed from the Spout sender.
+// Allows detecting size/format/handle changes between frames.
+struct SpoutSenderInfo
+{
+  unsigned int width{};
+  unsigned int height{};
+  DWORD dxgiFormat{};
+  HANDLE handle{};
+
+  friend bool operator==(const SpoutSenderInfo&, const SpoutSenderInfo&) noexcept
+      = default;
+};
+
+bool querySpoutSender(const char* name, SpoutSenderInfo& out) noexcept
+{
+  spoutSenderNames senders;
+  return senders.GetSenderInfo(name, out.width, out.height, out.handle, out.dxgiFormat);
+}
+
+QRhiTexture::Format
+dxgiToQRhiFormat(DWORD dxgi, QRhi::Implementation backend) noexcept
+{
+  // For OpenGL we keep RGBA channel order regardless of sender layout:
+  // Spout's GL-DX interop handles the BGRA<->RGBA conversion on its side.
+  const bool wantNativeBGRA = (backend == QRhi::D3D11 || backend == QRhi::D3D12
+                               || backend == QRhi::Vulkan);
+
+  switch(static_cast<DXGI_FORMAT>(dxgi))
+  {
+    case DXGI_FORMAT_R8G8B8A8_UNORM:
+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+    case DXGI_FORMAT_R8G8B8A8_TYPELESS:
+      return QRhiTexture::RGBA8;
+    case DXGI_FORMAT_B8G8R8A8_UNORM:
+    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
+    case DXGI_FORMAT_B8G8R8A8_TYPELESS:
+      return wantNativeBGRA ? QRhiTexture::BGRA8 : QRhiTexture::RGBA8;
+    case DXGI_FORMAT_R10G10B10A2_UNORM:
+    case DXGI_FORMAT_R10G10B10A2_TYPELESS:
+      return QRhiTexture::RGB10A2;
+    case DXGI_FORMAT_R16G16B16A16_UNORM:
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R16G16B16A16_TYPELESS:
+      // RGBA16F is the only 4x16 format QRhi exposes (no RGBA16-UNORM). For a
+      // _UNORM sender this samples as half-float (color-inaccurate) but is the
+      // only available 64-bit/pixel format; dxgiToVulkanFormat() maps the same
+      // DXGI formats to VK_FORMAT_R16G16B16A16_SFLOAT so the imported VkImage
+      // and the QRhi-created view agree (no validation violation). On D3D the
+      // CopyResource between _UNORM and _FLOAT is permitted (shared TYPELESS
+      // family) and bit-preserving.
+      return QRhiTexture::RGBA16F;
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32A32_TYPELESS:
+      return QRhiTexture::RGBA32F;
+    default:
+      return wantNativeBGRA ? QRhiTexture::BGRA8 : QRhiTexture::RGBA8;
+  }
+}
+}
+
 class InputSettingsWidget final : public SharedInputSettingsWidget
 {
 public:
@@ -91,6 +159,7 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
   score::gfx::VideoMaterialUBO material;
   std::unique_ptr<score::gfx::GPUVideoDecoder> m_gpu{};
+  std::pair<QShader, QShader> m_shaders;
 
   // Spout receiver (for OpenGL)
   ::SpoutReceiver m_receiver;
@@ -98,14 +167,12 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
   // Spout DirectX (for D3D11)
   spoutDirectX m_spoutDX;
   ID3D11Texture2D* m_receivedTexture{};
-  HANDLE m_sharedHandle{};
 
   // D3D11On12 interop (for D3D12)
   ID3D11On12Device* m_d3d11On12Device{};
   ID3D11Device* m_d3d11Device{};
   ID3D11DeviceContext* m_d3d11Context{};
   ID3D11Resource* m_wrappedTexture{};
-  ID3D11Texture2D* m_spoutSharedTexture{}; // Cached Spout shared texture
 
 #if SCORE_SPOUT_VULKAN
   // Vulkan-D3D11 interop using KMT handles (SpoutVK approach)
@@ -113,12 +180,14 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
   // using the legacy DXGI shared handle (KMT type)
   VkImage m_vkLinkedImage{};              // VkImage linked to Spout's shared D3D11 texture
   VkDeviceMemory m_vkLinkedMemory{};      // Device memory imported from D3D11 texture
-  unsigned int m_vkSenderWidth{};
-  unsigned int m_vkSenderHeight{};
-  DWORD m_vkSenderFormat{};
   bool m_vkInitialized{};
 #endif
 
+  // Last-known sender info — used to detect size/format/handle changes.
+  SpoutSenderInfo m_lastSender{};
+  // Current destination texture format (may differ from sender DXGI byte-order on OpenGL).
+  QRhiTexture::Format m_textureFormat{QRhiTexture::RGBA8};
+
   bool enabled{};
   QRhi::Implementation m_backend{QRhi::Null};
 
@@ -130,7 +199,7 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     return {};
   }
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
     auto& rhi = *renderer.state.rhi;
     m_backend = rhi.backend();
@@ -151,102 +220,133 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
         sizeof(score::gfx::VideoMaterialUBO));
     m_materialUBO->create();
 
-    // Initialize based on backend
-    unsigned int w = 0, h = 0;
-
+    // Backend-specific bring-up (creates D3D11On12 device, OpenGL receiver context, etc.)
+    // Does NOT allocate the destination texture — that happens once we know the format.
     switch(m_backend)
     {
       case QRhi::OpenGLES2:
-        initOpenGL(rhi, w, h);
+        initOpenGL(rhi);
         break;
       case QRhi::D3D11:
-        initD3D11(rhi, w, h);
+        initD3D11(rhi);
         break;
       case QRhi::D3D12:
-        initD3D12(rhi, w, h);
+        initD3D12(rhi);
         break;
 #if SCORE_SPOUT_VULKAN
       case QRhi::Vulkan:
-        initVulkan(rhi, w, h);
+        initVulkan(rhi);
         break;
 #endif
       default:
         break;
     }
 
-    // Use reasonable defaults if no sender found yet
-    if(w == 0 || h == 0)
+    // Probe sender once up-front so we can pick a matching texture format.
+    // If no sender is present yet, fall through to safe defaults and let the
+    // first successful update() reconfigure to the real format.
+    SpoutSenderInfo si;
+    if(querySpoutSender(node.settings.path.toStdString().c_str(), si)
+       && si.width > 0 && si.height > 0)
+    {
+      enabled = true;
+    }
+    else
     {
-      w = 1280;
-      h = 720;
+      si = {};
+      si.width = 1280;
+      si.height = 720;
+      // Default DXGI format when no sender is found: BGRA on D3D/Vulkan, RGBA on GL.
+      si.dxgiFormat = (m_backend == QRhi::D3D11 || m_backend == QRhi::D3D12
+                       || m_backend == QRhi::Vulkan)
+                          ? DXGI_FORMAT_B8G8R8A8_UNORM
+                          : DXGI_FORMAT_R8G8B8A8_UNORM;
       enabled = false;
     }
 
-    metadata.width = w;
-    metadata.height = h;
+    m_lastSender = si;
+    m_textureFormat = dxgiToQRhiFormat(si.dxgiFormat, m_backend);
+    metadata.width = si.width;
+    metadata.height = si.height;
+
+    m_gpu = std::make_unique<score::gfx::PackedDecoder>(
+        m_textureFormat, 4, metadata, QString{}, true);
 
-    // Use BGRA for D3D/Vulkan backends (native DXGI format), RGBA for OpenGL
-    auto format = (m_backend == QRhi::D3D11 || m_backend == QRhi::D3D12
-                   || m_backend == QRhi::Vulkan)
-                      ? QRhiTexture::BGRA8
-                      : QRhiTexture::RGBA8;
-    m_gpu = std::make_unique<score::gfx::PackedDecoder>(format, 4, metadata, QString{}, true);
-    createPipelines(renderer);
+    // Cache shaders from GPU decoder init
+    if(m_gpu)
+      m_shaders = m_gpu->init(renderer);
 
     material.textureSize[0] = metadata.width;
     material.textureSize[1] = metadata.height;
     res.updateDynamicBuffer(
         m_materialUBO, 0, sizeof(score::gfx::VideoMaterialUBO), &material);
+
+    m_initialized = true;
   }
 
-  void initOpenGL(QRhi& rhi, unsigned int& w, unsigned int& h)
+  void addOutputPass(
+      score::gfx::RenderList& renderer, score::gfx::Edge& edge,
+      QRhiResourceUpdateBatch& res) override
   {
-    m_receiver.SetReceiverName(node.settings.path.toStdString().c_str());
-    rhi.makeThreadLocalNativeContextCurrent();
+    if(!m_gpu)
+      return;
+    if(!m_shaders.first.isValid() || !m_shaders.second.isValid())
+      return;
 
-    if(m_receiver.ReceiveTexture())
+    auto rt = renderer.renderTargetForOutput(edge);
+    if(rt.renderTarget)
     {
-      w = m_receiver.GetSenderWidth();
-      h = m_receiver.GetSenderHeight();
-      enabled = true;
+      auto pip = score::gfx::buildPipeline(
+          renderer, renderer.defaultTriangle(), m_shaders.first, m_shaders.second, rt,
+          m_processUBO, m_materialUBO, m_gpu->samplers);
+      if(pip.pipeline)
+        m_p.emplace_back(&edge, score::gfx::Pass{rt, pip, nullptr});
     }
   }
 
-  void initD3D11(QRhi& rhi, unsigned int& w, unsigned int& h)
+  void removeOutputPass(score::gfx::RenderList& renderer, score::gfx::Edge& edge) override
+  {
+    auto it = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; });
+    if(it != m_p.end())
+    {
+      it->second.release();
+      m_p.erase(it);
+    }
+  }
+
+  bool hasOutputPassForEdge(score::gfx::Edge& edge) const override
+  {
+    return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; })
+           != m_p.end();
+  }
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    initState(renderer, res);
+
+    for(auto* edge : this->node.output[0]->edges)
+      addOutputPass(renderer, *edge, res);
+  }
+
+  void initOpenGL(QRhi& rhi)
+  {
+    m_receiver.SetReceiverName(node.settings.path.toStdString().c_str());
+    rhi.makeThreadLocalNativeContextCurrent();
+  }
+
+  void initD3D11(QRhi& rhi)
   {
-    // Get the D3D11 device from QRhi
     auto nativeHandles
         = static_cast<const QRhiD3D11NativeHandles*>(rhi.nativeHandles());
     if(!nativeHandles || !nativeHandles->dev)
       return;
 
     auto device = static_cast<ID3D11Device*>(nativeHandles->dev);
-
-    // Initialize Spout DirectX with the QRhi device
-    if(!m_spoutDX.OpenDirectX11(device))
-      return;
-
-    // Try to find and connect to the sender
-    spoutSenderNames senderNames;
-    char senderName[256]{0};
-    strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255);
-
-    unsigned int senderWidth = 0, senderHeight = 0;
-    DWORD dwFormat = 0;
-    HANDLE shareHandle = nullptr;
-
-    if(senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat))
-    {
-      w = senderWidth;
-      h = senderHeight;
-      m_sharedHandle = shareHandle;
-      enabled = true;
-    }
+    m_spoutDX.OpenDirectX11(device);
   }
 
-  void initD3D12(QRhi& rhi, unsigned int& w, unsigned int& h)
+  void initD3D12(QRhi& rhi)
   {
-    // Get D3D12 device and command queue from QRhi
     auto nativeHandles
         = static_cast<const QRhiD3D12NativeHandles*>(rhi.nativeHandles());
     if(!nativeHandles || !nativeHandles->dev || !nativeHandles->commandQueue)
@@ -264,7 +364,6 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     if(FAILED(hr) || !m_d3d11Device)
       return;
 
-    // Get the D3D11On12Device interface
     hr = m_d3d11Device->QueryInterface(
         __uuidof(ID3D11On12Device), reinterpret_cast<void**>(&m_d3d11On12Device));
     if(FAILED(hr) || !m_d3d11On12Device)
@@ -273,63 +372,13 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
       m_d3d11Device = nullptr;
       m_d3d11Context->Release();
       m_d3d11Context = nullptr;
-      return;
-    }
-
-    // Try to find and connect to the sender
-    spoutSenderNames senderNames;
-    char senderName[256]{0};
-    strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255);
-
-    unsigned int senderWidth = 0, senderHeight = 0;
-    DWORD dwFormat = 0;
-    HANDLE shareHandle = nullptr;
-
-    if(senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat))
-    {
-      w = senderWidth;
-      h = senderHeight;
-      m_sharedHandle = shareHandle;
-      enabled = true;
     }
   }
 
 #if SCORE_SPOUT_VULKAN
-  void initVulkan(QRhi& rhi, unsigned int& w, unsigned int& h)
-  {
-    // Try to find and connect to the sender
-    spoutSenderNames senderNames;
-    char senderName[256]{0};
-    strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255);
-
-    unsigned int senderWidth = 0, senderHeight = 0;
-    DWORD dwFormat = 0;
-    HANDLE shareHandle = nullptr;
-
-    if(senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat))
-    {
-      w = senderWidth;
-      h = senderHeight;
-      m_sharedHandle = shareHandle;
-      m_vkSenderWidth = senderWidth;
-      m_vkSenderHeight = senderHeight;
-      m_vkSenderFormat = dwFormat;
-      enabled = true;
-    }
-  }
+  void initVulkan(QRhi& /*rhi*/) { }
 #endif
 
-  void createPipelines(score::gfx::RenderList& r)
-  {
-    if(m_gpu)
-    {
-      auto shaders = m_gpu->init(r);
-      SCORE_ASSERT(m_p.empty());
-      score::gfx::defaultPassesInit(
-          m_p, this->node.output[0]->edges, r, r.defaultTriangle(), shaders.first,
-          shaders.second, m_processUBO, m_materialUBO, m_gpu->samplers);
-    }
-  }
 
   void update(
       score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
@@ -371,6 +420,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     auto tex = m_gpu->samplers[0].texture;
     auto gltex = static_cast<QGles2Texture*>(tex);
 
+    // Probe sender presence — this also lets Spout update its internal
+    // m_bUpdated flag, which IsUpdated() then reports/clears.
     if(!m_receiver.ReceiveTexture())
     {
       enabled = false;
@@ -379,16 +430,14 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
     enabled = true;
 
-    if(m_receiver.IsUpdated())
+    // Pull the full sender state (size + DXGI format + handle) for change detection.
+    // GetSenderInfo reads from the Spout sender-names shared memory and is cheap.
+    SpoutSenderInfo si;
+    if(querySpoutSender(node.settings.path.toStdString().c_str(), si)
+       && si.width > 0 && si.height > 0)
     {
-      unsigned int w = m_receiver.GetSenderWidth();
-      unsigned int h = m_receiver.GetSenderHeight();
-
-      if(w > 0 && h > 0 && (w != metadata.width || h != metadata.height))
-      {
-        resizeTexture(tex, w, h);
+      if(reconfigureIfNeeded(rhi, si))
         gltex->specified = true;
-      }
     }
 
     GLuint texId = gltex->texture;
@@ -410,16 +459,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     auto device = static_cast<ID3D11Device*>(nativeHandles->dev);
     auto context = static_cast<ID3D11DeviceContext*>(nativeHandles->context);
 
-    // Check for sender updates
-    spoutSenderNames senderNames;
-    char senderName[256]{0};
-    strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255);
-
-    unsigned int senderWidth = 0, senderHeight = 0;
-    DWORD dwFormat = 0;
-    HANDLE shareHandle = nullptr;
-
-    if(!senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat))
+    SpoutSenderInfo si;
+    if(!querySpoutSender(node.settings.path.toStdString().c_str(), si))
     {
       enabled = false;
       return;
@@ -427,25 +468,16 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
     enabled = true;
 
-    // Check if size or handle changed
-    if(senderWidth != metadata.width || senderHeight != metadata.height
-       || shareHandle != m_sharedHandle)
-    {
-      // Release cached shared texture if handle changed
-      if(m_receivedTexture && shareHandle != m_sharedHandle)
-      {
-        m_receivedTexture->Release();
-        m_receivedTexture = nullptr;
-      }
-      m_sharedHandle = shareHandle;
-      resizeTexture(tex, senderWidth, senderHeight);
-    }
+    // Recreate the destination texture if anything changed.
+    // Important: D3D11 CopyResource requires source & destination formats to match,
+    // so we have to honor the sender's DXGI format here.
+    reconfigureIfNeeded(rhi, si);
 
     // Open the shared texture (cache it to avoid reopening every frame)
-    if(!m_receivedTexture && m_sharedHandle)
+    if(!m_receivedTexture && m_lastSender.handle)
     {
-      HRESULT hr
-          = device->OpenSharedResource(m_sharedHandle, IID_PPV_ARGS(&m_receivedTexture));
+      HRESULT hr = device->OpenSharedResource(
+          m_lastSender.handle, IID_PPV_ARGS(&m_receivedTexture));
       if(FAILED(hr))
         m_receivedTexture = nullptr;
     }
@@ -465,16 +497,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     SCORE_ASSERT(!m_gpu->samplers.empty());
     auto tex = m_gpu->samplers[0].texture;
 
-    // Check for sender updates
-    spoutSenderNames senderNames;
-    char senderName[256]{0};
-    strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255);
-
-    unsigned int senderWidth = 0, senderHeight = 0;
-    DWORD dwFormat = 0;
-    HANDLE shareHandle = nullptr;
-
-    if(!senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat))
+    SpoutSenderInfo si;
+    if(!querySpoutSender(node.settings.path.toStdString().c_str(), si))
     {
       enabled = false;
       return;
@@ -482,24 +506,9 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
     enabled = true;
 
-    // Check if size changed - need to re-wrap the texture
-    bool sizeChanged = (senderWidth != metadata.width || senderHeight != metadata.height);
-    bool handleChanged = (shareHandle != m_sharedHandle);
-
-    if(sizeChanged || handleChanged)
-    {
-      // Release old wrapped resource
-      if(m_wrappedTexture)
-      {
-        m_wrappedTexture->Release();
-        m_wrappedTexture = nullptr;
-      }
-
-      m_sharedHandle = shareHandle;
-
-      if(sizeChanged)
-        resizeTexture(tex, senderWidth, senderHeight);
-    }
+    // Recreate destination texture (and drop the cached D3D11 wrapped resource)
+    // when the sender's size, format or share handle changes.
+    reconfigureIfNeeded(rhi, si);
 
     // Get the native D3D12 resource from QRhiTexture
     auto nativeTex = tex->nativeTexture();
@@ -529,8 +538,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
     // Open the Spout shared texture via D3D11
     ID3D11Texture2D* sharedTex = nullptr;
-    HRESULT hr
-        = m_d3d11Device->OpenSharedResource(m_sharedHandle, IID_PPV_ARGS(&sharedTex));
+    HRESULT hr = m_d3d11Device->OpenSharedResource(
+        m_lastSender.handle, IID_PPV_ARGS(&sharedTex));
     if(FAILED(hr) || !sharedTex)
       return;
 
@@ -561,8 +570,17 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
       case DXGI_FORMAT_R10G10B10A2_UNORM:
         return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
       case DXGI_FORMAT_R16G16B16A16_UNORM:
-        return VK_FORMAT_R16G16B16A16_UNORM;
       case DXGI_FORMAT_R16G16B16A16_FLOAT:
+        // The QRhi destination texture for both of these is RGBA16F (the only
+        // 4x16 format QRhi exposes — there is no RGBA16-UNORM). The imported
+        // VkImage MUST use the same format as the QRhi-created image view,
+        // otherwise QVkTexture::createFrom() builds an SFLOAT view over a
+        // non-MUTABLE_FORMAT UNORM image, which is a Vulkan validation
+        // violation (VUID-VkImageViewCreateInfo-image-01762) and samples
+        // garbage. Both _UNORM and _FLOAT are 64-bit/pixel, so the KMT import
+        // succeeds; we therefore map both to SFLOAT to stay consistent with
+        // dxgiToQRhiFormat(). (UNORM data read as half-float is still color-
+        // inaccurate, but that is an inherent QRhi limitation, not a crash.)
         return VK_FORMAT_R16G16B16A16_SFLOAT;
       case DXGI_FORMAT_R32G32B32A32_FLOAT:
         return VK_FORMAT_R32G32B32A32_SFLOAT;
@@ -572,13 +590,11 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     }
   }
 
-  // Link a Vulkan image to D3D11 shared texture memory using KMT handle
-  // Based on SpoutVK::LinkVulkanImage from the official SpoutVulkan examples
+  // Link a Vulkan image to D3D11 shared texture memory using KMT handle.
+  // Caller is expected to have torn down any prior linked resources via
+  // releaseVulkanResources() and the QRhiTexture's destroy() before calling.
   bool linkVulkanImage(QRhi& rhi, HANDLE dxShareHandle, unsigned int w, unsigned int h, DWORD dwFormat)
   {
-    if(m_vkInitialized)
-      return false;
-
     auto nativeHandles = static_cast<const QRhiVulkanNativeHandles*>(rhi.nativeHandles());
     if(!nativeHandles || !nativeHandles->dev || !nativeHandles->physDev)
       return false;
@@ -588,33 +604,12 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
     VkFormat vulkanFormat = dxgiToVulkanFormat(dwFormat);
 
-    // Release any previous resources
+    // Defensive: ensure nothing leaks if caller did not release first.
     releaseVulkanResources(rhi);
 
-    // The handle type for Spout sender is KMT (legacy shared handle)
-    // NOT NT handle - this is critical for Spout compatibility
-    VkExternalMemoryHandleTypeFlags handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT;
-
-    // Query support for external image format using KMT handles
-    VkPhysicalDeviceImageFormatInfo2 formatInfo = {};
-    formatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2;
-    formatInfo.format = vulkanFormat;
-    formatInfo.type = VK_IMAGE_TYPE_2D;
-    formatInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
-    formatInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
-
-    VkPhysicalDeviceExternalImageFormatInfo externalFormatInfo = {};
-    externalFormatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO;
-    externalFormatInfo.handleType = (VkExternalMemoryHandleTypeFlagBits)handleType;
-    formatInfo.pNext = &externalFormatInfo;
-
-    VkExternalImageFormatProperties externalImageFormatProps = {};
-    externalImageFormatProps.sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES;
-    VkImageFormatProperties2 imageFormatProps2 = {};
-    imageFormatProps2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2;
-    imageFormatProps2.pNext = &externalImageFormatProps;
-
-    // Use vkGetPhysicalDeviceImageFormatProperties2 to check support
+    // Spout shares D3D11 textures via legacy KMT handles (NOT NT handles).
+    constexpr auto handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT;
+
     auto* inst = score::gfx::staticVulkanInstance();
     if(!inst)
       return false;
@@ -625,33 +620,69 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     if(!dfuncs)
       return false;
 
-    // We need to use the device-level function for this
-    auto vkGetPhysicalDeviceImageFormatProperties2Func
-        = reinterpret_cast<PFN_vkGetPhysicalDeviceImageFormatProperties2>(
-            inst->getInstanceProcAddr("vkGetPhysicalDeviceImageFormatProperties2"));
-    if(!vkGetPhysicalDeviceImageFormatProperties2Func)
-      return false;
-
-    VkResult result = vkGetPhysicalDeviceImageFormatProperties2Func(vkPhysDev, &formatInfo, &imageFormatProps2);
-    if(result != VK_SUCCESS)
+    // Resolve vkGetMemoryWin32HandlePropertiesKHR via vkGetDeviceProcAddr.
+    //
+    // Why not inst->getInstanceProcAddr("vkGetMemoryWin32HandlePropertiesKHR")?
+    // Qt forwards that to vkGetInstanceProcAddr, which for device-level
+    // extension functions can return a non-null trampoline that CRASHES
+    // when called: the instance loader has no per-device dispatch for
+    // device extensions, so calling that pointer dereferences garbage.
+    //
+    // vkGetDeviceProcAddr is itself a core 1.0 function, so resolving IT
+    // through inst->getInstanceProcAddr is safe — that part of the loader
+    // has proper dispatch. We then call the device-level resolver to get
+    // a pointer that's valid for THIS device's enabled extensions.
+    PFN_vkGetMemoryWin32HandlePropertiesKHR pfnGetMemWin32Props = nullptr;
     {
-      qWarning() << "SpoutInput: KMT handle type not supported for Vulkan external memory";
-      return false;
+      auto pfnGetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
+          inst->getInstanceProcAddr("vkGetDeviceProcAddr"));
+      if(pfnGetDeviceProcAddr)
+      {
+        pfnGetMemWin32Props
+            = reinterpret_cast<PFN_vkGetMemoryWin32HandlePropertiesKHR>(
+                pfnGetDeviceProcAddr(
+                    vkDevice, "vkGetMemoryWin32HandlePropertiesKHR"));
+      }
     }
 
-    // Check if import is supported
-    VkExternalMemoryFeatureFlags externalMemoryFeatures
-        = externalImageFormatProps.externalMemoryProperties.externalMemoryFeatures;
-    if(!(externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT))
+    // Probe whether import for this format/handle type is supported.
+    // Note: this is informational; the real test is the memory-type
+    // intersection below.
+    VkExternalMemoryFeatureFlags externalMemoryFeatures = 0;
     {
-      qWarning() << "SpoutInput: Cannot import memory with KMT handle type";
-      return false;
+      VkPhysicalDeviceExternalImageFormatInfo externalFormatInfo = {};
+      externalFormatInfo.sType
+          = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO;
+      externalFormatInfo.handleType = handleType;
+
+      VkPhysicalDeviceImageFormatInfo2 formatInfo = {};
+      formatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2;
+      formatInfo.pNext = &externalFormatInfo;
+      formatInfo.format = vulkanFormat;
+      formatInfo.type = VK_IMAGE_TYPE_2D;
+      formatInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+      formatInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+      VkExternalImageFormatProperties extProps = {};
+      extProps.sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES;
+
+      VkImageFormatProperties2 props2 = {};
+      props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2;
+      props2.pNext = &extProps;
+
+      auto pfnGetPhysFmt2 = reinterpret_cast<PFN_vkGetPhysicalDeviceImageFormatProperties2>(
+          inst->getInstanceProcAddr("vkGetPhysicalDeviceImageFormatProperties2"));
+      if(pfnGetPhysFmt2)
+      {
+        VkResult r = pfnGetPhysFmt2(vkPhysDev, &formatInfo, &props2);
+        if(r == VK_SUCCESS)
+          externalMemoryFeatures = extProps.externalMemoryProperties.externalMemoryFeatures;
+      }
     }
 
-    // Create the Vulkan import image with external memory info
+    // Create the VkImage with external memory info.
     VkExternalMemoryImageCreateInfo extMemoryImageInfo = {};
     extMemoryImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
-    extMemoryImageInfo.pNext = nullptr;
     extMemoryImageInfo.handleTypes = handleType;
 
     VkImageCreateInfo imageCreateInfo = {};
@@ -664,81 +695,122 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     imageCreateInfo.arrayLayers = 1;
     imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
     imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
-    imageCreateInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+    imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
     imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
     imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 
-    result = dfuncs->vkCreateImage(vkDevice, &imageCreateInfo, nullptr, &m_vkLinkedImage);
+    VkResult result
+        = dfuncs->vkCreateImage(vkDevice, &imageCreateInfo, nullptr, &m_vkLinkedImage);
     if(result != VK_SUCCESS)
     {
-      qWarning() << "SpoutInput: Could not create Vulkan image for external memory";
+      qWarning() << "SpoutInput: vkCreateImage failed for external memory:" << result;
+      m_vkLinkedImage = VK_NULL_HANDLE;
       return false;
     }
 
-    // Get memory requirements
+    // Memory requirements as dictated by the image we just created.
     VkMemoryRequirements memRequirements;
     dfuncs->vkGetImageMemoryRequirements(vkDevice, m_vkLinkedImage, &memRequirements);
 
-    // Find suitable memory type
+    // For an imported KMT handle, the spec requires picking a memoryTypeIndex
+    // from the intersection of memRequirements.memoryTypeBits and the bits
+    // returned by vkGetMemoryWin32HandlePropertiesKHR for that handle.
+    uint32_t handleMemoryTypeBits = 0;
+    if(pfnGetMemWin32Props)
+    {
+      VkMemoryWin32HandlePropertiesKHR handleProps = {};
+      handleProps.sType = VK_STRUCTURE_TYPE_MEMORY_WIN32_HANDLE_PROPERTIES_KHR;
+      VkResult hr
+          = pfnGetMemWin32Props(vkDevice, handleType, dxShareHandle, &handleProps);
+      if(hr == VK_SUCCESS)
+        handleMemoryTypeBits = handleProps.memoryTypeBits;
+      else
+        qWarning() << "SpoutInput: vkGetMemoryWin32HandlePropertiesKHR failed:" << hr;
+    }
+    else
+    {
+      qWarning() << "SpoutInput: vkGetMemoryWin32HandlePropertiesKHR not available";
+    }
+
+    const uint32_t supportedBits
+        = memRequirements.memoryTypeBits & handleMemoryTypeBits;
+    if(supportedBits == 0)
+    {
+      qWarning() << "SpoutInput: No memory type supports the shared KMT handle"
+                 << "(memReqBits=" << Qt::hex << memRequirements.memoryTypeBits
+                 << "handleBits=" << handleMemoryTypeBits << ")";
+      dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr);
+      m_vkLinkedImage = VK_NULL_HANDLE;
+      return false;
+    }
+
     VkPhysicalDeviceMemoryProperties memProperties;
     funcs->vkGetPhysicalDeviceMemoryProperties(vkPhysDev, &memProperties);
 
+    // Prefer DEVICE_LOCAL among compatible types; fall back to any compatible.
     uint32_t memoryTypeIndex = UINT32_MAX;
     for(uint32_t i = 0; i < memProperties.memoryTypeCount; i++)
     {
-      if((memRequirements.memoryTypeBits & (1 << i))
-         && (memProperties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
+      if((supportedBits & (1u << i))
+         && (memProperties.memoryTypes[i].propertyFlags
+             & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
       {
         memoryTypeIndex = i;
         break;
       }
     }
-
     if(memoryTypeIndex == UINT32_MAX)
     {
-      qWarning() << "SpoutInput: No suitable memory type for external import";
+      for(uint32_t i = 0; i < memProperties.memoryTypeCount; i++)
+      {
+        if(supportedBits & (1u << i))
+        {
+          memoryTypeIndex = i;
+          break;
+        }
+      }
+    }
+    if(memoryTypeIndex == UINT32_MAX)
+    {
       dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr);
       m_vkLinkedImage = VK_NULL_HANDLE;
       return false;
     }
 
-    // Set up import memory info with KMT handle
+    // Import the KMT handle.
     VkImportMemoryWin32HandleInfoKHR importMemoryInfo = {};
     importMemoryInfo.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
-    importMemoryInfo.pNext = nullptr;
-    importMemoryInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT;
+    importMemoryInfo.handleType = handleType;
     importMemoryInfo.handle = dxShareHandle;
-    importMemoryInfo.name = nullptr;
-
-    // Check if dedicated allocation is required
-    bool dedicatedRequired = (externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT) != 0;
 
+    // Dedicated allocation: KMT-imported memory backs exactly one image,
+    // so we always dedicate. Required by some drivers, harmless on others.
+    (void)externalMemoryFeatures;
     VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {};
     dedicatedAllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
     dedicatedAllocInfo.pNext = &importMemoryInfo;
     dedicatedAllocInfo.image = m_vkLinkedImage;
-    dedicatedAllocInfo.buffer = VK_NULL_HANDLE;
 
     VkMemoryAllocateInfo allocInfo = {};
     allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-    allocInfo.pNext = dedicatedRequired ? (void*)&dedicatedAllocInfo : (void*)&importMemoryInfo;
+    allocInfo.pNext = &dedicatedAllocInfo;
     allocInfo.allocationSize = memRequirements.size;
     allocInfo.memoryTypeIndex = memoryTypeIndex;
 
     result = dfuncs->vkAllocateMemory(vkDevice, &allocInfo, nullptr, &m_vkLinkedMemory);
     if(result != VK_SUCCESS)
     {
-      qWarning() << "SpoutInput: Could not allocate memory for external import";
+      qWarning() << "SpoutInput: vkAllocateMemory for external import failed:" << result;
       dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr);
       m_vkLinkedImage = VK_NULL_HANDLE;
+      m_vkLinkedMemory = VK_NULL_HANDLE;
       return false;
     }
 
-    // Bind memory to the Vulkan image
     result = dfuncs->vkBindImageMemory(vkDevice, m_vkLinkedImage, m_vkLinkedMemory, 0);
     if(result != VK_SUCCESS)
     {
-      qWarning() << "SpoutInput: Could not bind memory to image";
+      qWarning() << "SpoutInput: vkBindImageMemory failed:" << result;
       dfuncs->vkFreeMemory(vkDevice, m_vkLinkedMemory, nullptr);
       m_vkLinkedMemory = VK_NULL_HANDLE;
       dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr);
@@ -752,16 +824,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
   void updateVulkan(QRhi& rhi, QRhiResourceUpdateBatch& res)
   {
-    // Check for sender updates
-    spoutSenderNames senderNames;
-    char senderName[256]{0};
-    strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255);
-
-    unsigned int senderWidth = 0, senderHeight = 0;
-    DWORD dwFormat = 0;
-    HANDLE shareHandle = nullptr;
-
-    if(!senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat))
+    SpoutSenderInfo si;
+    if(!querySpoutSender(node.settings.path.toStdString().c_str(), si))
     {
       enabled = false;
       return;
@@ -769,63 +833,16 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
 
     enabled = true;
 
-    // Check if size, format, or handle changed
-    bool needsRecreate = !m_vkInitialized
-                         || senderWidth != m_vkSenderWidth
-                         || senderHeight != m_vkSenderHeight
-                         || dwFormat != m_vkSenderFormat
-                         || shareHandle != m_sharedHandle;
-
-    if(needsRecreate)
+    // On Vulkan the destination QRhiTexture must be (re)linked to the
+    // sender's shared D3D11 memory whenever size, format or handle changes.
+    // The first frame after init also flows through here because m_vkInitialized
+    // is still false (initState only allocates a plain placeholder texture).
+    if(!m_vkInitialized)
     {
-      // Update stored values
-      m_sharedHandle = shareHandle;
-      m_vkSenderWidth = senderWidth;
-      m_vkSenderHeight = senderHeight;
-      m_vkSenderFormat = dwFormat;
-
-      // Create linked Vulkan image from Spout's shared handle
-      if(!linkVulkanImage(rhi, shareHandle, senderWidth, senderHeight, dwFormat))
-      {
-        enabled = false;
-        return;
-      }
-
-      // Update metadata and texture size
-      if(senderWidth != metadata.width || senderHeight != metadata.height)
-      {
-        metadata.width = senderWidth;
-        metadata.height = senderHeight;
-        material.scale[0] = 1.f;
-        material.scale[1] = 1.f;
-        material.textureSize[0] = metadata.width;
-        material.textureSize[1] = metadata.height;
-      }
-
-      // Update QRhiTexture to use the linked VkImage
-      SCORE_ASSERT(!m_gpu->samplers.empty());
-      auto tex = m_gpu->samplers[0].texture;
-
-      tex->destroy();
-      tex->setPixelSize(QSize(senderWidth, senderHeight));
-
-      QRhiTexture::NativeTexture nativeTex;
-      nativeTex.object = (quint64)m_vkLinkedImage;
-      // The linked image is in GENERAL layout for shared memory compatibility
-      nativeTex.layout = VK_IMAGE_LAYOUT_GENERAL;
-
-      if(!tex->createFrom(nativeTex))
-      {
-        qWarning() << "SpoutInput: Failed to create QRhiTexture from linked VkImage";
-        releaseVulkanResources(rhi);
-        enabled = false;
-        return;
-      }
-
-      // Recreate shader resource bindings
-      for(auto& pass : m_p)
-        pass.second.srb->create();
+      // Force reconfiguration even if state happens to match the placeholder.
+      m_lastSender = {};
     }
+    reconfigureIfNeeded(rhi, si);
 
     // The texture content is automatically synchronized because
     // the VkImage memory is linked to the D3D11 shared texture.
@@ -852,35 +869,156 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     if(!dfuncs)
       return;
 
-    if(m_vkLinkedMemory)
-    {
-      dfuncs->vkFreeMemory(vkDevice, m_vkLinkedMemory, nullptr);
-      m_vkLinkedMemory = VK_NULL_HANDLE;
-    }
+    // Destroy the image (and any binding to memory) before freeing the memory.
     if(m_vkLinkedImage)
     {
       dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr);
       m_vkLinkedImage = VK_NULL_HANDLE;
     }
+    if(m_vkLinkedMemory)
+    {
+      dfuncs->vkFreeMemory(vkDevice, m_vkLinkedMemory, nullptr);
+      m_vkLinkedMemory = VK_NULL_HANDLE;
+    }
     m_vkInitialized = false;
   }
 #endif
 
-  void resizeTexture(QRhiTexture* tex, unsigned int w, unsigned int h)
+  // Drop backend-specific caches that are tied to the previous sender handle,
+  // format or size. Called from reconfigureIfNeeded() before recreating the
+  // destination texture, and from releaseState() during teardown.
+  void releaseSharedResources(QRhi& rhi)
+  {
+    switch(m_backend)
+    {
+      case QRhi::D3D11:
+        if(m_receivedTexture)
+        {
+          m_receivedTexture->Release();
+          m_receivedTexture = nullptr;
+        }
+        break;
+      case QRhi::D3D12:
+        if(m_wrappedTexture)
+        {
+          m_wrappedTexture->Release();
+          m_wrappedTexture = nullptr;
+        }
+        break;
+#if SCORE_SPOUT_VULKAN
+      case QRhi::Vulkan:
+        releaseVulkanResources(rhi);
+        break;
+#endif
+      default:
+        break;
+    }
+  }
+
+  // Returns true if anything was reconfigured (texture recreated). When that
+  // happens, callers may need to refresh backend-specific state that depends
+  // on the underlying QRhiTexture (e.g. OpenGL's `specified` flag).
+  //
+  // When the texture is recreated it gets a valid backing (linked import or
+  // plain placeholder) before the SRB rebuild, so no null VkImageView is ever
+  // written; if even create() fails, `enabled` is cleared and false returned.
+  bool reconfigureIfNeeded(QRhi& rhi, const SpoutSenderInfo& sender)
   {
-    metadata.width = w;
-    metadata.height = h;
+    if(sender.width == 0 || sender.height == 0)
+      return false;
+
+    const QRhiTexture::Format newFormat
+        = dxgiToQRhiFormat(sender.dxgiFormat, m_backend);
+
+    const bool sizeChanged
+        = sender.width != m_lastSender.width || sender.height != m_lastSender.height;
+    const bool formatChanged = newFormat != m_textureFormat;
+    const bool handleChanged = sender.handle != m_lastSender.handle;
+    if(!sizeChanged && !formatChanged && !handleChanged)
+      return false;
+
+    SCORE_ASSERT(!m_gpu->samplers.empty());
+    auto tex = m_gpu->samplers[0].texture;
+
+    // Tear-down order matters: the QRhi-owned VkImageView (or D3D SRV) must
+    // be destroyed BEFORE the underlying native resource it was created
+    // from. Calling tex->destroy() first does the former; then
+    // releaseSharedResources() drops the latter.
+    tex->destroy();
+    releaseSharedResources(rhi);
+
+    tex->setPixelSize(QSize(sender.width, sender.height));
+    tex->setFormat(newFormat);
+
+    bool linked = false;
+#if SCORE_SPOUT_VULKAN
+    if(m_backend == QRhi::Vulkan)
+    {
+      if(linkVulkanImage(
+             rhi, sender.handle, sender.width, sender.height, sender.dxgiFormat))
+      {
+        QRhiTexture::NativeTexture nt;
+        nt.object = (quint64)m_vkLinkedImage;
+        nt.layout = VK_IMAGE_LAYOUT_GENERAL;
+        if(tex->createFrom(nt))
+        {
+          linked = true;
+        }
+        else
+        {
+          qWarning() << "SpoutInput: createFrom(VkImage) failed during reconfigure";
+          releaseVulkanResources(rhi);
+        }
+      }
+    }
+#endif
+
+    bool ok = linked;
+    if(!ok)
+    {
+      // Either non-Vulkan path, or Vulkan link failed. Allocate a normal
+      // QRhiTexture so the SRB has a valid view to bind. On Vulkan this
+      // yields a black/undefined image but avoids the
+      // VUID-VkWriteDescriptorSet-descriptorType-02997 validation error
+      // and the subsequent draw-time crash.
+      ok = tex->create();
+    }
+
+    if(!ok)
+    {
+      enabled = false;
+      // Do NOT advance m_lastSender — let the next frame retry from scratch.
+      return false;
+    }
+
+    // Update metadata + material UBO.
+    metadata.width = sender.width;
+    metadata.height = sender.height;
     material.scale[0] = 1.f;
     material.scale[1] = 1.f;
     material.textureSize[0] = metadata.width;
     material.textureSize[1] = metadata.height;
 
-    tex->destroy();
-    tex->setPixelSize(QSize(w, h));
-    tex->create();
+    m_textureFormat = newFormat;
+    m_lastSender = sender;
+#if SCORE_SPOUT_VULKAN
+    if(m_backend == QRhi::Vulkan && !linked)
+    {
+      // Link failed for this sender configuration. We mark the renderer as
+      // disabled (so callers can show a fallback frame) but record the
+      // sender state so a later size/format/handle change triggers a retry.
+      // NOTE(review): updateVulkan() clears m_lastSender while m_vkInitialized
+      // is false, so that path still redoes destroy/create each frame — confirm.
+      enabled = false;
+    }
+#endif
 
+    // Pipelines stay valid (only the input sampler binding changed), but the
+    // SRB references the QRhiTexture pointer/format and must be rebuilt.
     for(auto& pass : m_p)
-      pass.second.srb->create();
+      pass.second.p.srb->create();
+
+    return true;
   }
 
   void runRenderPass(
@@ -891,28 +1029,31 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
     score::gfx::defaultRenderPass(renderer, mesh, m_meshBuffer, cb, edge, m_p);
   }
 
-  void release(score::gfx::RenderList& r) override
+  void releaseState(score::gfx::RenderList& r) override
   {
+    if(!m_initialized)
+      return;
+
+    // Order matters: destroy QRhi-owned resources (QRhiTexture wrappers and
+    // their image views) BEFORE the underlying native shared resources they
+    // wrap. Otherwise the QRhiTexture destruction may operate on a view
+    // whose underlying VkImage / D3D resource has already been released.
+    if(m_gpu)
+    {
+      m_gpu->release(r);
+    }
+
+    // Now drop the native shared resources we hold.
+    releaseSharedResources(*r.state.rhi);
+
     switch(m_backend)
     {
       case QRhi::OpenGLES2:
         if(enabled)
           m_receiver.ReleaseReceiver();
         break;
-      case QRhi::D3D11:
-        if(m_receivedTexture)
-        {
-          m_receivedTexture->Release();
-          m_receivedTexture = nullptr;
-        }
-        break;
       case QRhi::D3D12:
-        // Release D3D11On12 resources
-        if(m_wrappedTexture)
-        {
-          m_wrappedTexture->Release();
-          m_wrappedTexture = nullptr;
-        }
+        // Release the D3D11On12 interop layer (set up in initD3D12).
         if(m_d3d11On12Device)
         {
           m_d3d11On12Device->Release();
@@ -929,26 +1070,13 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
           m_d3d11Device = nullptr;
         }
         break;
-#if SCORE_SPOUT_VULKAN
-      case QRhi::Vulkan:
-        releaseVulkanResources(*r.state.rhi);
-        m_vkSenderWidth = 0;
-        m_vkSenderHeight = 0;
-        m_vkSenderFormat = 0;
-        break;
-#endif
       default:
         break;
     }
 
     enabled = false;
-    m_receivedTexture = nullptr;
-    m_sharedHandle = nullptr;
-
-    if(m_gpu)
-    {
-      m_gpu->release(r);
-    }
+    m_lastSender = {};
+    m_textureFormat = QRhiTexture::RGBA8;
 
     delete m_processUBO;
     m_processUBO = nullptr;
@@ -959,7 +1087,15 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer
       p.second.release();
     m_p.clear();
 
-    m_meshBuffer.buffers.clear();
+    m_meshBuffer = {};
+    m_shaders = {};
+
+    m_initialized = false;
+  }
+
+  void release(score::gfx::RenderList& r) override
+  {
+    releaseState(r);
   }
 };
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp
index ae0e2d7945..9a5fcc0a7a 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp
@@ -6,6 +6,7 @@
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RenderState.hpp>
 #include <Gfx/Settings/Model.hpp>
 #include <score/gfx/OpenGL.hpp>
 #include <score/gfx/QRhiGles2.hpp>
@@ -24,8 +25,11 @@
 #include <private/qrhid3d11_p.h>
 #include <private/qrhid3d12_p.h>
 
+// clang-format off
 // D3D11On12 for D3D12 interop
+#include <windows.h>
 #include <d3d11on12.h>
+// clang-format on
 
 // Vulkan interop
 #if __has_include(<private/qrhivulkan_p.h>) && defined(QT_FEATURE_vulkan) && __has_include(<vulkan/vulkan.h>)
@@ -533,8 +537,6 @@ struct SpoutNode final : score::gfx::OutputNode
 
   void createOutput(score::gfx::OutputConfiguration conf) override
   {
-    m_renderState = std::make_shared<score::gfx::RenderState>();
-
     // Choose backend based on requested API
     switch(conf.graphicsApi)
     {
@@ -555,12 +557,13 @@ struct SpoutNode final : score::gfx::OutputNode
         break;
     }
 
-    auto rhi = m_renderState->rhi;
-    if(!rhi)
+    if(!m_renderState || !m_renderState->rhi)
     {
       qWarning() << "Failed to create QRhi for Spout output";
+      m_renderState.reset();
       return;
     }
+    auto rhi = m_renderState->rhi;
 
     // Use BGRA for D3D/Vulkan backends, RGBA for OpenGL
     auto format = (m_backend == QRhi::D3D11 || m_backend == QRhi::D3D12 || m_backend == QRhi::Vulkan)
@@ -586,43 +589,36 @@ struct SpoutNode final : score::gfx::OutputNode
     m_backend = QRhi::OpenGLES2;
     m_spout = std::make_shared<SpoutSender>();
 
-    m_renderState->surface = QRhiGles2InitParams::newFallbackSurface();
-    QRhiGles2InitParams params;
-    params.fallbackSurface = m_renderState->surface;
-    score::GLCapabilities caps;
-    caps.setupFormat(params.format);
-    m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, &params, {});
-    m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
-    m_renderState->outputSize = m_renderState->renderSize;
-    m_renderState->api = score::gfx::GraphicsApi::OpenGL;
-    m_renderState->version = caps.qShaderVersion;
+    m_renderState = score::gfx::createRenderState(
+        score::gfx::GraphicsApi::OpenGL,
+        QSize(m_settings.width, m_settings.height), nullptr);
+    if(m_renderState)
+      m_renderState->outputSize = m_renderState->renderSize;
   }
 
   void createOutputD3D11()
   {
     m_backend = QRhi::D3D11;
 
-    QRhiD3D11InitParams params;
-    m_renderState->rhi = QRhi::create(QRhi::D3D11, &params, {});
-    m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
-    m_renderState->outputSize = m_renderState->renderSize;
-    m_renderState->api = score::gfx::GraphicsApi::D3D11;
-    m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::D3D11);
+    m_renderState = score::gfx::createRenderState(
+        score::gfx::GraphicsApi::D3D11,
+        QSize(m_settings.width, m_settings.height), nullptr);
+    if(m_renderState)
+      m_renderState->outputSize = m_renderState->renderSize;
   }
 
   void createOutputD3D12()
   {
     m_backend = QRhi::D3D12;
 
-    QRhiD3D12InitParams params;
-    m_renderState->rhi = QRhi::create(QRhi::D3D12, &params, {});
-    m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
-    m_renderState->outputSize = m_renderState->renderSize;
-    m_renderState->api = score::gfx::GraphicsApi::D3D12;
-    m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::D3D12);
+    m_renderState = score::gfx::createRenderState(
+        score::gfx::GraphicsApi::D3D12,
+        QSize(m_settings.width, m_settings.height), nullptr);
+    if(m_renderState)
+      m_renderState->outputSize = m_renderState->renderSize;
 
     // Get D3D12 device and command queue from QRhi
-    if(m_renderState->rhi)
+    if(m_renderState && m_renderState->rhi)
     {
       auto nativeHandles = static_cast<const QRhiD3D12NativeHandles*>(
           m_renderState->rhi->nativeHandles());
@@ -653,33 +649,16 @@ struct SpoutNode final : score::gfx::OutputNode
   {
     m_backend = QRhi::Vulkan;
 
-    // Create Vulkan instance with required extensions
-    auto* vkInst = score::gfx::staticVulkanInstance();
-    if(!vkInst)
-    {
-      qWarning() << "SpoutOutput: No Vulkan instance available";
-      return;
-    }
-
-    QRhiVulkanInitParams params;
-    params.inst = vkInst;
-
-    // Enable required device extensions for external memory
-    params.deviceExtensions << VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME
-                            << VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME
-                            << VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME
-                            << VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME
-                            << VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME
-                            << VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME;
-
-    m_renderState->rhi = QRhi::create(QRhi::Vulkan, &params, QRhi::EnableDebugMarkers, nullptr);
-    m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
-    m_renderState->outputSize = m_renderState->renderSize;
-    m_renderState->api = score::gfx::GraphicsApi::Vulkan;
-    m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::Vulkan);
+    // createRenderState already adds the VK_KHR_EXTERNAL_MEMORY{,_WIN32}, etc.
+    // extensions on Windows, plus shares the video-decode-capable VkDevice.
+    m_renderState = score::gfx::createRenderState(
+        score::gfx::GraphicsApi::Vulkan,
+        QSize(m_settings.width, m_settings.height), nullptr);
+    if(m_renderState)
+      m_renderState->outputSize = m_renderState->renderSize;
 
     // Create a D3D11 device for creating the shared texture
-    if(m_renderState->rhi)
+    if(m_renderState && m_renderState->rhi)
     {
       D3D_FEATURE_LEVEL featureLevels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0};
       UINT createDeviceFlags = D3D11_CREATE_DEVICE_BGRA_SUPPORT;
@@ -781,6 +760,29 @@ struct SpoutNode final : score::gfx::OutputNode
         break;
     }
     m_created = false;
+
+    // Backend-specific interop handles were released above; now release the
+    // QRhi-owned resources. Order: render target -> render pass descriptor
+    // -> texture -> rhi (the last step is done by RenderState::destroy()).
+    if(!m_renderState)
+      return;
+
+    // Persist-across-rebuild contract: registry survives RL teardown,
+    // so we tear down its QRhi resources here BEFORE
+    // RenderState::destroy() (called below) frees the device.
+    releaseRegistry();
+
+    delete m_renderTarget;
+    m_renderTarget = nullptr;
+
+    delete m_renderState->renderPassDescriptor;
+    m_renderState->renderPassDescriptor = nullptr;
+
+    delete m_texture;
+    m_texture = nullptr;
+
+    m_renderState->destroy();
+    m_renderState.reset();
   }
 
   std::shared_ptr<score::gfx::RenderState> renderState() const override
diff --git a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm
index 9821e04651..24703f856e 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm
+++ b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm
@@ -8,7 +8,10 @@
 #include <Gfx/GfxExecContext.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/Utils.hpp>
 #include <Gfx/Graph/decoders/RGBA.hpp>
+
+#include <ossia/detail/algorithms.hpp>
 #include <Gfx/Syphon/SyphonHelpers.hpp>
 #include <Syphon/SyphonClient.h>
 #include <Syphon/SyphonOpenGLClient.h>
@@ -67,6 +70,7 @@ explicit Renderer(const SyphonInputNode &n)
 
   score::gfx::VideoMaterialUBO material;
   std::unique_ptr<score::gfx::GPUVideoDecoder> m_gpu{};
+  std::pair<QShader, QShader> m_shaders;
 
   // OpenGL receiver
   SyphonOpenGLClient* m_receiver{};
@@ -78,6 +82,8 @@ explicit Renderer(const SyphonInputNode &n)
 
   bool enabled{};
   bool m_usingMetal{};
+  int m_emptyFrameCount{0};
+  static constexpr int kReopenAfterEmpty = 60;
 
   ~Renderer() { }
 
@@ -99,10 +105,37 @@ explicit Renderer(const SyphonInputNode &n)
     return nullptr;
   }
 
+  // Whether the server we are bound to is still advertised in the Syphon
+  // directory. A *static* sender (publishes one frame then idles) delivers no
+  // further new frames but stays in the directory — so we must NOT reconnect
+  // just because frames stopped; only reconnect once the server has vanished.
+  bool serverStillPresent()
+  {
+    SyphonServerDirectory* ssd = [SyphonServerDirectory sharedDirectory];
+    NSArray* servers = [ssd serversMatchingName:NULL appName:NULL];
+    return findServer(servers, node.settings.path) != nullptr;
+  }
+
   void openServer(QRhi& rhi)
   {
     enabled = false;
 
+    // Symmetric with releaseState(): stop any client we already hold before
+    // replacing it, otherwise the previous SyphonClient leaks (and keeps a
+    // connection open to the server).
+    if (m_mtlReceiver)
+    {
+      [m_mtlReceiver stop];
+      m_mtlReceiver = nil;
+    }
+    if (m_receiver)
+    {
+      [m_receiver stop];
+      m_receiver = nil;
+    }
+    m_currentMtlTexture = nil;
+    currentTex = 0;
+
     SyphonServerDirectory *ssd = [SyphonServerDirectory sharedDirectory];
     NSArray *servers = [ssd serversMatchingName:NULL appName:NULL];
     if (servers.count == 0)
@@ -147,7 +180,8 @@ void openServer(QRhi& rhi)
   }
 
   score::gfx::TextureRenderTarget renderTargetForInput(const score::gfx::Port& p) override { return { }; }
-  void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) override
+
+  void initState(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) override
   {
     // Initialize our rendering structures
     auto& rhi = *renderer.state.rhi;
@@ -216,7 +250,10 @@ void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) overri
     {
       m_gpu = std::make_unique<score::gfx::PackedRectDecoder>(QRhiTexture::RGBA8, 4, metadata, QString{});
     }
-    createPipelines(renderer);
+
+    // Cache shaders from GPU decoder init
+    if (m_gpu)
+      m_shaders = m_gpu->init(renderer);
 
     if (m_usingMetal && mtlTex)
     {
@@ -226,27 +263,54 @@ void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) overri
     {
       rebuildTexture(glImg);
     }
+
+    m_initialized = true;
   }
 
-  void createPipelines(score::gfx::RenderList& r)
+  void addOutputPass(
+      score::gfx::RenderList& renderer, score::gfx::Edge& edge,
+      QRhiResourceUpdateBatch& res) override
   {
-    if (m_gpu)
+    if (!m_gpu)
+      return;
+    if (!m_shaders.first.isValid() || !m_shaders.second.isValid())
+      return;
+
+    auto rt = renderer.renderTargetForOutput(edge);
+    if (rt.renderTarget)
+    {
+      auto pip = score::gfx::buildPipeline(
+          renderer, renderer.defaultTriangle(), m_shaders.first, m_shaders.second, rt,
+          m_processUBO, m_materialUBO, m_gpu->samplers);
+      if (pip.pipeline)
+        m_p.emplace_back(&edge, score::gfx::Pass{rt, pip, nullptr});
+    }
+  }
+
+  void removeOutputPass(score::gfx::RenderList& renderer, score::gfx::Edge& edge) override
+  {
+    auto it = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; });
+    if (it != m_p.end())
     {
-      auto shaders = m_gpu->init(r);
-      SCORE_ASSERT(m_p.empty());
-      score::gfx::defaultPassesInit(
-          m_p,
-          this->node.output[0]->edges,
-          r,
-          r.defaultTriangle(),
-          shaders.first,
-          shaders.second,
-          m_processUBO,
-          m_materialUBO,
-          m_gpu->samplers);
+      it->second.release();
+      m_p.erase(it);
     }
   }
 
+  bool hasOutputPassForEdge(score::gfx::Edge& edge) const override
+  {
+    const auto sameEdge = [&edge](const auto& entry) { return entry.first == &edge; };
+    return ossia::find_if(m_p, sameEdge) != m_p.end();
+  }
+
+  // Full init: shared state first, then one output pass per outgoing edge.
+  void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) override
+  {
+    initState(renderer, res);
+    for (auto* outEdge : this->node.output[0]->edges)
+      addOutputPass(renderer, *outEdge, res);
+  }
+
   void rebuildTexture(SyphonOpenGLImage* img)
   {
     SCORE_ASSERT(!m_gpu->samplers.empty());
@@ -274,7 +338,7 @@ void rebuildTexture(SyphonOpenGLImage* img)
       t->gltype = GL_UNSIGNED_INT_8_8_8_8_REV;
     }
     for(auto& pass : m_p)
-      pass.second.srb->create();
+      pass.second.p.srb->create();
   }
 
   void rebuildTextureMetal(id<MTLTexture> mtlTex)
@@ -293,7 +357,7 @@ void rebuildTextureMetal(id<MTLTexture> mtlTex)
     tex->createFrom(nativeTex);
 
     for(auto& pass : m_p)
-      pass.second.srb->create();
+      pass.second.p.srb->create();
   }
 
   void update(score::gfx::RenderList &renderer,
@@ -304,13 +368,26 @@ void update(score::gfx::RenderList &renderer,
     {
       auto& rhi = *renderer.state.rhi;
       openServer(rhi);
+      m_emptyFrameCount = 0;
     }
 
     if (m_usingMetal)
     {
       // Metal path
       if (!m_mtlReceiver || !m_mtlReceiver.hasNewFrame)
+      {
+        if (++m_emptyFrameCount >= kReopenAfterEmpty)
+        {
+          m_emptyFrameCount = 0;
+          // Only reconnect if the server is actually gone. A healthy static
+          // sender simply stops producing new frames while staying present;
+          // dropping it here would reconnect forever and lose the last frame.
+          if (!m_mtlReceiver || !serverStillPresent())
+            enabled = false;
+        }
         return;
+      }
+      m_emptyFrameCount = 0;
 
       id<MTLTexture> mtlTex = [m_mtlReceiver newFrameImage];
       if (!mtlTex)
@@ -336,7 +413,18 @@ void update(score::gfx::RenderList &renderer,
     {
       // OpenGL path
       if (!m_receiver || !m_receiver.hasNewFrame)
+      {
+        if (++m_emptyFrameCount >= kReopenAfterEmpty)
+        {
+          m_emptyFrameCount = 0;
+          // Only reconnect if the server actually vanished (see Metal path):
+          // a static sender stays present but stops sending new frames.
+          if (!m_receiver || !serverStillPresent())
+            enabled = false;
+        }
         return;
+      }
+      m_emptyFrameCount = 0;
 
       auto img = [m_receiver newFrameImage];
       if (!img)
@@ -370,22 +458,27 @@ void runRenderPass(
     score::gfx::defaultRenderPass(renderer, mesh, m_meshBuffer, cb, edge, m_p);
   }
 
-  void release(score::gfx::RenderList& r) override
+  void releaseState(score::gfx::RenderList& r) override
   {
-    if (enabled)
+    if (!m_initialized)
+      return;
+
+    // Stop whenever a receiver exists — NOT only when enabled. A receiver can
+    // be alive while enabled==false (e.g. after the empty-frame path cleared
+    // enabled but left the client connected), and skipping -stop in that case
+    // leaks the SyphonClient. This also mirrors openServer(), which is the only
+    // other place receivers are created.
+    if (m_mtlReceiver)
     {
-      if (m_mtlReceiver)
-      {
-        [m_mtlReceiver stop];
-        m_mtlReceiver = nil;
-      }
-      if (m_receiver)
-      {
-        [m_receiver stop];
-        m_receiver = nil;
-      }
-      enabled = false;
+      [m_mtlReceiver stop];
+      m_mtlReceiver = nil;
+    }
+    if (m_receiver)
+    {
+      [m_receiver stop];
+      m_receiver = nil;
     }
+    enabled = false;
 
     m_currentMtlTexture = nil;
     currentTex = 0;
@@ -404,7 +497,15 @@ void release(score::gfx::RenderList& r) override
       p.second.release();
     m_p.clear();
 
-    m_meshBuffer.buffers.clear();
+    m_meshBuffer = {};
+    m_shaders = {};
+
+    m_initialized = false;
+  }
+
+  void release(score::gfx::RenderList& r) override
+  {
+    releaseState(r); // release() adds nothing on top of releaseState()
+  }
 };
 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm
index b6073fa78e..fdb164c092 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm
+++ b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm
@@ -1,6 +1,7 @@
 #include "SyphonOutput.hpp"
 
 #include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/RenderState.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/OutputNode.hpp>
 #include <Gfx/Settings/Model.hpp>
@@ -177,33 +178,21 @@ void setRenderer(std::shared_ptr<score::gfx::RenderList> r) override
 
   void createOutput(score::gfx::OutputConfiguration conf) override
   {
-    m_renderState = std::make_shared<score::gfx::RenderState>();
-    m_renderState->renderSize = QSize(m_settings.width, m_settings.height);
-    m_renderState->outputSize = m_renderState->renderSize;
-
-    if (conf.graphicsApi == score::gfx::GraphicsApi::Metal)
+    // Syphon supports GL or Metal; the upstream graphics API picks which one.
+    const auto api = (conf.graphicsApi == score::gfx::GraphicsApi::Metal)
+                         ? score::gfx::GraphicsApi::Metal
+                         : score::gfx::GraphicsApi::OpenGL;
+    m_usingMetal = (api == score::gfx::GraphicsApi::Metal);
+
+    m_renderState = score::gfx::createRenderState(
+        api, QSize(m_settings.width, m_settings.height), nullptr);
+    if(!m_renderState || !m_renderState->rhi)
     {
-      // Metal backend
-      QRhiMetalInitParams params;
-      m_renderState->rhi = QRhi::create(QRhi::Metal, &params, {});
-      m_renderState->api = score::gfx::GraphicsApi::Metal;
-      m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::Metal);
-      m_usingMetal = true;
-    }
-    else
-    {
-      // OpenGL backend
-      m_renderState->surface = QRhiGles2InitParams::newFallbackSurface();
-      QRhiGles2InitParams params;
-      params.format.setMajorVersion(3);
-      params.format.setMinorVersion(2);
-      params.format.setProfile(QSurfaceFormat::CompatibilityProfile);
-      params.fallbackSurface = m_renderState->surface;
-      m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, &params, {});
-      m_renderState->api = score::gfx::GraphicsApi::OpenGL;
-      m_renderState->version = QShaderVersion(120);
-      m_usingMetal = false;
+      qWarning() << "SyphonOutput: failed to create QRhi";
+      m_renderState.reset();
+      return;
     }
+    m_renderState->outputSize = m_renderState->renderSize;
 
     auto rhi = m_renderState->rhi;
     m_texture = rhi->newTexture(
@@ -240,6 +229,28 @@ void destroyOutput() override
     }
 
     m_created = false;
+
+    // Release Syphon servers above first; they hold native GL/Metal handles
+    // into the rhi's device. Now tear down the rhi-owned resources.
+    if(!m_renderState)
+      return;
+
+    // Persist-across-rebuild contract: registry survives RL teardown,
+    // so we tear down its QRhi resources here BEFORE
+    // RenderState::destroy() (called below) frees the device.
+    releaseRegistry();
+
+    delete m_renderTarget;
+    m_renderTarget = nullptr;
+
+    delete m_renderState->renderPassDescriptor;
+    m_renderState->renderPassDescriptor = nullptr;
+
+    delete m_texture;
+    m_texture = nullptr;
+
+    m_renderState->destroy();
+    m_renderState.reset();
   }
 
   std::shared_ptr<score::gfx::RenderState> renderState() const override
diff --git a/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp b/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp
index 5847097cec..6160694b7b 100644
--- a/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp
@@ -9,8 +9,7 @@
 #include <Explorer/DocumentPlugin/DeviceDocumentPlugin.hpp>
 
 #include <Gfx/GfxApplicationPlugin.hpp>
-#include <Gfx/Graph/ScreenNode.hpp>
-#include <Gfx/Graph/Window.hpp>
+#include <Gfx/Widgets/RhiPreviewWidget.hpp>
 #include <Inspector/InspectorLayout.hpp>
 
 #include <score/command/Dispatchers/CommandDispatcher.hpp>
@@ -41,108 +40,34 @@ class GraphPreviewWidget : public QWidget
 public:
   GraphPreviewWidget(const TextureOutlet& outlet, Gfx::DocumentPlugin& plug)
       : outlet_p{&outlet}
-      , plug{&plug}
   {
     setLayout(new Inspector::VBoxLayout{this});
 
-    score::gfx::OutputNode::Configuration conf{};
-    auto window = std::make_unique<score::gfx::ScreenNode>(conf, true);
-    node = window.get();
-    screenId = plug.context.register_preview_node(std::move(window));
-    if(screenId != -1)
-    {
-      if(outlet.nodeId != -1)
-      {
-        nodeId = outlet.nodeId;
-        e = {{nodeId, 0}, {screenId, 0}};
-        plug.context.connect_preview_node(*e);
-      }
-      timerId = startTimer(16);
-    }
+    m_rhiWidget = new RhiPreviewWidget(this);
+    m_rhiWidget->setMinimumWidth(100);
+    m_rhiWidget->setMaximumWidth(300);
+    m_rhiWidget->setMinimumHeight(200);
+    m_rhiWidget->setMaximumHeight(200);
+    m_rhiWidget->useContext(&plug.context, outlet.nodeId);
+    layout()->addWidget(m_rhiWidget);
+
+    // TextureOutlet::nodeId has no notifier — poll for changes so a
+    // process re-instantiation rewires the preview to the new producer.
+    startTimer(16);
   }
 
-  void timerEvent(QTimerEvent*)
+  void timerEvent(QTimerEvent*) override
   {
-    const auto& w = node->window();
-    if(!w)
+    if(!outlet_p || !m_rhiWidget)
       return;
-
-    if(!outlet_p)
-      return;
-
-    auto& outlet = *outlet_p;
-
-    if(outlet.nodeId != nodeId)
-    {
-      if(e)
-      {
-        if(plug)
-          plug->context.disconnect_preview_node(*e);
-        e = std::nullopt;
-      }
-
-      if(outlet.nodeId != -1)
-      {
-        nodeId = outlet.nodeId;
-        e = {{nodeId, 0}, {screenId, 0}};
-
-        if(plug)
-          plug->context.connect_preview_node(*e);
-      }
-    }
-
-    if(!container)
-    {
-      qwindow = w.get();
-      this->window = w;
-
-      container = QWidget::createWindowContainer(qwindow, this);
-      container->setMinimumWidth(100);
-      container->setMaximumWidth(300);
-      container->setMinimumHeight(200);
-      container->setMaximumHeight(200);
-      this->layout()->addWidget(container);
-    }
-    node->render();
+    m_rhiWidget->setProducerNodeId(outlet_p->nodeId);
   }
 
-  ~GraphPreviewWidget()
-  {
-    if(qwindow)
-    {
-      // Take back ownership of the window
-      qwindow->setParent(nullptr);
-      qwindow->close();
-      QChildEvent ev(QEvent::ChildRemoved, qwindow);
-      ((QObject*)container)->event(&ev);
-    }
-
-    // We "garbage collect" the window
-    QTimer::singleShot(1, [w = this->window] { });
-    if(plug)
-    {
-      if(e)
-      {
-        plug->context.disconnect_preview_node(*e);
-      }
-      plug->context.unregister_preview_node(screenId);
-    }
-  }
+  ~GraphPreviewWidget() override = default;
 
 private:
   QPointer<const TextureOutlet> outlet_p;
-  QPointer<Gfx::DocumentPlugin> plug;
-  score::gfx::ScreenNode* node{};
-  std::optional<Gfx::EdgeSpec> e;
-
-  std::shared_ptr<score::gfx::Window> window;
-
-  QPointer<QWindow> qwindow{};
-  QWidget* container{};
-
-  int screenId = score::gfx::invalid_node_index;
-  int nodeId = score::gfx::invalid_node_index;
-  int timerId{};
+  RhiPreviewWidget* m_rhiWidget{};
 };
 
 TextureInlet::~TextureInlet() { }
diff --git a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp
index 4c16529942..eff765b3be 100644
--- a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp
@@ -10,8 +10,10 @@
 #include <Gfx/TexturePort.hpp>
 
 #include <score/application/GUIApplicationContext.hpp>
+#include <score/document/DocumentInterface.hpp>
 #include <score/tools/DeleteAll.hpp>
 #include <score/tools/File.hpp>
+#include <score/tools/FilePath.hpp>
 
 #include <QFileInfo>
 
@@ -141,6 +143,7 @@ Model::Model(
   metadata().setInstanceName(*this);
   m_outlets.push_back(new TextureOutlet{"Texture Out", Id<Process::Port>(1), this});
 
+  m_scriptPath = init;
   (void)setProgram(programFromVSAVertexShaderPath(init, {}));
 }
 
@@ -175,7 +178,9 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f)
   m_program.vertex = f.vertex;
   m_program.fragment.clear();
   m_processedProgram.fragment.clear();
-  if(const auto& [processed, error] = ProgramCache::instance().get(f); bool(processed))
+  if(const auto& [processed, error]
+     = ProgramCache::instance().get(f, m_scriptPath);
+     bool(processed))
   {
     ossia::flat_map<QString, ossia::value> previous_values;
     for(auto inl : m_inlets)
@@ -246,7 +251,9 @@ Process::Descriptor ProcessFactory::descriptor(QString path) const noexcept
 template <>
 void DataStreamReader::read(const Gfx::VSA::Model& proc)
 {
-  m_stream << proc.m_program;
+  auto& ctx = score::IDocument::documentContext(proc);
+  m_stream << proc.m_program
+           << score::relativizeFilePath(proc.m_scriptPath, ctx);
 
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 
@@ -257,7 +264,12 @@ template <>
 void DataStreamWriter::write(Gfx::VSA::Model& proc)
 {
   Gfx::ShaderSource s;
-  m_stream >> s;
+  m_stream >> s >> proc.m_scriptPath;
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+  }
   s.type = isf::parser::ShaderType::VertexShaderArt;
   (void)proc.setVertex(s.vertex);
 
@@ -272,6 +284,11 @@ template <>
 void JSONReader::read(const Gfx::VSA::Model& proc)
 {
   obj["Vertex"] = proc.vertex();
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx);
+  }
 
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 }
@@ -282,6 +299,15 @@ void JSONWriter::write(Gfx::VSA::Model& proc)
   Gfx::ShaderSource s;
   s.vertex = obj["Vertex"].toString();
   s.type = isf::parser::ShaderType::VertexShaderArt;
+  if(auto r = obj.tryGet("Root"))
+  {
+    proc.m_scriptPath <<= *r;
+    if(!proc.m_scriptPath.isEmpty())
+    {
+      auto& ctx = score::IDocument::documentContext(proc);
+      proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+    }
+  }
   (void)proc.setVertex(s.vertex);
 
   writePorts(
diff --git a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp
index 1191dc8bdd..8efa772d46 100644
--- a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp
@@ -57,6 +57,10 @@ class Model final : public Process::ProcessModel
   void errorMessage(int line, const QString& arg_2) const
       W_SIGNAL(errorMessage, line, arg_2);
 
+  // Absolute path of the shader file this model was loaded from. Used as
+  // the base for quoted #include resolution. Empty for in-memory source.
+  QString rootPath() const noexcept { return m_scriptPath; }
+
 private:
   [[nodiscard]] Process::ScriptChangeResult setProgram(ShaderSource f);
   void loadPreset(const Process::Preset& preset) override;
@@ -66,6 +70,7 @@ class Model final : public Process::ProcessModel
 
   ShaderSource m_program;
   ProcessedProgram m_processedProgram;
+  QString m_scriptPath;
 };
 
 struct ProcessFactory final : Process::ProcessFactory_T<Gfx::VSA::Model>
diff --git a/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.cpp b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.cpp
new file mode 100644
index 0000000000..6c159e807f
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.cpp
@@ -0,0 +1,267 @@
+#include <Gfx/Widgets/RhiPreviewWidget.hpp>
+
+#include <Gfx/GfxContext.hpp>
+#include <Gfx/Graph/BackgroundNode.hpp>
+#include <Gfx/Graph/Graph.hpp>
+#include <Gfx/Graph/Node.hpp>
+
+#include <QImage>
+#include <QPainter>
+#include <QPaintEvent>
+#include <QResizeEvent>
+
+namespace Gfx
+{
+namespace
+{
+constexpr int kPreviewIntervalMs = 16;  // ~60 Hz
+}
+
+RhiPreviewWidget::RhiPreviewWidget(QWidget* parent)
+    : QWidget{parent}
+{
+  // paintEvent always covers the whole rect (readback blit or black fill),
+  // so Qt does not need to pre-fill the background before we paint.
+  setAttribute(Qt::WA_NoSystemBackground, true);
+  setAttribute(Qt::WA_OpaquePaintEvent, true);
+}
+
+RhiPreviewWidget::~RhiPreviewWidget()
+{
+  detach(); // stops the timer and unhooks the preview node from its backend
+}
+
+void RhiPreviewWidget::useGraph(
+    score::gfx::Graph* graph,
+    std::function<void(score::gfx::BackgroundNode&)> onAttached,
+    std::function<void(score::gfx::BackgroundNode&)> onAboutToDetach)
+{
+  detach(); // tears down the previous backend with its own callbacks
+  m_ctx = nullptr;
+  m_graph = graph;
+  m_backend = Backend::Graph;
+  m_onAboutToDetach = std::move(onAboutToDetach);
+  m_onAttached = std::move(onAttached);
+  attach();
+}
+
+void RhiPreviewWidget::useContext(GfxContext* ctx, int32_t producerNodeId)
+{
+  detach(); // must run while the old backend and producer id are still set
+  m_graph = nullptr;
+  m_ctx = ctx;
+  m_backend = Backend::Context;
+  m_producerNodeId = producerNodeId;
+  attach();
+}
+
+void RhiPreviewWidget::setProducerNodeId(int32_t id)
+{
+  const int32_t previousId = m_producerNodeId;
+  if(id == previousId)
+    return;
+  m_producerNodeId = id;
+
+  // Live rewiring of the producer→preview edge only applies to a
+  // registered Context-backend preview node; with the Graph backend the
+  // caller owns the edges and rewires them in its attach/detach callbacks.
+  const bool registered = m_screenNodeId != score::gfx::invalid_node_index;
+  if(m_backend != Backend::Context || !m_ctx || !registered)
+    return;
+
+  if(m_edgeConnected)
+  {
+    // Drop the edge that still starts at the previous producer.
+    m_ctx->disconnect_preview_node(
+        EdgeSpec{{previousId, 0}, {m_screenNodeId, 0}});
+    m_edgeConnected = false;
+  }
+  if(id != score::gfx::invalid_node_index)
+  {
+    m_ctx->connect_preview_node(
+        EdgeSpec{{id, 0}, {m_screenNodeId, 0}});
+    m_edgeConnected = true;
+  }
+}
+
+void RhiPreviewWidget::attach()
+{
+  if(m_backend == Backend::None)
+    return;
+  // Build a fresh readback + BackgroundNode pair, then hand the node to the backend.
+  m_readback = std::make_shared<QRhiReadbackResult>();
+
+  auto node = std::make_unique<score::gfx::BackgroundNode>();
+  node->shared_readback = m_readback;
+  // Match the offscreen render size to the widget's pixel size; the
+  // BackgroundNode allocates its own QRhi target at this size.
+  const qreal dpr = devicePixelRatioF();
+  const QSize px{
+      qMax(1, int(width() * dpr)), qMax(1, int(height() * dpr))};
+  if(width() > 0 && height() > 0)
+    node->setSize(px);
+  m_node = node.get();
+
+  switch(m_backend)
+  {
+    case Backend::Graph: {
+      if(!m_graph)
+      {
+        m_node = nullptr;
+        return; // `node` is still held by the unique_ptr and is freed here
+      }
+
+      // Keep ownership: Graph::removeNode does not delete; we delete in
+      // detach() once we've removed the node + its render list.
+      m_graph->addNode(node.release());
+
+      // The caller wires producer→preview edges here, then arranges
+      // for a render list to be built (typically via createAllRenderLists).
+      if(m_onAttached)
+        m_onAttached(*m_node);
+      break;
+    }
+
+    case Backend::Context: {
+      if(!m_ctx)
+      {
+        m_node = nullptr;
+        return; // `node` is still held by the unique_ptr and is freed here
+      }
+
+      // register_node (not register_preview_node) so that GfxContext's
+      // recomputeTimers picks up the node's
+      // BackgroundNode::configuration().manualRenderingRate and drives
+      // render() automatically — the BackgroundNode does its own offscreen
+      // frame + readback there; our widget timer only calls update().
+      m_screenNodeId = m_ctx->register_node(
+          std::unique_ptr<score::gfx::Node>{node.release()});
+      if(m_screenNodeId == score::gfx::invalid_node_index)
+      {
+        m_node = nullptr;
+        return;
+      }
+      if(m_producerNodeId != score::gfx::invalid_node_index)
+      {
+        m_ctx->connect_preview_node(
+            EdgeSpec{{m_producerNodeId, 0}, {m_screenNodeId, 0}});
+        m_edgeConnected = true;
+      }
+      break;
+    }
+
+    case Backend::None:
+      break;
+  }
+
+  // Single timer, started only once a backend attach succeeded: it
+  // refreshes the widget at preview rate. For the Graph backend
+  // timerEvent also calls BackgroundNode::render() directly (that graph
+  // has no GfxContext timers); for the Context backend GfxContext drives
+  // render() via its manual timer and timerEvent only calls update().
+  if(m_timerId == 0)
+    m_timerId = startTimer(kPreviewIntervalMs);
+}
+
+void RhiPreviewWidget::detach()
+{
+  if(m_timerId)
+  {
+    killTimer(m_timerId);
+    m_timerId = 0;
+  }
+  // Undo whatever attach() set up for the backend that is currently active.
+  switch(m_backend)
+  {
+    case Backend::Graph: {
+      if(m_node && m_graph)
+      {
+        if(m_onAboutToDetach)
+          m_onAboutToDetach(*m_node);
+        m_graph->destroyOutputRenderList(*m_node);
+        m_graph->removeNode(m_node);
+      }
+      delete m_node; // Graph backend: attach() kept ownership of the node
+      m_node = nullptr;
+      break;
+    }
+
+    case Backend::Context: {
+      if(m_ctx && m_screenNodeId != score::gfx::invalid_node_index)
+      {
+        if(m_edgeConnected)
+        {
+          m_ctx->disconnect_preview_node(
+              EdgeSpec{{m_producerNodeId, 0}, {m_screenNodeId, 0}});
+          m_edgeConnected = false;
+        }
+        m_ctx->unregister_node(m_screenNodeId);
+      }
+      m_screenNodeId = score::gfx::invalid_node_index;
+      // GfxContext owns the node lifetime via its command queue; we
+      // do not delete here.
+      m_node = nullptr;
+      break;
+    }
+
+    case Backend::None:
+      m_node = nullptr;
+      break;
+  }
+
+  m_readback.reset(); // paintEvent fills black while this is null
+}
+
+void RhiPreviewWidget::resizeEvent(QResizeEvent* ev)
+{
+  QWidget::resizeEvent(ev);
+  if(!m_node)
+    return;
+
+  // Logical size → device pixels, never below 1x1.
+  const qreal scale = devicePixelRatioF();
+  const int pxW = qMax(1, int(ev->size().width() * scale));
+  const int pxH = qMax(1, int(ev->size().height() * scale));
+  m_node->setSize(QSize{pxW, pxH});
+}
+
+void RhiPreviewWidget::timerEvent(QTimerEvent* ev)
+{
+  const bool ours = (ev->timerId() == m_timerId);
+  if(!ours)
+  {
+    // Not the preview timer: let QWidget deal with it.
+    QWidget::timerEvent(ev);
+    return;
+  }
+  // Only the Graph backend renders from here; with the Context backend
+  // GfxContext's own manual timer calls render(), so we just repaint.
+  const bool renderHere = (m_backend == Backend::Graph) && m_node;
+  if(renderHere)
+    m_node->render();
+  update();
+}
+
+void RhiPreviewWidget::paintEvent(QPaintEvent*)
+{
+  QPainter painter{this};
+
+  // Without a readback result, or with one that is empty or too short for
+  // its advertised size, fall back to a solid black fill.
+  const QSize frame = m_readback ? m_readback->pixelSize : QSize{};
+  const int bytesPerLine = frame.width() * 4; // 4 bytes per RGBA8888 pixel
+  const bool usable = frame.width() > 0 && frame.height() > 0
+                      && m_readback->data.size() >= bytesPerLine * frame.height();
+  if(!usable)
+  {
+    painter.fillRect(rect(), Qt::black);
+    return;
+  }
+  // Wrap the readback bytes in place (no copy) and stretch them to the widget.
+  const auto* pixels
+      = reinterpret_cast<const unsigned char*>(m_readback->data.constData());
+  const QImage img{
+      pixels, frame.width(), frame.height(), bytesPerLine, QImage::Format_RGBA8888};
+  painter.drawImage(rect(), img);
+}
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.hpp b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.hpp
new file mode 100644
index 0000000000..e6dacef043
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.hpp
@@ -0,0 +1,92 @@
+#pragma once
+#include <QWidget>
+
+#include <score_plugin_gfx_export.h>
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+struct QRhiReadbackResult;
+
+namespace score::gfx
+{
+struct Graph;
+struct BackgroundNode;
+}
+
+namespace Gfx
+{
+class GfxContext;
+
+/**
+ * @brief A QWidget that paints a score::gfx render-graph output without using
+ *        QWidget::createWindowContainer (broken on macOS) or QRhiWidget
+ *        (forces the toplevel to switch to RHI compositing — flash + perf
+ *        impact).
+ *
+ * The graph renders into an offscreen QRhi texture owned by a
+ * score::gfx::BackgroundNode; each frame is read back into a QImage-shaped
+ * QByteArray and drawn in paintEvent. CPU readback is cheap at preview
+ * resolutions and avoids touching Qt's compositor RHI entirely.
+ *
+ * Two backends:
+ *   - Graph backend (useGraph): caller owns a score::gfx::Graph and drives
+ *     wiring through callbacks. Used by ShaderPreviewManager.
+ *   - Context backend (useContext): caller routes registration through a
+ *     Gfx::GfxContext. The GfxContext's manual timer drives the offscreen
+ *     render; the widget only triggers QWidget::update() to refresh the
+ *     painted image. Used by GraphPreviewWidget (texture-port preview).
+ */
+class SCORE_PLUGIN_GFX_EXPORT RhiPreviewWidget : public QWidget
+{
+public:
+  explicit RhiPreviewWidget(QWidget* parent = nullptr);
+  ~RhiPreviewWidget() override;
+
+  /// Graph backend. onAttached fires right after the BackgroundNode has been
+  /// added to the graph: the caller wires producer→preview edges there and
+  /// then gets a render list built. onAboutToDetach fires before the node and
+  /// its render list are removed; the caller must remove any edges it added.
+  void useGraph(
+      score::gfx::Graph* graph,
+      std::function<void(score::gfx::BackgroundNode&)> onAttached,
+      std::function<void(score::gfx::BackgroundNode&)> onAboutToDetach);
+
+  /// Context backend. The producer node id can be updated at any time; the
+  /// widget rewires the preview edge accordingly.
+  void useContext(GfxContext* ctx, int32_t producerNodeId);
+  void setProducerNodeId(int32_t id);
+
+protected:
+  void paintEvent(QPaintEvent* ev) override;
+  void resizeEvent(QResizeEvent* ev) override;
+  void timerEvent(QTimerEvent* ev) override;
+
+private:
+  void attach();
+  void detach();
+
+  enum class Backend
+  {
+    None,
+    Graph,
+    Context
+  } m_backend{Backend::None};
+
+  // Graph backend
+  score::gfx::Graph* m_graph{};
+  std::function<void(score::gfx::BackgroundNode&)> m_onAttached;
+  std::function<void(score::gfx::BackgroundNode&)> m_onAboutToDetach;
+
+  // Context backend
+  GfxContext* m_ctx{};
+  int32_t m_producerNodeId{-1};
+  int32_t m_screenNodeId{-1};
+  bool m_edgeConnected{false};  // true while a producer→preview edge is connected
+
+  std::shared_ptr<QRhiReadbackResult> m_readback;
+  score::gfx::BackgroundNode* m_node{};  // Graph: deleted by detach(); Context: owned by m_ctx
+  int m_timerId{};
+};
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Window/OffscreenDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/Window/OffscreenDevice.hpp
new file mode 100644
index 0000000000..b806133f14
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Window/OffscreenDevice.hpp
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <Gfx/GfxParameter.hpp>
+#include <Gfx/Graph/BackgroundNode.hpp>
+
+#include <ossia/network/base/device.hpp>
+#include <ossia/network/base/protocol.hpp>
+#include <ossia/network/generic/generic_node.hpp>
+
+#include <ossia-qt/invoke.hpp>
+
+namespace Gfx
+{
+
+// Headless stand-in for the window device, picked by name when
+// SCORE_FORCE_OFFSCREEN_WINDOW is set. It wraps a BackgroundNode — which
+// already drives beginOffscreenFrame/endOffscreenFrame — and so avoids the
+// ScenarioDocumentView dependency of background_device. Only the parameters
+// offscreen tests need (size, rendersize) are exposed; the node's
+// shared_readback is what WindowDevice::grabTo uses to write frames to disk.
+class offscreen_device : public ossia::net::device_base
+{
+  // Owned through unique_ptr: BackgroundNode inherits NodeModel, not
+  // QObject, so no parent would ever delete it for us. A raw
+  // `new BackgroundNode` without a matching `delete` in the dtor leaked on
+  // every offscreen device cycle — together with the rhi resources that
+  // ~BackgroundNode → destroyOutput would have released. unique_ptr
+  // restores the new/delete pairing.
+  std::unique_ptr<score::gfx::BackgroundNode> m_node;
+  gfx_node_base m_root;
+  QObject m_qtContext;
+
+  ossia::net::parameter_base* size_param{};
+  ossia::net::parameter_base* rendersize_param{};
+
+public:
+  offscreen_device(std::unique_ptr<gfx_protocol_base> proto, std::string name)
+      : ossia::net::device_base{std::move(proto)}
+      , m_node{std::make_unique<score::gfx::BackgroundNode>()}
+      , m_root{*this, *static_cast<gfx_protocol_base*>(m_protocol.get()), m_node.get(), name}
+  {
+    m_node->shared_readback = std::make_shared<QRhiReadbackResult>();
+    this->m_capabilities.change_tree = true;
+
+    { // "size": forwarded to BackgroundNode::setSize via run_async on m_qtContext
+      auto sizeNode = std::make_unique<ossia::net::generic_node>("size", *this, m_root);
+      size_param = sizeNode->create_parameter(ossia::val_type::VEC2F);
+      size_param->push_value(ossia::vec2f{1280.f, 720.f});
+      m_node->setSize(QSize{1280, 720});
+      size_param->add_callback([this](const ossia::value& incoming) {
+        if(auto dims = incoming.target<ossia::vec2f>())
+        {
+          ossia::qt::run_async(&m_qtContext, [bg = m_node.get(), wh = *dims] {
+            bg->setSize({(int)wh[0], (int)wh[1]});
+          });
+        }
+      });
+      m_root.add_child(std::move(sizeNode));
+    }
+
+    { // "rendersize": forwarded to BackgroundNode::setRenderSize the same way
+      auto renderSizeNode
+          = std::make_unique<ossia::net::generic_node>("rendersize", *this, m_root);
+      ossia::net::set_description(
+          *renderSizeNode, "Set to [0, 0] to use the viewport's size");
+      rendersize_param = renderSizeNode->create_parameter(ossia::val_type::VEC2F);
+      rendersize_param->push_value(ossia::vec2f{0.f, 0.f});
+      rendersize_param->add_callback([this](const ossia::value& incoming) {
+        if(auto dims = incoming.target<ossia::vec2f>())
+        {
+          ossia::qt::run_async(&m_qtContext, [bg = m_node.get(), wh = *dims] {
+            bg->setRenderSize({(int)wh[0], (int)wh[1]});
+          });
+        }
+      });
+      m_root.add_child(std::move(renderSizeNode));
+    }
+  }
+
+  ~offscreen_device()
+  {
+    m_protocol->stop();
+    m_root.clear_children();
+    m_protocol.reset();
+    // m_node is released afterwards by its unique_ptr → ~BackgroundNode →
+    // destroyOutput (frees RT/RPD/depth tex/colour tex + the offscreen rhi).
+  }
+
+  score::gfx::BackgroundNode* node() const noexcept { return m_node.get(); }
+
+  const gfx_node_base& get_root_node() const override { return m_root; }
+  gfx_node_base& get_root_node() override { return m_root; }
+};
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp
index 434c0033ba..6ce8985356 100644
--- a/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp
@@ -37,7 +37,7 @@ static score::gfx::ScreenNode* createScreenNode(
   };
 
   auto node = new score::gfx::ScreenNode{
-      make_configuration(), false, (settings.autoplay || !settings.gui)};
+      make_configuration(), false, (settings.autoplay && !settings.gui)};
   node->setSwapchainFlag(swapFlag);
   node->setSwapchainFormat(swapFormat);
 
@@ -105,6 +105,7 @@ class window_device : public ossia::net::device_base
   }
 
 public:
+  score::gfx::ScreenNode* screen() const noexcept { return m_screen; }
   ~window_device()
   {
     if(auto w = m_screen->window())
diff --git a/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp b/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp
index bad3ba129f..5b8804a95f 100644
--- a/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp
@@ -3,8 +3,11 @@
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderList.hpp>
 #include <Gfx/Graph/RenderState.hpp>
+#include <Gfx/Graph/Utils.hpp>
 #include <Gfx/Graph/decoders/GPUVideoDecoder.hpp>
 
+#include <ossia/detail/algorithms.hpp>
+
 #include <QDebug>
 
 #if defined(__linux__)
@@ -52,7 +55,7 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
     return {};
   }
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
     auto& rhi = *renderer.state.rhi;
 
@@ -73,9 +76,14 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
     m_width = 640;
     m_height = 480;
 
-    // Use BGRA8 — native format for all capture backends
+    // BGRA8 covers Windows / macOS / X11 backends. PipeWire on Wayland may
+    // negotiate SPA_VIDEO_FORMAT_RGBA / RGBx (mapped to CapturedFrame::CPU_RGBA)
+    // — we recreate the texture in QRhiTexture::RGBA8 the first time a CPU_RGBA
+    // frame arrives. Without that branch, RGBA bytes were uploaded as BGRA and
+    // displayed with R/B swapped.
+    m_textureFormat = QRhiTexture::BGRA8;
     m_texture = rhi.newTexture(
-        QRhiTexture::BGRA8, QSize{m_width, m_height}, 1, QRhiTexture::Flag{});
+        m_textureFormat, QSize{m_width, m_height}, 1, QRhiTexture::Flag{});
     m_texture->create();
 
     m_sampler = rhi.newSampler(
@@ -112,11 +120,8 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
     {
       auto [vertS, fragS] = score::gfx::makeShaders(
           renderer.state, score::gfx::GPUVideoDecoder::vertexShader(), frag);
-
-      const score::gfx::Sampler samplers[] = {{m_sampler, m_texture}};
-      score::gfx::defaultPassesInit(
-          m_p, this->node.output[0]->edges, renderer, mesh, vertS, fragS,
-          m_processUBO, m_materialUBO, samplers);
+      m_vertexS = vertS;
+      m_fragmentS = fragS;
     }
 
     // Start capturing
@@ -132,6 +137,83 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
       target.regionH = node.settings.regionH;
       const_cast<WindowCaptureNode&>(node).backend->start(target);
     }
+
+    m_initialized = true;
+  }
+
+  void addOutputPass(
+      score::gfx::RenderList& renderer, score::gfx::Edge& edge,
+      QRhiResourceUpdateBatch& res) override
+  {
+    if(!m_vertexS.isValid() || !m_fragmentS.isValid())
+      return;
+
+    auto rt = renderer.renderTargetForOutput(edge);
+    if(rt.renderTarget)
+    {
+      const score::gfx::Sampler samplers[] = {{m_sampler, m_texture}};
+      auto pip = score::gfx::buildPipeline(
+          renderer, renderer.defaultTriangle(), m_vertexS, m_fragmentS, rt,
+          m_processUBO, m_materialUBO, samplers);
+      if(pip.pipeline)
+        m_p.emplace_back(&edge, score::gfx::Pass{rt, pip, nullptr});
+    }
+  }
+
+  void removeOutputPass(score::gfx::RenderList& renderer, score::gfx::Edge& edge) override
+  {
+    auto it = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; });
+    if(it != m_p.end())
+    {
+      it->second.release();
+      m_p.erase(it);
+    }
+  }
+
+  bool hasOutputPassForEdge(score::gfx::Edge& edge) const override
+  {
+    return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; })
+           != m_p.end();
+  }
+
+  void releaseState(score::gfx::RenderList& r) override
+  {
+    if(!m_initialized)
+      return;
+
+    if(node.backend)
+      const_cast<WindowCaptureNode&>(node).backend->stop();
+
+#if HAS_DMABUF_IMPORT
+    if(m_dmaBufImporter)
+      m_dmaBufImporter->cleanupPlane(m_dmaBufPlane);
+#endif
+
+    for(auto& [edge, pass] : m_p)
+      pass.release();
+    m_p.clear();
+
+    delete m_texture;
+    m_texture = nullptr;
+    delete m_sampler;
+    m_sampler = nullptr;
+    delete m_processUBO;
+    m_processUBO = nullptr;
+    delete m_materialUBO;
+    m_materialUBO = nullptr;
+    m_meshBuffer = {};
+    m_vertexS = {};
+    m_fragmentS = {};
+
+    m_initialized = false;
+  }
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    initState(renderer, res);
+
+    for(auto* edge : this->node.output[0]->edges)
+      addOutputPass(renderer, *edge, res);
   }
 
   void update(
@@ -145,16 +227,41 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
     if(frame.type == CapturedFrame::None || frame.width <= 0 || frame.height <= 0)
       return;
 
-    // Handle resize
-    if(frame.width != m_width || frame.height != m_height)
+    // Detect format mismatch and recreate the texture in the matching format.
+    // PipeWire negotiates RGBA/RGBx on some compositors (yields CPU_RGBA);
+    // X11 / Windows / macOS yield CPU_BGRA. The two formats can both arrive
+    // in a single session if the user changes Wayland compositors mid-session
+    // or if the backend renegotiates. Done before the resize check so a
+    // simultaneous resize+format change is handled in a single create.
+    QRhiTexture::Format wanted = m_textureFormat;
+    if(frame.type == CapturedFrame::CPU_RGBA)
+      wanted = QRhiTexture::RGBA8;
+    else if(frame.type == CapturedFrame::CPU_BGRA)
+      wanted = QRhiTexture::BGRA8;
+    // Other branches (D3D11_Texture / IOSurface_Ref / DMABUF) recreate the
+    // texture below via createFrom(...) on the native handle and don't go
+    // through this CPU upload path.
+
+    const bool formatChanged = (wanted != m_textureFormat);
+    const bool sizeChanged = (frame.width != m_width || frame.height != m_height);
+
+    if(formatChanged || sizeChanged)
     {
       m_width = frame.width;
       m_height = frame.height;
 
-      // Only resize for CPU upload path — GPU paths recreate from native handle
+      // Only the CPU upload paths participate in setPixelSize/setFormat
+      // recreation. GPU import paths replace the texture wholesale via
+      // createFrom() further down.
       if(frame.type == CapturedFrame::CPU_BGRA || frame.type == CapturedFrame::CPU_RGBA)
       {
-        m_texture->setPixelSize(QSize{m_width, m_height});
+        if(formatChanged)
+        {
+          m_texture->setFormat(wanted);
+          m_textureFormat = wanted;
+        }
+        if(sizeChanged)
+          m_texture->setPixelSize(QSize{m_width, m_height});
         m_texture->create();
       }
     }
@@ -234,27 +341,7 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
 
   void release(score::gfx::RenderList& r) override
   {
-    if(node.backend)
-      const_cast<WindowCaptureNode&>(node).backend->stop();
-
-#if HAS_DMABUF_IMPORT
-    if(m_dmaBufImporter)
-      m_dmaBufImporter->cleanupPlane(m_dmaBufPlane);
-#endif
-
-    for(auto& [edge, pass] : m_p)
-      pass.release();
-    m_p.clear();
-
-    delete m_texture;
-    m_texture = nullptr;
-    delete m_sampler;
-    m_sampler = nullptr;
-    delete m_processUBO;
-    m_processUBO = nullptr;
-    delete m_materialUBO;
-    m_materialUBO = nullptr;
-    m_meshBuffer = {};
+    releaseState(r);
   }
 
   void runRenderPass(
@@ -273,7 +360,10 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer
   QRhiBuffer* m_processUBO{};
   QRhiBuffer* m_materialUBO{};
   QRhiTexture* m_texture{};
+  QRhiTexture::Format m_textureFormat{QRhiTexture::BGRA8};
   QRhiSampler* m_sampler{};
+  QShader m_vertexS;
+  QShader m_fragmentS;
   score::gfx::VideoMaterialUBO m_material;
 
   int m_width{};
diff --git a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp
index f6e1fe73bd..c5440f544b 100644
--- a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp
+++ b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp
@@ -2,6 +2,7 @@
 
 #include <Gfx/Window/BackgroundDevice.hpp>
 #include <Gfx/Window/MultiWindowDevice.hpp>
+#include <Gfx/Window/OffscreenDevice.hpp>
 #include <Gfx/Window/WindowDevice.hpp>
 #include <Gfx/Window/WindowSettingsWidget.hpp>
 
@@ -19,6 +20,24 @@ W_OBJECT_IMPL(Gfx::WindowDevice)
 namespace Gfx
 {
 
+// SCORE_FORCE_OFFSCREEN_WINDOW=Name1,Name2 forces any matching WindowDevice
+// (whatever its Single/Background/MultiWindow mode) into a headless offscreen
+// render path. Used by tests that need grabTo output but must not pop a
+// platform window.
+static bool shouldForceOffscreen(const QString& name)
+{
+  static const QByteArray env = qgetenv("SCORE_FORCE_OFFSCREEN_WINDOW");
+  if(env.isEmpty())
+    return false;
+  for(const auto& part : env.split(','))
+  {
+    const auto trimmed = QString::fromUtf8(part).trimmed();
+    if(!trimmed.isEmpty() && trimmed == name)
+      return true;
+  }
+  return false;
+}
+
 score::gfx::Window* WindowDevice::window() const noexcept
 {
   if(m_dev)
@@ -75,6 +94,44 @@ void WindowDevice::disconnect()
   deviceChanged(prev.get(), nullptr);
 }
 
+// Writes the device's current output to `path`; does nothing if unavailable.
+void WindowDevice::grabTo(const QString& path) const
+{
+  if(auto dev = dynamic_cast<window_device*>(m_dev.get()))
+  {
+    if(auto screen = dev->screen())
+    {
+      if(auto win = screen->window())
+      {
+        // QWindow::screen() is null when the window has no associated screen.
+        const auto wid = win->winId();
+        if(auto qscreen = win->screen())
+          qscreen->grabWindow(wid).save(path);
+      }
+    }
+  }
+  else if(auto dev = dynamic_cast<background_device*>(m_dev.get()))
+  {
+    // TODO
+  }
+  else if(auto dev = dynamic_cast<offscreen_device*>(m_dev.get()))
+  {
+    if(auto node = dev->node(); node && node->shared_readback)
+    {
+      const auto& rb = *node->shared_readback;
+      const int w = rb.pixelSize.width();
+      const int h = rb.pixelSize.height();
+      const qint64 expected = qint64(w) * h * 4;
+      if(w > 0 && h > 0 && rb.data.size() >= expected)
+      {
+        // Wrap the readback bytes without copying, then encode to `path`.
+        const auto* px = reinterpret_cast<const uchar*>(rb.data.constData());
+        QImage{px, w, h, w * 4, QImage::Format_RGBA8888}.save(path);
+      }
+    }
+  }
+}
+
 bool WindowDevice::reconnect()
 {
   disconnect();
@@ -90,6 +147,18 @@ bool WindowDevice::reconnect()
       auto view = m_ctx.document.view();
       auto main_view = view ? qobject_cast<Scenario::ScenarioDocumentView*>(
           &view->viewDelegate()) : nullptr;
+
+      if(shouldForceOffscreen(m_settings.name))
+      {
+        m_dev = std::make_unique<offscreen_device>(
+            std::unique_ptr<gfx_protocol_base>(m_protocol),
+            m_settings.name.toStdString());
+
+        enableCallbacks();
+        deviceChanged(nullptr, m_dev.get());
+        return connected();
+      }
+
       switch(set.mode)
       {
         case WindowMode::Background: {
diff --git a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp
index e0549f04e5..daca092767 100644
--- a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp
+++ b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp
@@ -2,22 +2,14 @@
 #include <Gfx/GfxDevice.hpp>
 #include <Gfx/Window/WindowSettings.hpp>
 
-#include <QGraphicsRectItem>
-#include <QGraphicsScene>
-#include <QGraphicsView>
-#include <QRectF>
+#include <QImage>
 
 class QComboBox;
 class QCheckBox;
 class QDoubleSpinBox;
-class QGraphicsEllipseItem;
-class QGraphicsLineItem;
-class QGraphicsPolygonItem;
 class QLabel;
 class QStackedWidget;
 class QSpinBox;
-class QGraphicsView;
-
 namespace score::gfx
 {
 class Window;
@@ -74,6 +66,8 @@ class SCORE_PLUGIN_GFX_EXPORT WindowDevice final : public GfxOutputDevice
   void disconnect() override;
   bool reconnect() override;
 
+  void grabTo(const QString& path) const;
+  W_SLOT(grabTo)
 private:
   gfx_protocol_base* m_protocol{};
   mutable std::unique_ptr<ossia::net::device_base> m_dev;
diff --git a/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp b/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp
index 0766388866..2ad09cce24 100644
--- a/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp
+++ b/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp
@@ -17,6 +17,8 @@
 #include <ossia-qt/qml_protocols.hpp>
 
 #include <QCommandLineParser>
+#include <QFileInfo>
+#include <QString>
 
 #if __has_include(<QQuickWindow>)
 #include <QGuiApplication>
@@ -32,6 +34,33 @@
 
 namespace JS
 {
+// Check whether the input is a script, or a file path.
+// An existing file always wins: a real path may legitimately contain
+// characters (parentheses, braces, ...) that also occur in inline source,
+// so the file-existence check must come FIRST. Only when the input is not
+// an existing file do we fall back to the inline-source heuristic.
+static bool stringIsScript(const QString& input)
+{
+  if(input.isEmpty())
+    return false;
+
+  if(QFileInfo fileInfo{input}; fileInfo.exists() && fileInfo.isFile())
+    return false;
+
+  if(input.length() > 4096)
+    return true;
+
+  for(QChar ch : input)
+  {
+    const char16_t c = ch.unicode();
+    if(c == '\n' || c == '\r' || c == ';' || c == '{' || c == '}' || c == '('
+       || c == ')')
+      return true;
+  }
+  // Nothing script-like: treat as a (missing) path so the caller reports it.
+  return false;
+}
+
 ApplicationPlugin::ApplicationPlugin(const score::GUIApplicationContext& ctx)
     : score::GUIApplicationPlugin{ctx}
 {
@@ -79,7 +108,25 @@ ApplicationPlugin::ApplicationPlugin(const score::GUIApplicationContext& ctx)
   parser.addOption(script_opt);
 
   parser.parse(ctx.applicationSettings.arguments);
-  this->m_start_script = parser.value(script_opt);
+  auto script = parser.value(script_opt);
+  if(stringIsScript(script))
+  {
+    this->m_start_script = script;
+  }
+  else if(!script.isEmpty())
+  {
+    QFile f{script};
+    if(f.open(QIODevice::ReadOnly))
+    {
+      this->m_start_script = f.readAll();
+      this->m_start_script_path = QFileInfo{f}.canonicalPath();
+    }
+    else
+    {
+      qWarning() << "JS::ApplicationPlugin: could not open --script file"
+                 << script << ":" << f.errorString();
+    }
+  }
 }
 
 void ApplicationPlugin::on_newDocument(score::Document& doc)
@@ -124,7 +171,11 @@ void ApplicationPlugin::on_createdDocument(score::Document& doc)
 
   if(!m_start_script.isEmpty())
   {
-    QTimer::singleShot(100, this, [this] { m_consoleEngine.evaluate(m_start_script); });
+    QTimer::singleShot(100, this, [this] {
+      if(!m_start_script_path.isEmpty())
+        m_consoleEngine.addImportPath(m_start_script_path);
+      m_consoleEngine.evaluate(m_start_script);
+    });
   }
 }
 void ApplicationPlugin::afterStartup()
diff --git a/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp b/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp
index db4aae0d45..03558e60d4 100644
--- a/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp
+++ b/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp
@@ -45,5 +45,6 @@ class ApplicationPlugin final
   ossia::net::network_context_ptr m_asioContext;
 
   QString m_start_script;
+  QString m_start_script_path;
 };
 }
diff --git a/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp b/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp
index f60a4d4709..c24132bd84 100644
--- a/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp
+++ b/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp
@@ -32,8 +32,10 @@
 #include <private/qquickrendercontrol_p.h>
 #include <private/qquickwindow_p.h>
 #include <private/qsgcontext_p.h>
+#include <private/qsgdefaultrendercontext_p.h>
 
 #include <compare>
+#include <set>
 namespace JS
 {
 struct engine_key
@@ -86,6 +88,14 @@ struct GpuNode : score::gfx::NodeModel
     JS::Script* m_object{};
     QPointer<QQuickItem> m_item{};
 
+    // Qt Quick runtime. Created in GpuRenderer::initState(), destroyed
+    // when the Engine itself is destroyed (GpuRenderer::release() drops
+    // the map entry and the renderer's own shared_ptr, bringing refcount
+    // to zero). Destruction runs while the owning QRhi is still alive —
+    // see the note in GpuRenderer::release() for why this matters.
+    QQuickRenderControl* m_quickRenderControl{};
+    QQuickWindow* m_quickWindow{};
+
     std::vector<Inlet*> m_jsInlets;
     std::vector<std::pair<ControlInlet*, int>> m_ctrlInlets;
     std::vector<std::pair<Impulse*, int>> m_impulseInlets;
@@ -94,13 +104,17 @@ struct GpuNode : score::gfx::NodeModel
 
     ossia::spsc_queue<js_message_type> ui_messages;
 
-    void init(GpuRenderer& renderer, GpuNode& node, QQuickWindow* window);
+    void init(
+        GpuRenderer& renderer, GpuNode& node, QQuickWindow* window,
+        score::gfx::RenderList& rl);
 
-    void createItem(GpuRenderer& renderer, GpuNode& node);
+    void createItem(
+        GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl);
 
     void updateItemTextureOut(QQuickWindow* window);
 
-    void setupComponent(GpuRenderer& renderer, GpuNode& node);
+    void setupComponent(
+        GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl);
 
     void releaseItem();
 
@@ -146,20 +160,21 @@ struct GpuNode : score::gfx::NodeModel
   std::pair<const engine_key, std::shared_ptr<Engine>> acquireEngine(QRhi* rhi)
   {
     const auto key = engine_key{std::this_thread::get_id(), rhi};
-    // FIXME find if there's a more atomic way to implement this with insert_or_visit,
-    // without calling init() inside the map's lock.
     std::shared_ptr<Engine> res;
-    m_engines.visit(key, [&](const auto& engine) { res = engine.second; });
-
-    if(!res)
-    {
-      res = std::make_shared<Engine>();
-      m_engines.insert({key, res});
-    }
+    m_engines.try_emplace_and_visit(
+        key,
+        std::make_shared<Engine>(),
+        [&](auto& slot) { res = slot.second; },   // newly-inserted visitor
+        [&](auto& slot) { res = slot.second; });  // existing-key visitor
     return {key, res};
   }
 
-  void releaseEngine(QRhi* rhi) { m_engines.erase({std::this_thread::get_id(), rhi}); }
+  // Release by the key stored at acquire time, NOT by the current thread id.
+  // If releaseState() ever runs on a different thread than initState()'s
+  // insert (e.g. under SCORE_THREADED_GFX), erasing by the current-thread
+  // key would leave the stale Engine (with m_quickWindow set) mapped, and
+  // the next acquire would return it and trip the SCORE_ASSERT in initState().
+  void releaseEngine(const engine_key& key) { m_engines.erase(key); }
 
   boost::concurrent_flat_map<engine_key, std::shared_ptr<Engine>, engine_key_hash>
       m_engines;
@@ -243,19 +258,53 @@ void main ()
 
   std::vector<score::gfx::Sampler> m_inputSamplers;
 
-  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  // All setup lives in initState() rather than init(), because the
+  // incremental graph-edit path (Graph::incrementalEdgeUpdate) calls
+  // initState() directly on newly-spawned renderers without ever going
+  // through init(). If we put setup in init(), a play/stop/play cycle
+  // leaves the new GpuRenderer with empty shaders, no window, no engine,
+  // and the next update() crashes in defaultUBOUpdate. Mirror
+  // RenderedISFNode's split: initState() does all shared state;
+  // the inherited GenericNodeRenderer::init() calls initState() then
+  // addOutputPass() per output edge.
+  // Ignore the base GenericNodeRenderer::updateInputTexture behavior:
+  // GpuRenderer's m_samplers is a private, single-entry vector holding the
+  // internal "y_tex" sampler that points at m_internalTex (the texture Qt
+  // Quick renders into, which our fragment shader samples). Its 8 visible
+  // texture-inlet ports are routed through m_engine->m_texInlets and the
+  // per-frame res.copyTexture in update() — they are NOT meant to drive
+  // m_samplers. The base implementation indexes m_samplers by image-input
+  // position, so a sink-sampler update for input[0] (Image 1) writes
+  // m_samplers[0].texture = image1_rt_texture and rebinds the SRB's y_tex
+  // sampler away from m_internalTex, which makes the presentation render
+  // Image 1's content directly instead of the Qt Quick tree. This fires
+  // whenever Graph::updateAllSinkSamplers runs after initial pass
+  // construction — i.e. on every live graph edit — which is the
+  // "presentation reverts to Image 1" regression.
+  //
+  // Leaving it as a no-op is correct: sink-sampler updates targeting inlet
+  // items are already handled by GpuRenderer::update's per-frame
+  // copyTexture path (GPUNode.cpp:~470), which reads rt.texture fresh
+  // every frame.
+  void updateInputTexture(
+      const score::gfx::Port& input, QRhiTexture* tex,
+      QRhiTexture* depthTex = nullptr) override
   {
-    auto& rhi = *renderer.state.rhi;
+  }
 
+  void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    auto& rhi = *renderer.state.rhi;
     // Init the texture on which we are going to render
     // FIXME RGBA32F
     m_internalTex = score::gfx::createRenderTarget(
         renderer.state, QRhiTexture::RGBA8, renderer.state.renderSize,
         renderer.state.samples, true);
 
-    // Init basic rendering ubos
-    const auto& mesh = renderer.defaultQuad();
-    defaultMeshInit(renderer, mesh, res);
+    // Use the quad mesh (GenericNodeRenderer::initState would default to
+    // triangle). The inherited addOutputPass uses m_mesh to build pipelines.
+    m_mesh = &renderer.defaultQuad();
+    defaultMeshInit(renderer, *m_mesh, res);
     processUBOInit(renderer);
     std::tie(m_vertexS, m_fragmentS)
         = score::gfx::makeShaders(renderer.state, vertex_shader, fragment_shader);
@@ -275,82 +324,97 @@ void main ()
       m_samplers.push_back({sampler, m_internalTex.texture});
     }
 
-    defaultPassesInit(renderer, mesh);
+    // Acquire the Engine. release() drops the map entry and our own
+    // ref, so we always get a fresh Engine here — tying the Qt Quick
+    // runtime lifetime strictly to (initState, release) lets us free
+    // all QRhi-owned buffers before the RHI itself is destroyed in
+    // Graph::~Graph.
+    auto [key, engine] = node.acquireEngine(&rhi);
+    m_engineKey = key;
+    m_engine = engine;
+    if(!m_engine)
+    {
+      m_initialized = true;
+      return;
+    }
 
-    // Init the QQuick render stuff
-    m_renderControl = new QQuickRenderControl{};
-    m_window = new QQuickWindow{m_renderControl};
+    SCORE_ASSERT(!m_engine->m_quickWindow);
+    m_engine->m_quickRenderControl = new QQuickRenderControl{};
+    m_engine->m_quickWindow = new QQuickWindow{m_engine->m_quickRenderControl};
 
 #if QT_HAS_VULKAN
     if(renderer.state.api == score::gfx::GraphicsApi::Vulkan)
-    {
-      m_window->setVulkanInstance(score::gfx::staticVulkanInstance());
-    }
+      m_engine->m_quickWindow->setVulkanInstance(
+          score::gfx::staticVulkanInstance());
 #endif
 
     if(auto win = renderer.state.window.lock())
     {
       QObject::connect(
-          win.get(), &score::gfx::Window::interactiveEvent, m_window,
-          [qqw = QPointer{m_window}](QEvent* e) {
+          win.get(), &score::gfx::Window::interactiveEvent,
+          m_engine->m_quickWindow,
+          [qqw = QPointer{m_engine->m_quickWindow}](QEvent* e) {
         if(auto q = qqw.get())
           QCoreApplication::sendEvent(q, e);
       }, Qt::DirectConnection);
     }
-    m_window->setGraphicsDevice(QQuickGraphicsDevice::fromRhi(&rhi));
-
+    m_engine->m_quickWindow->setGraphicsDevice(
+        QQuickGraphicsDevice::fromRhi(&rhi));
+    m_engine->m_quickWindow->setColor(Qt::transparent);
+    m_engine->m_quickRenderControl->initialize();
+    // Mark the window as "visible" so QQuickItem::grabToImage() works.
+    // The window is driven by QQuickRenderControl (no native OS
+    // window) — this only sets the internal flag.
+    QQuickWindowPrivate::get(m_engine->m_quickWindow)->visible = true;
+
+    m_window = m_engine->m_quickWindow;
+    m_renderControl = m_engine->m_quickRenderControl;
+
+    // Size and render target are per-RenderList and must be refreshed
+    // on every initState() (resize changes the RT dimensions).
     const auto sz = renderer.state.renderSize;
     m_window->setWidth(sz.width());
     m_window->setHeight(sz.height());
     m_window->contentItem()->setWidth(sz.width());
-    m_window->contentItem()->setWidth(sz.height());
-    m_window->setColor(Qt::transparent);
-
-    m_renderControl->initialize();
+    m_window->contentItem()->setHeight(sz.height());
     m_window->setRenderTarget(
         QQuickRenderTarget::fromRhiRenderTarget(m_internalTex.renderTarget));
 
-    // Mark the window as "visible" so that QQuickItem::grabToImage() works.
-    // The window is managed by QQuickRenderControl (no native OS window),
-    // so this only sets the internal flag without creating a real window.
-    QQuickWindowPrivate::get(m_window)->visible = true;
+    m_engine->init(*this, node, m_window, renderer);
+    // Tolerant of script/port mismatches (live-edited QML may not line up
+    // with the node's declared ports): skip bad inlets instead of aborting.
+    // Mirrors Engine::setupComponent's guards.
+    for(auto& [texture_in, i] : this->m_engine->m_texInlets)
+    {
+      if(i >= (int)this->node.input.size())
+        continue;
+      score::gfx::Port* port = this->node.input[i];
+      if(!port || port->type != score::gfx::Types::Image)
+        continue;
+      auto rt = renderer.renderTargetForInputPort(*port);
+      auto item = qobject_cast<JS::TextureInletItem*>(texture_in->item());
+      if(item && rt.texture)
+        item->setSize(rt.texture->pixelSize());
+    }
+    sourceIndex.store(node.sourceIndex.load());
+    m_initialized = true;
   }
 
   void reloadEngine(score::gfx::RenderList& renderer)
   {
-    auto* rhi = renderer.state.rhi;
-    auto oldSourceIndex = this->sourceIndex.exchange(this->node.sourceIndex);
-    //= std::exchange(this->sourceIndex, this->node.sourceIndex.load());
-    // yes technically there is the overflow case but it's 2^64 editions away...
-    if(oldSourceIndex < this->node.sourceIndex)
-    {
-      if(m_engine)
-      {
-        m_engine->releaseItem();
-      }
-
-      node.releaseEngine(rhi);
-      m_engine.reset();
-      auto [key, engine] = node.acquireEngine(rhi);
-      m_tid = key.id;
-      m_engine = engine;
-      if(m_engine)
-      {
-        m_engine->init(*this, node, m_window);
+    // Guard: initState() bails out early if Engine acquisition failed,
+    // leaving m_window/m_renderControl/m_engine null. update() can still
+    // be invoked in that degraded state — short-circuit here.
+    if(!m_window || !m_renderControl || !m_engine)
+      return;
 
-        for(auto& [texture_in, i] : this->m_engine->m_texInlets)
-        {
-          SCORE_ASSERT(this->node.input.size() > i);
-          score::gfx::Port* port = this->node.input[i];
-          SCORE_ASSERT(port->type == score::gfx::Types::Image);
-          auto rt = renderer.renderTargetForInputPort(*port);
-          auto item = qobject_cast<JS::TextureInletItem*>(texture_in->item());
-          SCORE_ASSERT(item);
-          if(rt.texture)
-            item->setSize(rt.texture->pixelSize());
-        }
-      }
-    }
+    // NOTE: GpuNode::sourceIndex is fixed at 1 and never incremented (the
+    // incrementer that drove the in-place script reload was removed), so the
+    // GpuRenderer::sourceIndex seeded in initState() always equals it. The
+    // mid-play "drop the QML tree, keep the QQuickWindow, re-init" reload
+    // branch that used to live here was therefore dead code and has been
+    // removed. A live script change currently goes through a full
+    // releaseState()/initState() cycle instead.
   }
 
   void update(
@@ -360,30 +424,64 @@ void main ()
     reloadEngine(renderer);
     defaultUBOUpdate(renderer, res);
 
-    // Schedule a copy of the input textures into the actual textures
+    if(!m_engine)
+      return;
+
+    // Schedule a copy of the input textures into the actual textures.
+    // Tolerant of script/port mismatches (live-edited QML): skip bad inlets
+    // instead of asserting. Mirrors Engine::setupComponent's guards.
     {
       for(auto& [texture_in, i] : this->m_engine->m_texInlets)
       {
-        SCORE_ASSERT(this->node.input.size() > i);
+        if(i >= (int)this->node.input.size())
+          continue;
         score::gfx::Port* port = this->node.input[i];
-        SCORE_ASSERT(port->type == score::gfx::Types::Image);
+        if(!port || port->type != score::gfx::Types::Image)
+          continue;
         auto rt = renderer.renderTargetForInputPort(*port);
         auto item = qobject_cast<JS::TextureInletItem*>(texture_in->item());
-        SCORE_ASSERT(item);
+        if(!item)
+          continue;
         auto itemRenderer = item->renderer;
         auto texture = item->texture;
         if(itemRenderer && texture && rt.texture)
         {
-          if(rt.texture->pixelSize() == texture->pixelSize()
-             && rt.texture->sampleCount() == texture->sampleCount())
+          const bool sameSize = rt.texture->pixelSize() == texture->pixelSize();
+          const bool sameSamples
+              = rt.texture->sampleCount() == texture->sampleCount();
+          if(sameSize && sameSamples)
           {
             QRhiTextureCopyDescription desc;
             res.copyTexture(texture, rt.texture, desc);
           }
+          else if(!sameSize)
+          {
+            // The upstream RT changed dimensions since the last initState().
+            // Resize the inlet item so Qt Quick rebuilds its QSGRhiLayer at
+            // the new size; this frame's copy is intentionally skipped
+            // (src/dst pair is mismatched) and the next update() will copy
+            // correctly once the layer texture is recreated.
+            item->setSize(rt.texture->pixelSize());
+          }
           else
           {
-            qDebug() << "Mismatch!!!" << rt.texture->pixelSize() << texture->pixelSize()
-                     << rt.texture->sampleCount() << texture->sampleCount();
+            // Size matches but sample count differs (e.g. the inlet item's
+            // QSGRhiLayer is single-sampled while the upstream RT is MSAA).
+            // QRhi::copyTexture requires matching sample counts, so the copy
+            // can't run and setSize() is a no-op here — without a diagnostic
+            // the inlet would stay silently black. We can't resolve/recreate
+            // the layer at a different sample count from outside Qt Quick, so
+            // the defined fallback is: skip the copy (the inlet keeps its
+            // last content rather than showing undefined data) and warn once
+            // per item so the condition is observable.
+            if(m_warnedSampleMismatch.insert(item).second)
+            {
+              qWarning() << "JS::GPUNode: texture inlet" << i
+                         << "sample-count mismatch (upstream"
+                         << rt.texture->sampleCount() << "vs inlet"
+                         << texture->sampleCount()
+                         << ") - copy skipped, inlet may appear stale/black";
+            }
           }
         }
       }
@@ -406,6 +504,8 @@ void main ()
       score::gfx::RenderList& renderer, QRhiCommandBuffer& cb,
       QRhiResourceUpdateBatch*& res, score::gfx::Edge& e) override
   {
+    if(!m_window || !m_renderControl || !m_engine)
+      return;
     // Here we run the Qt Quick render loop which handles its own pass
     if(auto sz = m_window->size(); sz != m_window->contentItem()->size())
     {
@@ -429,7 +529,6 @@ void main ()
           item->update();
       }
     }
-
     // 2. Render
     m_window->beforeRendering();
 
@@ -439,7 +538,6 @@ void main ()
 
     cd->deliveryAgentPrivate()->flushFrameSynchronousEvents(m_window);
     cd->polishItems();
-
     m_window->afterRendering();
     m_window->afterAnimating();
 
@@ -454,20 +552,55 @@ void main ()
 
     cd->syncSceneGraph();
     rc->rc->endSync();
-
     // render:
     cd->renderSceneGraph();
-
     // endFrame:
     m_window->afterFrameEnd();
 
+    // Disassociate our transient cb — Qt's own qsgrhisupport pairs
+    // setCustomCommandBuffer(cb) with setCustomCommandBuffer(nullptr)
+    // to avoid leaving a dangling pointer past the frame.
+    cd->setCustomCommandBuffer(nullptr);
+    // Symmetric reset of QQuickRenderControlPrivate::cb. The earlier
+    // assignment at `rc->cb = &cb` (line ~523) bound the private field
+    // to a stack reference parameter; without this nullptr reset the
+    // pointer dangled into reclaimed stack memory after the frame
+    // returned. Whether Qt internals dereferenced it between frames
+    // depended on the QQuickRenderControlPrivate event-loop paths
+    // (animation tick / glyph upload completion / sync without render),
+    // but the fix is one line either way and removes the foot-gun.
+    rc->cb = nullptr;
+
+    // Force-drain Qt Quick's glyph-cache resource-update batch. The batch
+    // is lazily allocated in preprocess() (storeGlyphs → createTexture →
+    // glyphCacheResourceUpdates) and is normally released when a glyph
+    // node renders and calls commitResourceUpdates. When the QML scene
+    // has no glyph node, preprocess still populates the cache but no
+    // draw ever commits → the batch stays pinned, permanently consuming
+    // one slot of the 64-slot QRhi pool *per render context*. Each
+    // window resize spawns a fresh QQuickRenderControl + render context,
+    // so after a handful of resizes the pool exhausts and SIGSEGV lands
+    // inside QSGRhiDistanceFieldGlyphCache::createTexture. Merge any
+    // pending uploads into our outer batch so they still land, then
+    // reset the context's pointer so the pool slot returns.
+    if(auto* rcp = QQuickRenderControlPrivate::get(m_renderControl))
+    {
+      if(auto* defRc = qobject_cast<QSGDefaultRenderContext*>(rcp->rc))
+      {
+        if(auto* pending = defRc->maybeGlyphCacheResourceUpdates())
+        {
+          if(res)
+            res->merge(pending);
+          defRc->resetGlyphCacheResources();
+        }
+      }
+    }
     if(m_engine && m_engine->m_engine)
     {
       m_engine->m_engine->collectGarbage();
     }
-
-    QEvent* updateRequest = new QEvent(QEvent::UpdateRequest);
-    QCoreApplication::postEvent(m_window, updateRequest);
+    // No UpdateRequest post needed: runInitialPasses drives sync/render
+    // directly via polishItems/syncSceneGraph/renderSceneGraph each frame.
   }
 
   void runRenderPass(
@@ -476,16 +609,12 @@ void main ()
   {
     const auto& mesh = renderer.defaultQuad();
     defaultRenderPass(renderer, mesh, cb, edge);
-    m_window->frameSwapped();
+    if(m_window)
+      m_window->frameSwapped();
   }
 
-  void release(score::gfx::RenderList& r) override
+  void releaseState(score::gfx::RenderList& r) override
   {
-    if(m_engine)
-    {
-      m_engine->releaseItem();
-    }
-
     for(auto sampler : m_inputSamplers)
     {
       delete sampler.sampler;
@@ -493,16 +622,48 @@ void main ()
     }
     m_inputSamplers.clear();
 
-    if(m_window)
-    {
-      m_window->deleteLater();
-      m_window = nullptr;
-    }
-
-    if(m_renderControl)
+    // Tear down the Engine here — this is the last hook we get while
+    // the QRhi is still alive. Graph::~Graph calls RenderList::release()
+    // before out->destroyOutput() (which calls RenderState::destroy(),
+    // killing the RHI); the GpuRenderer destructor runs later, after
+    // the RHI is gone, so any QRhi-owned buffers still held by the
+    // QQuickRenderControl/QQuickWindow would leak (VUID-vkDestroyDevice
+    // validation fires at process exit).
+    //
+    // An earlier version kept the Engine alive across release+init to
+    // avoid re-creating the Qt Quick scene graph on every window
+    // resize, because each cycle pinned ~1 batch slot in Qt Quick's
+    // response to setRenderTarget. That workaround is no longer needed:
+    // the real batch-pool exhaustion was SimpleRenderedISFNode::initPass
+    // leaking an unsubmitted batch per addOutputPass (fixed separately),
+    // and Qt Quick's per-cycle slot churn alone doesn't exhaust the
+    // 64-slot pool in practice.
+    //
+    // Living in releaseState() (not release()) is what lets live graph
+    // edits that make this node unreachable actually free the Engine:
+    // Graph::reconcileAllRenderLists calls releaseState() on orphaned
+    // renderers, never release(). A previous version had the teardown
+    // in release(), which meant node.releaseEngine() never ran on a
+    // live disconnect — the next reconnection's acquireEngine returned
+    // the stale entry with m_quickWindow already set and tripped the
+    // SCORE_ASSERT in initState().
+    //
+    // USER-VISIBLE BEHAVIOR (known tradeoff): destroying the Engine here
+    // discards the entire QML runtime — the QQmlEngine, the Script object
+    // and ALL its script-side state (JS variables, timers, animation state,
+    // etc.). releaseState()/initState() run on every output resize, so a
+    // mid-performance resize silently restarts the user's script from
+    // scratch. Only the declared model state (node.m_modelState, replayed via
+    // Script.loadState() in Engine::setupComponent) survives. This is
+    // accepted for the deterministic-teardown lifetime guarantees above.
+    // Forget warn-once keys: they are addresses of items of the released Engine.
+    m_warnedSampleMismatch.clear();
+    m_window = nullptr;
+    m_renderControl = nullptr;
+    if(m_engine)
     {
-      m_renderControl->deleteLater();
-      m_renderControl = nullptr;
+      m_engine.reset();
+      node.releaseEngine(m_engineKey);
     }
 
     m_internalTex.release();
@@ -510,15 +671,23 @@ void main ()
     defaultRelease(r);
   }
 
+  void release(score::gfx::RenderList& r) override { releaseState(r); }
+
   score::gfx::TextureRenderTarget m_internalTex;
 
   QQuickRenderControl* m_renderControl{};
   QQuickWindow* m_window{};
 
   ossia::spsc_queue<score::gfx::Message> m_messages;
-  std::thread::id m_tid;
+  // Key under which our Engine was inserted in node.m_engines at acquire
+  // time. We release by this stored key (see GpuNode::releaseEngine).
+  JS::engine_key m_engineKey{};
   std::shared_ptr<GpuNode::Engine> m_engine;
 
+  // Texture inlet items for which a sample-count mismatch has already been
+  // reported, to rate-limit the warning to once per item (see update()).
+  std::set<const void*> m_warnedSampleMismatch;
+
   friend struct GpuNode;
 };
 
@@ -576,8 +745,9 @@ GpuNode::GpuNode(
     }
   }
 }
-GpuNode::~GpuNode() { }
-
+GpuNode::~GpuNode()
+{
+}
 
 void GpuNode::Engine::tick()
 {
@@ -653,21 +823,52 @@ GpuNode::Engine::~Engine()
   m_context = nullptr;
 
   m_engine = nullptr; // Not owned here!
+
+  // Destroy the persistent Qt Quick runtime synchronously. Order matches
+  // Qt's own QQuickWidget: QQuickRenderControl first (its destructor
+  // calls invalidate() and deletes the QSGRenderContext), then the
+  // QQuickWindow.
+  delete m_quickRenderControl;
+  m_quickRenderControl = nullptr;
+  delete m_quickWindow;
+  m_quickWindow = nullptr;
 }
 
 void GpuNode::Engine::releaseItem()
 {
-  qDebug(Q_FUNC_INFO);
   if(m_item)
   {
+    // LOAD-BEARING: these two detach calls must precede deleteLater().
+    // The immediate caller (GpuRenderer::reloadEngine, GPUNode.cpp:419-420)
+    // follows this with init(), whose QML reactive bindings and child-walkers
+    // must not observe the dying item. setParentItem(nullptr) removes it from
+    // contentItem->childItems() synchronously; setParent(nullptr) severs the
+    // QObject ownership chain. deleteLater() then safely defers actual
+    // destruction to the next event loop tick. Collapsing the two detach
+    // calls into deleteLater() alone would briefly expose two items under
+    // contentItem to the new createItem(), breaking the scene graph.
     m_item->setParent(nullptr);
     m_item->setParentItem(nullptr);
     m_item->deleteLater();
     m_item = nullptr;
   }
+  // A script reload destroys the whole QML tree. Clear the script-
+  // associated state here so Engine::init()'s `if(!m_item)` rebuild
+  // path can recreate everything cleanly without leaking the old
+  // component/object or appending to the inlet vectors.
+  delete m_object;
+  m_object = nullptr;
+  delete m_component;
+  m_component = nullptr;
+  m_jsInlets.clear();
+  m_ctrlInlets.clear();
+  m_impulseInlets.clear();
+  m_valInlets.clear();
+  m_texInlets.clear();
 }
 
-void GpuNode::Engine::setupComponent(GpuRenderer& renderer, GpuNode& node)
+void GpuNode::Engine::setupComponent(
+    GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl)
 {
   // FIXME refactor with CPUNode
   // FIXME only works because same thread right now.
@@ -685,18 +886,13 @@ void GpuNode::Engine::setupComponent(GpuRenderer& renderer, GpuNode& node)
     }, Qt::QueuedConnection);
   }, Qt::DirectConnection);
 
-  if(const auto& on_load = m_object->loadState(); on_load.isCallable())
-  {
-    QVariantMap vm;
-    for(auto& [k, v]: node.m_modelState) {
-      if(auto res = v.apply(ossia::qt::ossia_to_qvariant{}); res.isValid())
-        vm[k] = std::move(res);
-    }
-    on_load.call({m_engine->toScriptValue(vm)});
-  }
-
+  // (1) Enumerate QML children into the typed inlet vectors FIRST. loadState()
+  //     below fires reactive bindings like `ShaderEffectSource.sourceItem =
+  //     root.inletItems[src]`; those need each inlet item to already be at its
+  //     final pixel size so QQuickShaderEffectSource::updatePaintNode
+  //     (qquickshadereffectsource.cpp:657-664) does not take the "source item
+  //     is 0x0, delete paint node, return nullptr" branch on the first sync.
   int input_i = 0;
-
   for(auto n : m_object->children())
   {
     if(auto imp_in = qobject_cast<Impulse*>(n))
@@ -725,6 +921,44 @@ void GpuNode::Engine::setupComponent(GpuRenderer& renderer, GpuNode& node)
       input_i++;
     }
   }
+
+  // (2) Size each texture-inlet item to its upstream RT's pixel size BEFORE
+  //     loadState runs. QML's Component.onCompleted has already rebound each
+  //     inlet item's width/height to inletContainer.width/.height via
+  //     Qt.binding (presentation.qml:50-53), and inletContainer is 0x0 at
+  //     this point because outputRoot hasn't been reparented to contentItem
+  //     yet (updateItemTextureOut runs after this). Setting the size
+  //     explicitly breaks that binding and pins each item to the RT pixel
+  //     size — which is exactly what the copyTexture(rt.texture ->
+  //     item->texture) in GpuRenderer::update requires anyway (that copy is
+  //     skipped on any pixelSize mismatch — GPUNode.cpp:456-466).
+  for(auto& [texture_in, i] : m_texInlets)
+  {
+    if(i >= (int)node.input.size())
+      continue;
+    score::gfx::Port* port = node.input[i];
+    if(!port || port->type != score::gfx::Types::Image)
+      continue;
+    auto rt = rl.renderTargetForInputPort(*port);
+    auto* item = qobject_cast<JS::TextureInletItem*>(texture_in->item());
+    if(item && rt.texture)
+      item->setSize(rt.texture->pixelSize());
+  }
+
+  // (3) Now run loadState. Every ShaderEffectSource that resolves its
+  //     sourceItem to an inletItem during the stateVersion++ re-binding pass
+  //     will see a non-zero-sized source item and the first scene-graph sync
+  //     will create its QSGRhiLayer (qsgrhilayer.cpp:248-254 "!m_item ||
+  //     m_pixelSize.isEmpty()" branch is avoided).
+  if(const auto& on_load = m_object->loadState(); on_load.isCallable())
+  {
+    QVariantMap vm;
+    for(auto& [k, v]: node.m_modelState) {
+      if(auto res = v.apply(ossia::qt::ossia_to_qvariant{}); res.isValid())
+        vm[k] = std::move(res);
+    }
+    on_load.call({m_engine->toScriptValue(vm)});
+  }
 }
 
 void GpuNode::Engine::updateItemTextureOut(QQuickWindow* window)
@@ -744,14 +978,15 @@ void GpuNode::Engine::updateItemTextureOut(QQuickWindow* window)
   }
 }
 
-void GpuNode::Engine::createItem(GpuRenderer& renderer, GpuNode& node)
+void GpuNode::Engine::createItem(
+    GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl)
 {
   m_component = new QQmlComponent{this->m_engine.get()};
 
   m_component->setData(node.source.toUtf8(), QUrl::fromLocalFile(node.m_root));
   if(m_component->isError())
   {
-    qDebug() << m_component->errorString();
+    qWarning() << m_component->errorString();
     return;
   }
 
@@ -763,10 +998,12 @@ void GpuNode::Engine::createItem(GpuRenderer& renderer, GpuNode& node)
     return;
   }
 
-  setupComponent(renderer, node);
+  setupComponent(renderer, node, rl);
 }
 
-void GpuNode::Engine::init(GpuRenderer& renderer, GpuNode& node, QQuickWindow* window)
+void GpuNode::Engine::init(
+    GpuRenderer& renderer, GpuNode& node, QQuickWindow* window,
+    score::gfx::RenderList& rl)
 {
   if(!m_item)
   {
@@ -784,13 +1021,13 @@ void GpuNode::Engine::init(GpuRenderer& renderer, GpuNode& node, QQuickWindow* w
     if(!m_context)
     {
       m_context = new QQmlContext{m_engine.get()};
-      m_execFuncs = new DeviceContext{*m_engine};
+      m_execFuncs = new DeviceContext{*m_engine, m_context};
       m_execFuncs->init();
 
       m_context->setContextProperty("Device", m_execFuncs);
       setupExecFuncs(this, &node, m_execFuncs->m_impl);
     }
-    createItem(renderer, node);
+    createItem(renderer, node, rl);
   }
 
   updateItemTextureOut(window);
@@ -854,62 +1091,45 @@ void gpu_exec_node::setScript(
   exec_context->ui->unregister_node(id);
   id = score::gfx::invalid_node_index;
 
-  //if(id < 0)
+  auto n = std::make_unique<JS::GpuNode>(
+      m_context, std::move(new_state), root, str, this->root_inputs(),
+      this->root_outputs());
+
   {
-    auto n = std::make_unique<JS::GpuNode>(
-        m_context, std::move(new_state), root, str, this->root_inputs(),
-        this->root_outputs());
+    auto& element = *m_context;
 
+    n->moveToThread(m_context->thread());
+    n->m_uiContext = m_context;
+    n->m_messageToUi = [ctx=m_context] (const QVariant& v){
+      OSSIA_ENSURE_CURRENT_THREAD_KIND(ossia::thread_type::Ui);
+      if(!ctx)
+        return;
+      ctx->executionToUi(v);
+    };
+
+    QObject::connect(
+        &element, &JS::ProcessModel::uiToExecution, n.get(), &JS::GpuNode::uiMessage);
+    QObject::connect(
+        &element, &JS::ProcessModel::stateElementChanged, n.get(),
+        &JS::GpuNode::stateElementChanged);
     {
-      auto& element = *m_context;
-
-      n->moveToThread(m_context->thread());
-      n->m_uiContext = m_context;
-      n->m_messageToUi = [ctx=m_context] (const QVariant& v){
-        OSSIA_ENSURE_CURRENT_THREAD_KIND(ossia::thread_type::Ui);
-        if(!ctx)
-          return;
-        ctx->executionToUi(v);
-      };
 
-      QObject::connect(
-          &element, &JS::ProcessModel::uiToExecution, n.get(), &JS::GpuNode::uiMessage);
-      QObject::connect(
-          &element, &JS::ProcessModel::stateElementChanged, n.get(),
-          &JS::GpuNode::stateElementChanged);
+      int i = 0;
+      for(auto& ctl : element.inlets())
       {
-
-        int i = 0;
-        for(auto& ctl : element.inlets())
+        if(auto ctrl = qobject_cast<Gfx::TextureInlet*>(ctl))
         {
-          if(auto ctrl = qobject_cast<Gfx::TextureInlet*>(ctl))
-          {
-            ossia::texture_inlet& inl
-                = static_cast<ossia::texture_inlet&>(*root_inputs()[i]);
-            n->process(i, inl.data); // Setup render_target_spec
-            // FIXME this should be done at a more general level, right now it's only done here
-            // and in avendish nodes
-          }
-          i++;
+          ossia::texture_inlet& inl
+              = static_cast<ossia::texture_inlet&>(*root_inputs()[i]);
+          n->process(i, inl.data); // Setup render_target_spec
+          // FIXME this should be done at a more general level, right now it's only done here
+          // and in avendish nodes
         }
+        i++;
       }
     }
-    id = exec_context->ui->register_node(std::move(n));
-  }
-  /*
-  else
-  {
-    // FIXME need to update the ports if they changed on the host side!
-    auto msg = exec_context->allocateMessage(1);
-    msg.node_id = id;
-    msg.input.emplace_back(score::gfx::FunctionMessage{[str](score::gfx::Node& nn) {
-      auto& n = static_cast<GpuNode&>(nn);
-      n.source = str; // FIXME mutex
-      n.sourceIndex++;
-    }});
-    exec_context->ui->send_message(std::move(msg));
   }
-*/
+  id = exec_context->ui->register_node(std::move(n));
 }
 }
 #endif
diff --git a/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp b/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp
index b274479a26..99f2820a47 100644
--- a/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp
+++ b/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp
@@ -1,4 +1,5 @@
 #pragma once
+#include <Process/Dataflow/CableData.hpp>
 #include <Process/TimeValue.hpp>
 
 #include <score/document/DocumentContext.hpp>
@@ -157,7 +158,9 @@ class SCORE_PLUGIN_JS_EXPORT EditJsContext : public QObject
   W_SLOT(outlets)
 
   QObject* createCable(QObject* outlet, QObject* inlet);
-  W_SLOT(createCable)
+  W_SLOT(createCable, (QObject*, QObject*))
+  QObject* createCable(QObject* outlet, QObject* inlet, Process::CableType type);
+  W_SLOT(createCable, (QObject*, QObject*, Process::CableType))
 
   void setAddress(QObject* obj, QString addr);
   W_SLOT(setAddress)
diff --git a/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp b/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp
index b5846702f7..bb1549e8cf 100644
--- a/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp
+++ b/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp
@@ -103,6 +103,12 @@ int EditJsContext::outlets(QObject* obj)
 }
 
 QObject* EditJsContext::createCable(QObject* outlet, QObject* inlet)
+{
+  return createCable(outlet, inlet, Process::CableType::ImmediateGlutton);
+}
+
+QObject*
+EditJsContext::createCable(QObject* outlet, QObject* inlet, Process::CableType tp)
 {
   auto doc = ctx();
   if(!doc)
@@ -118,7 +124,7 @@ QObject* EditJsContext::createCable(QObject* outlet, QObject* inlet)
 
   auto& root = score::IDocument::get<Scenario::ScenarioDocumentModel>(doc->document);
   auto [m, _] = macro(*doc);
-  auto& c = m->createCable(root, *src, *sink, Process::CableType::ImmediateGlutton);
+  auto& c = m->createCable(root, *src, *sink, tp);
   return &c;
 }
 
diff --git a/src/plugins/score-plugin-threedim/CMakeLists.txt b/src/plugins/score-plugin-threedim/CMakeLists.txt
index cf29d7946f..16ffced3ac 100644
--- a/src/plugins/score-plugin-threedim/CMakeLists.txt
+++ b/src/plugins/score-plugin-threedim/CMakeLists.txt
@@ -14,6 +14,24 @@ endif()
 
 find_package(${QT_VERSION} REQUIRED COMPONENTS Xml)
 
+# fastgltf — vendored glTF 2.0 parser. The library auto-downloads simdjson
+# on first configure (into 3rdparty/fastgltf/deps/simdjson/) unless a
+# system simdjson is found via find_package.
+if(NOT TARGET fastgltf) # only add the subdirectory if nothing defined the target yet
+  set(FASTGLTF_COMPILE_AS_CPP20 ON CACHE BOOL "" FORCE) # FORCE: overrides any value already in the cache
+  add_subdirectory("${3RDPARTY_FOLDER}/fastgltf" "${CMAKE_CURRENT_BINARY_DIR}/fastgltf" EXCLUDE_FROM_ALL)
+endif()
+
+# spz — Niantic / Adobe reference SPZ decoder for compressed 3DGS files.
+# v1-3 only (v4/ZSTD stubbed; see 3rdparty/spz/CMakeLists.txt). Pulls
+# in zlib via ZLIB::ZLIB.
+if(NOT TARGET spz) # only add the subdirectory if nothing defined the target yet
+  set(SPZ_BUILD_TOOLS OFF CACHE BOOL "" FORCE) # FORCE: overrides any value already in the cache
+  set(SPZ_BUILD_PYTHON_BINDINGS OFF CACHE BOOL "" FORCE)
+  set(SPZ_BUILD_EXTENSIONS OFF CACHE BOOL "" FORCE)
+  add_subdirectory("${3RDPARTY_FOLDER}/spz" "${CMAKE_CURRENT_BINARY_DIR}/spz" EXCLUDE_FROM_ALL)
+endif()
+
 # libssynth
 add_library(
   ssynth STATIC
@@ -77,12 +95,125 @@ add_library(
   Threedim/GeometryToBufferStrategies.cpp
   Threedim/Noise.hpp
   Threedim/Noise.cpp
-  Threedim/ObjLoader.hpp
-  Threedim/ObjLoader.cpp
+  Threedim/GeometryLoader.hpp
+  Threedim/GeometryLoader.cpp
+  Threedim/AssetLoader.hpp
+  Threedim/AssetLoader.cpp
+  Threedim/FbxParser.hpp
+  Threedim/FbxParser.cpp
+  Threedim/GltfParser.hpp
+  Threedim/GltfParser.cpp
+  Threedim/VcgImporters.hpp
+  Threedim/VcgImporters.cpp
+  Threedim/Camera.hpp
+  Threedim/Camera.cpp
+  Threedim/CameraArray.hpp
+  Threedim/CameraArray.cpp
+  Threedim/CameraSwitch.hpp
+  Threedim/Light.hpp
+  Threedim/Light.cpp
+  Threedim/Transform3D.hpp
+  Threedim/Transform3D.cpp
+  Threedim/TransformHelper.hpp
+
+  Threedim/ScenePreprocessor/Executor.hpp
+  Threedim/ScenePreprocessor/Executor.cpp
+  Threedim/ScenePreprocessor/Metadata.hpp
+  Threedim/ScenePreprocessor/Process.hpp
+  Threedim/ScenePreprocessor/Process.cpp
+
+  Threedim/SceneFilter/Executor.hpp
+  Threedim/SceneFilter/Executor.cpp
+  Threedim/SceneFilter/Metadata.hpp
+  Threedim/SceneFilter/Process.hpp
+  Threedim/SceneFilter/Process.cpp
+
+  Threedim/FlattenedSceneFilter/Executor.hpp
+  Threedim/FlattenedSceneFilter/Executor.cpp
+  Threedim/FlattenedSceneFilter/Metadata.hpp
+  Threedim/FlattenedSceneFilter/Process.hpp
+  Threedim/FlattenedSceneFilter/Process.cpp
+
+  Threedim/MergeGeometries/Executor.hpp
+  Threedim/MergeGeometries/Executor.cpp
+  Threedim/MergeGeometries/Metadata.hpp
+  Threedim/MergeGeometries/Process.hpp
+  Threedim/MergeGeometries/Process.cpp
+
+  Threedim/SceneGraphFilter.hpp
+  Threedim/SceneGraphFilter.cpp
+  Threedim/SceneSwitch.hpp
+  Threedim/SceneSelector.hpp
+  Threedim/SceneSelector.cpp
+  Threedim/SceneGroup.hpp
+  Threedim/SceneGroup.cpp
+  Threedim/SceneDuplicator.hpp
+  Threedim/SceneDuplicator.cpp
+  Threedim/SceneFromMeshes.hpp
+  Threedim/SceneFromMeshes.cpp
+  Threedim/SceneInspector.hpp
+  Threedim/SceneInspector.cpp
+  Threedim/CreateCollection.hpp
+  Threedim/CreateCollection.cpp
+  Threedim/SceneResourceRoute.hpp
+  Threedim/SceneResourceRoute.cpp
+  Threedim/InjectBuffer.hpp
+  Threedim/InjectBuffer.cpp
+  Threedim/InjectTexture.hpp
+  Threedim/InjectTexture.cpp
+  Threedim/TagAs.hpp
+  Threedim/TagAs.cpp
+  Threedim/PBRMesh.hpp
+  Threedim/PBRMesh.cpp
+  Threedim/MaterialOverride.hpp
+  Threedim/MaterialOverride.cpp
+  Threedim/ConfigurePrimitive.hpp
+  Threedim/ConfigurePrimitive.cpp
+  Threedim/Instancer.hpp
+  Threedim/Instancer.cpp
+  Threedim/ShadowCascadeSetup.hpp
+  Threedim/ShadowCascadeSetup.cpp
+  Threedim/EnvironmentLoader.hpp
+  Threedim/EnvironmentLoader.cpp
+  Threedim/AnimationPlayer.hpp
+  Threedim/AnimationPlayer.cpp
+  Threedim/HumanoidPose.hpp
+  Threedim/HumanoidPresets.hpp
+  Threedim/HumanoidRetarget.hpp
+  Threedim/HumanoidSourceAdapters.hpp
+  Threedim/HumanoidSourceMaps.hpp
+  Threedim/InverseKinematics.hpp
+  Threedim/TextToMesh.hpp
+  Threedim/TextToMesh.cpp
+  Threedim/TextToTexture.hpp
+  Threedim/ExtractBuffer2.hpp
+  Threedim/ExtractBuffer2.cpp
+  Threedim/ExtractSceneBuffer.hpp
+  Threedim/ExtractSceneBuffer.cpp
+  Threedim/ExtractTexture.hpp
+  Threedim/ExtractTexture.cpp
+  Threedim/BufferInfo.hpp
+  Threedim/TextureInfo.hpp
+  Threedim/ImageLoader.hpp
+  Threedim/ImageLoader.cpp
+  Threedim/TangentUtils.hpp
+  Threedim/BufferToGeometryCommon.hpp
+  Threedim/Debug.hpp
+  Threedim/MeshHelpers.hpp
   Threedim/PCLToGeometry.hpp
   Threedim/PCLToGeometry.cpp
   Threedim/Ply.hpp
   Threedim/Ply.cpp
+  Threedim/PrimitiveCloud/PlyParser.hpp
+  Threedim/PrimitiveCloud/PlyParser.cpp
+  Threedim/PrimitiveCloud/SplatBinary.hpp
+  Threedim/PrimitiveCloud/SplatBinary.cpp
+  Threedim/PrimitiveCloud/SpzCodec.hpp
+  Threedim/PrimitiveCloud/SpzCodec.cpp
+  Threedim/PrimitiveCloud/SceneFromCloud.hpp
+  Threedim/PrimitiveCloud/SceneFromCloud.cpp
+  Threedim/PrimitiveCloud/FormatOverride.hpp
+  Threedim/PrimitiveCloud/FormatOverride.cpp
   Threedim/Primitive.hpp
   Threedim/Primitive.cpp
   Threedim/StructureSynth.hpp
@@ -107,16 +238,9 @@ add_library(
   Threedim/RenderPipeline/Process.cpp
   Threedim/RenderPipeline/Layer.hpp
 
-  Threedim/Splat/Executor.hpp
-  Threedim/Splat/Executor.cpp
-  Threedim/Splat/Metadata.hpp
-  Threedim/Splat/Process.hpp
-  Threedim/Splat/Process.cpp
-  Threedim/Splat/GaussianSplatNode.hpp
-  Threedim/Splat/GaussianSplatNode.cpp
-
   "${3RDPARTY_FOLDER}/miniply/miniply.cpp"
   "${3RDPARTY_FOLDER}/mikktspace/mikktspace.c"
+  "${3RDPARTY_FOLDER}/ufbx/ufbx.c"
   score_plugin_threedim.hpp
   score_plugin_threedim.cpp)
 
@@ -132,8 +256,10 @@ target_include_directories(
     "${3RDPARTY_FOLDER}/vcglib"
     "${3RDPARTY_FOLDER}/miniply"
     "${3RDPARTY_FOLDER}/mikktspace"
-    "${3RDPARTY_FOLDER}/opengametools/src")
+    "${3RDPARTY_FOLDER}/opengametools/src"
+    "${3RDPARTY_FOLDER}/ufbx")
 
 target_link_libraries(
   score_plugin_threedim PRIVATE score_plugin_engine score_plugin_avnd
-                                score_plugin_gfx fmt::fmt ssynth Eigen3::Eigen)
+                                score_plugin_gfx fmt::fmt ssynth Eigen3::Eigen
+                                fastgltf::fastgltf spz)
diff --git a/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.cpp b/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.cpp
new file mode 100644
index 0000000000..d2d2cf02a0
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.cpp
@@ -0,0 +1,442 @@
+#include "AnimationPlayer.hpp"
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <unordered_map>
+#include <vector>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Accumulated TRS override for a single scene_node. A channel is only
+// meaningful when its has_* flag is set; otherwise the original is kept.
+struct TRSOverride
+{
+  float translation[3]{}; // x,y,z
+  float rotation[4]{};  // quaternion x,y,z,w (zero-initialized, NOT identity)
+  float scale[3]{}; // x,y,z (zero-initialized, NOT 1)
+  bool has_translation{false};
+  bool has_rotation{false};
+  bool has_scale{false};
+};
+
+using TRSMap = std::unordered_map<uint64_t, TRSOverride>;
+
+// Binary-search for the segment `[times[i], times[i+1]]` that contains `t`.
+// Returns (i, alpha) with alpha ∈ [0, 1). For t at or after the last
+// keyframe, returns (n-1, 1); the samplers clamp that to the final value.
+struct SegmentLookup
+{
+  std::size_t lower{};
+  float alpha{};
+};
+
+SegmentLookup
+findSegment(const std::vector<float>& times, float t) noexcept
+{
+  const std::size_t n = times.size();
+  // `!(t > front)` instead of `t <= front`: a NaN `t` must also stop here,
+  // else upper_bound returns end() and times[upper] reads out of bounds.
+  if(n <= 1 || !(t > times.front()))
+    return {0, 0.f};
+  if(t >= times.back())
+    return {n - 1, 1.f}; // alpha unused in the lerp when clamped below
+
+  // std::upper_bound finds the first key > t → segment is its left neighbour.
+  auto it = std::upper_bound(times.begin(), times.end(), t);
+  const std::size_t upper = std::size_t(it - times.begin());
+  const std::size_t lower = upper - 1;
+  const float t0 = times[lower];
+  const float t1 = times[upper];
+  const float span = t1 - t0;
+  const float alpha = span > 1e-8f ? (t - t0) / span : 0.f;
+  return {lower, alpha};
+}
+
+// Lerp for scalars / vec3 / vec4 depending on `stride`, written to `out`
+// (left untouched when there is no complete keyframe or `stride` is 0).
+// Step and linear covered; cubic_spline is treated as linear for this first
+// pass (proper cubic_spline keyframes pack `in-tangent, value, out-tangent`
+// per slot at 3× stride — handling it right requires knowing the format).
+void sampleLinear(
+    const std::vector<float>& values, std::size_t stride, SegmentLookup s,
+    float* out) noexcept
+{
+  const std::size_t n = stride != 0 ? values.size() / stride : 0; // no div by 0
+  if(n == 0)
+    return;
+  if(s.lower >= n - 1 || s.alpha <= 0.f)
+  {
+    const std::size_t idx = std::min(s.lower, n - 1);
+    std::memcpy(out, values.data() + idx * stride, stride * sizeof(float));
+    return;
+  }
+  const float* a = values.data() + s.lower * stride;
+  const float* b = values.data() + (s.lower + 1) * stride;
+  const float alpha = s.alpha;
+  for(std::size_t i = 0; i < stride; ++i)
+    out[i] = a[i] + (b[i] - a[i]) * alpha;
+}
+
+// Quaternion slerp via QQuaternion — handles shortest-arc vs. double-cover.
+void sampleSlerp(
+    const std::vector<float>& values, SegmentLookup s, float out[4]) noexcept
+{
+  const std::size_t n = values.size() / 4; // one keyframe = 4 floats (x,y,z,w)
+  if(n == 0)
+    return; // no complete keyframe: `out` is left untouched
+  if(s.lower >= n - 1 || s.alpha <= 0.f)
+  {
+    // Clamped or exactly on a key: copy that key verbatim, no normalization.
+    const std::size_t idx = std::min(s.lower, n - 1);
+    std::memcpy(out, values.data() + idx * 4, 4 * sizeof(float));
+    return;
+  }
+  const float* a = values.data() + s.lower * 4;
+  const float* b = values.data() + (s.lower + 1) * 4;
+  // glTF convention: (x, y, z, w). QQuaternion uses (scalar, x, y, z).
+  QQuaternion qa(a[3], a[0], a[1], a[2]);
+  QQuaternion qb(b[3], b[0], b[1], b[2]);
+  QQuaternion r = QQuaternion::slerp(qa, qb, s.alpha).normalized();
+  out[0] = r.x(); // written back in (x, y, z, w) order
+  out[1] = r.y();
+  out[2] = r.z();
+  out[3] = r.scalar();
+}
+
+// Walk the raw scene tree and emit a cloned subtree with overrides
+// applied. Subtrees that contain no animated node are returned as the
+// same shared_ptr (structural sharing) so downstream caches see
+// unchanged pointers for the un-animated branches.
+struct CloneVisitor
+{
+  const TRSMap& overrides;
+
+  // Recursive scan: is this node or any scene_node descendant animated?
+  // Not memoized: clone() re-runs it at every level it descends into, which
+  // is acceptable as long as scene trees stay shallow.
+  bool subtree_is_animated(const ossia::scene_node& n) const noexcept
+  {
+    if(overrides.find(n.id.value) != overrides.end())
+      return true;
+    if(!n.has_children())
+      return false;
+    for(const auto& child : *n.children)
+    {
+      if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&child))
+        if(*sub && subtree_is_animated(**sub))
+          return true;
+    }
+    return false;
+  }
+
+  ossia::scene_node_ptr clone(const ossia::scene_node_ptr& orig) const
+  {
+    if(!orig)
+      return orig;
+    if(!subtree_is_animated(*orig))
+      return orig; // whole subtree unchanged → share
+
+    auto new_node = std::make_shared<ossia::scene_node>(*orig); // children replaced below
+    std::vector<ossia::scene_payload> new_children;
+    if(orig->children)
+      new_children.reserve(orig->children->size());
+
+    bool xform_replaced = false;
+    auto it = overrides.find(orig->id.value);
+    const auto* ov = it != overrides.end() ? &it->second : nullptr; // null: only a descendant is animated
+
+    if(orig->children)
+    {
+      for(const auto& payload : *orig->children)
+      {
+        if(ov && !xform_replaced)
+        {
+          if(auto* xf = ossia::get_if<ossia::scene_transform>(&payload))
+          {
+            // Override the first scene_transform we encounter in this
+            // node's children (GltfParser / FbxParser convention:
+            // they prepend one as the first child of each node).
+            ossia::scene_transform merged = *xf;
+            if(ov->has_translation)
+              std::memcpy(merged.translation, ov->translation, 12); // 3 floats
+            if(ov->has_rotation)
+              std::memcpy(merged.rotation, ov->rotation, 16); // 4 floats
+            if(ov->has_scale)
+              std::memcpy(merged.scale, ov->scale, 12); // 3 floats
+            new_children.push_back(merged);
+            xform_replaced = true;
+            continue;
+          }
+        }
+
+        // Recurse into sub-scene_node payloads so descendants can
+        // also be animated.
+        if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&payload))
+        {
+          new_children.push_back(clone(*sub));
+          continue;
+        }
+
+        new_children.push_back(payload);
+      }
+    }
+
+    // If this node is animated but had no scene_transform child, insert
+    // one at the start so the TRS takes effect on subsequent siblings.
+    if(ov && !xform_replaced)
+    {
+      ossia::scene_transform inserted{};
+      inserted.rotation[3] = 1.f; // identity quaternion w
+      inserted.scale[0] = inserted.scale[1] = inserted.scale[2] = 1.f;
+      if(ov->has_translation)
+        std::memcpy(inserted.translation, ov->translation, 12); // 3 floats
+      if(ov->has_rotation)
+        std::memcpy(inserted.rotation, ov->rotation, 16); // 4 floats
+      if(ov->has_scale)
+        std::memcpy(inserted.scale, ov->scale, 12); // 3 floats
+      new_children.insert(new_children.begin(), inserted);
+    }
+
+    new_node->children
+        = std::make_shared<const std::vector<ossia::scene_payload>>(
+            std::move(new_children));
+    new_node->dirty_index = orig->dirty_index + 1;
+    return new_node;
+  }
+};
+
+// Local T * R * S matrix of a scene_transform; rotation[] is (x, y, z, w).
+QMatrix4x4 trsToMat(const ossia::scene_transform& t) noexcept
+{
+  const QQuaternion q(t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2]);
+  QMatrix4x4 result;
+  result.translate(t.translation[0], t.translation[1], t.translation[2]);
+  result.rotate(q);
+  result.scale(t.scale[0], t.scale[1], t.scale[2]);
+  return result;
+}
+
+// Depth-first walk over the (post-override) scene tree that records the
+// accumulated transform matrix of every node with a non-zero id, keyed
+// by scene_node_id::value. The skinning path resolves each joint's
+// joint_node_ids[i] through this map instead of re-walking per joint.
+using WorldMatMap = std::unordered_map<std::uint64_t, QMatrix4x4>;
+void collectNodeWorldMatrices(
+    const ossia::scene_node& n, const QMatrix4x4& parentWorld,
+    WorldMatMap& out)
+{
+  // By convention (GltfParser / FbxParser / SceneGroup) a node's TRS is
+  // the first scene_transform payload among its children; without one
+  // the local matrix stays identity.
+  QMatrix4x4 localMat;
+  if(n.children)
+  {
+    for(const auto& payload : *n.children)
+    {
+      const auto* trs = ossia::get_if<ossia::scene_transform>(&payload);
+      if(!trs)
+        continue;
+      localMat = trsToMat(*trs);
+      break;
+    }
+  }
+  const QMatrix4x4 worldMat = parentWorld * localMat;
+  if(n.id.value != 0)
+    out[n.id.value] = worldMat;
+
+  if(!n.children)
+    return;
+  for(const auto& payload : *n.children)
+  {
+    const auto* child = ossia::get_if<ossia::scene_node_ptr>(&payload);
+    if(child && *child)
+      collectNodeWorldMatrices(**child, worldMat, out);
+  }
+}
+
+} // namespace
+
+void AnimationPlayer::operator()()
+{
+  const auto& in = inputs.scene_in.scene;
+  if(!in.state || in.state->empty() || !in.state->animations
+     || in.state->animations->empty())
+  {
+    outputs.scene_out.scene = in;
+    outputs.scene_out.dirty = 0;
+    return;
+  }
+
+  float t = inputs.time.value;
+  // Speed only kicks in while the Time inlet repeats the value stored by
+  // the previous call: t is then nudged by speed * 1/60 s (halp doesn't
+  // expose a deterministic dt yet). NOTE(review): m_prev_time stores the
+  // nudged value, so with a constant Time the test alternates and t
+  // flips between Time and Time + speed/60 instead of accumulating.
+  const float speed = inputs.speed.value;
+  if(t == m_prev_time && speed != 1.f && speed != 0.f)
+    t = m_prev_time + speed * (1.f / 60.f);
+  m_prev_time = t;
+
+  // Collect animation_components to sample.
+  const auto& anims = *in.state->animations;
+  const int clip_i = inputs.clip_index.value;
+  std::vector<const ossia::animation_component*> clips;
+  clips.reserve(anims.size());
+  if(clip_i < 0)
+  {
+    for(const auto& a : anims)
+      if(a)
+        clips.push_back(a.get());
+  }
+  else if(std::size_t(clip_i) < anims.size() && anims[clip_i])
+  {
+    clips.push_back(anims[clip_i].get());
+  }
+
+  TRSMap overrides;
+  for(const auto* clip : clips)
+  {
+    float clip_t = t;
+    if(inputs.loop.value && clip->duration > 0.f)
+    {
+      // Wrap into [0, duration). std::fmod keeps the sign of t, so a
+      // negative remainder is shifted up by one duration.
+      clip_t = std::fmod(t, clip->duration);
+      if(clip_t < 0.f)
+        clip_t += clip->duration;
+    }
+    else if(clip->duration > 0.f)
+      clip_t = std::clamp(clip_t, 0.f, clip->duration);
+
+    for(const auto& channel : clip->channels)
+    {
+      if(!channel.times || !channel.values)
+        continue;
+      const auto& times = *channel.times;
+      const auto& values = *channel.values;
+      if(times.empty() || values.empty())
+        continue; // no keyframes: nothing to sample
+      auto seg = findSegment(times, clip_t);
+      // Entries are created per TRS case so ignored paths never mark a node.
+      switch(channel.target_path)
+      {
+        case ossia::animation_target::translation: {
+          auto& ov = overrides[channel.target_node_id];
+          sampleLinear(values, 3, seg, ov.translation);
+          ov.has_translation = true;
+          break;
+        }
+        case ossia::animation_target::rotation: {
+          auto& ov = overrides[channel.target_node_id];
+          sampleSlerp(values, seg, ov.rotation);
+          ov.has_rotation = true;
+          break;
+        }
+        case ossia::animation_target::scale: {
+          auto& ov = overrides[channel.target_node_id];
+          sampleLinear(values, 3, seg, ov.scale);
+          ov.has_scale = true;
+          break;
+        }
+        default:
+          // weights / custom — deliberately ignored; see header comment.
+          break;
+      }
+    }
+  }
+
+  if(overrides.empty())
+  {
+    // Nothing was sampled (no clip selected, empty keyframe arrays, or
+    // only weights / custom channels). Pass through, version untouched.
+    outputs.scene_out.scene = in;
+    outputs.scene_out.dirty = 0;
+    return;
+  }
+
+  // Clone-and-override the tree.
+  CloneVisitor vis{overrides};
+  auto new_roots
+      = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  if(in.state->roots)
+  {
+    new_roots->reserve(in.state->roots->size());
+    for(const auto& r : *in.state->roots)
+      new_roots->push_back(vis.clone(r));
+  }
+
+  auto new_state = std::make_shared<ossia::scene_state>(*in.state);
+  new_state->roots = new_roots;
+  new_state->version = ++m_version_counter;
+  new_state->dirty_index = in.state->dirty_index + 1;
+
+  // ── Skinning update ──────────────────────────────────────────────
+  // With skeletons present, walk the post-override tree once to cache
+  // node matrices, compute joint_matrix[i] = worldMat[joint_node_ids[i]]
+  // × inverse_bind, and republish each skin with a fresh buffer_resource.
+  if(in.state->skeletons && !in.state->skeletons->empty())
+  {
+    WorldMatMap worlds;
+    for(const auto& r : *new_roots)
+      if(r)
+        collectNodeWorldMatrices(*r, QMatrix4x4{}, worlds);
+
+    auto new_skels
+        = std::make_shared<std::vector<ossia::skeleton_component_ptr>>();
+    new_skels->reserve(in.state->skeletons->size());
+    for(const auto& src : *in.state->skeletons)
+    {
+      if(!src)
+      {
+        new_skels->push_back(src);
+        continue;
+      }
+      const std::size_t n = src->joints.size();
+      // Pack N joint matrices as column-major float[16] entries.
+      auto matrices = std::make_shared<std::vector<float>>(n * 16, 0.f);
+      for(std::size_t j = 0; j < n; ++j)
+      {
+        QMatrix4x4 ibm;
+        std::memcpy(
+            ibm.data(), src->joints[j].inverse_bind_matrix,
+            sizeof(float) * 16);
+        QMatrix4x4 world;
+        if(j < src->joint_node_ids.size())
+        {
+          auto it = worlds.find(src->joint_node_ids[j].value);
+          if(it != worlds.end())
+            world = it->second;
+        }
+        const QMatrix4x4 jm = world * ibm;
+        std::memcpy(
+            matrices->data() + j * 16, jm.constData(), sizeof(float) * 16);
+      }
+      auto buf = std::make_shared<ossia::buffer_resource>();
+      ossia::buffer_data bd;
+      bd.data = std::shared_ptr<const void>(matrices, matrices->data());
+      bd.byte_size = int64_t(matrices->size() * sizeof(float));
+      bd.usage_hint = ossia::buffer_data::usage::storage_buffer;
+      buf->resource = std::move(bd);
+      buf->dirty_index = new_state->version;
+
+      auto cloned = std::make_shared<ossia::skeleton_component>(*src);
+      cloned->joint_matrices_buffer = std::move(buf);
+      cloned->dirty_index = new_state->version;
+      new_skels->push_back(std::move(cloned));
+    }
+    new_state->skeletons = std::move(new_skels);
+  }
+
+  outputs.scene_out.scene.state = std::move(new_state);
+  outputs.scene_out.dirty = ossia::scene_port::dirty_animation;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.hpp b/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.hpp
new file mode 100644
index 0000000000..46d119b94f
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.hpp
@@ -0,0 +1,94 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+
+namespace Threedim
+{
+
+// Samples an incoming scene's animation channels at a user-provided
+// time and emits a scene_spec whose animated scene_nodes carry updated
+// scene_transform payloads (TRS) or whose skeletons carry updated bone
+// poses. Passthrough when the input scene has no animations.
+//
+// Sampling model:
+//   - animation_channel.target_node_id refers to a scene_node::id.
+//   - target_path ∈ {translation, rotation, scale, weights, custom}.
+//   - `times` + `values` hold the keyframes; `interpolation` is step /
+//     linear / cubic_spline.
+//
+// Output layout:
+//   - For TRS channels: find the first `scene_transform` payload in
+//     the matching node's children (the convention GltfParser /
+//     FbxParser follow — they prepend one per node) and override its
+//     translation/rotation/scale fields.
+//   - Subtrees that don't touch any animated node are shared as-is
+//     (shared_ptr reuse), so downstream identity caches stay hot
+//     outside the animated branch.
+//   - Materials / cameras / environment pass through by shared_ptr
+//     identity; skeletons are re-emitted with fresh joint matrices.
+//
+// Currently unsupported (passthrough):
+//   - weights (morph targets).
+//   - custom paths.
+//   - skeletal joint tracks that target joints inside a
+//     skeleton_component rather than scene_node ids.
+// These are follow-ups; they need the same sample-and-override pattern
+// but on different storage.
+class AnimationPlayer
+{
+public:
+  halp_meta(name, "Animation Player")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "animation_player")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/animation-player.html")
+  halp_meta(uuid, "2b4d7e8c-3a5f-4b9d-91c6-8d2e0f3a7b5e")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    halp::hslider_f32<"Time", halp::range{0., 3600., 0.}> time;
+    halp::hslider_f32<"Speed", halp::range{-4., 4., 1.}> speed;
+    halp::toggle<"Loop"> loop;
+    // -1 samples every animation_component into one override set (handy
+    // when clips target disjoint nodes, as is common for glTF scenes);
+    // 0 = first component, 1 = second, …. An index past the last
+    // component selects no clip and the scene passes through unchanged.
+    halp::spinbox_i32<"Clip index", halp::irange{-1, 32, -1}> clip_index;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void operator()();
+
+  std::shared_ptr<const ossia::scene_state> m_cached_state; // NOTE(review): unused by operator()() — confirm still needed
+  int64_t m_version_counter{0}; // incremented for every re-emitted scene_state
+
+  // Previous time — used only for the "speed" control's time advance;
+  // if the user is wiring a direct time inlet, this is ignored.
+  float m_prev_time{0.f};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/AssetLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/AssetLoader.cpp
new file mode 100644
index 0000000000..44756f2b92
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/AssetLoader.cpp
@@ -0,0 +1,368 @@
+#include "AssetLoader.hpp"
+
+#include "FbxParser.hpp"
+#include "GltfParser.hpp"
+#include "Ply.hpp"
+#include "PrimitiveCloud/FormatOverride.hpp"
+#include "PrimitiveCloud/PlyParser.hpp"
+#include "PrimitiveCloud/SceneFromCloud.hpp"
+#include "PrimitiveCloud/SplatBinary.hpp"
+#include "PrimitiveCloud/SpzCodec.hpp"
+#include "SceneFromMeshes.hpp"
+#include "VcgImporters.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <QFileInfo>
+#include <QQuaternion>
+#include <QString>
+
+#include <cstring>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace Threedim
+{
+
+// =============================================================================
+// AssetLoaderRegistry — process-wide parser dispatch table.
+//
+// Storage is a function-local Meyers singleton so registrations at
+// static-init time work without worrying about dynamic-init order across
+// translation units. The small-vector-ish layout (O(N) lookup over a
+// ~4-entry list) is fine: registrations are one-shot per addon.
+// =============================================================================
+namespace
+{
+struct RegistryState
+{
+  std::mutex mutex; // held around every access to `entries`
+  std::vector<std::pair<std::string, AssetLoaderRegistry::ParseFn>> entries;
+};
+RegistryState& registryInstance()
+{
+  static RegistryState state; // function-local static: built on first call
+  return state;
+}
+
+// Lowercase copy of `s`, one std::tolower call per byte.
+std::string toLower(std::string_view s)
+{
+  std::string lowered{s};
+  for(char& c : lowered)
+    c = char(std::tolower((unsigned char)c));
+  return lowered;
+}
+} // namespace
+
+void AssetLoaderRegistry::register_parser(
+    std::string_view extension, ParseFn fn)
+{
+  // Null callbacks and empty extensions are ignored.
+  if(extension.empty() || !fn)
+    return;
+  std::string lowered = toLower(extension);
+  RegistryState& reg = registryInstance();
+  std::lock_guard lock{reg.mutex};
+  for(auto& [ext, parser] : reg.entries)
+  {
+    if(ext != lowered)
+      continue;
+    parser = fn; // already registered: last writer wins
+    return;
+  }
+  reg.entries.emplace_back(std::move(lowered), fn);
+}
+
+AssetLoaderRegistry::ParseFn
+AssetLoaderRegistry::lookup(std::string_view extension_lower) noexcept
+{
+  if(extension_lower.empty()) // register_parser never stores an empty key
+    return nullptr;
+  RegistryState& reg = registryInstance();
+  std::lock_guard lock{reg.mutex};
+  for(const auto& [ext, parser] : reg.entries)
+    if(ext == extension_lower) // verbatim compare: the caller lowercases
+      return parser;
+  return nullptr;
+}
+
+namespace
+{
+
+static bool hasSuffixCI(std::string_view path, std::string_view ext) noexcept
+{
+  // Match "<anything>.<ext>": the dot must sit right before the suffix.
+  const std::size_t n = ext.size();
+  if(path.size() <= n || path[path.size() - n - 1] != '.')
+    return false;
+
+  // Compare the last n characters of `path` against `ext`, ignoring case.
+  const std::string_view tail = path.substr(path.size() - n);
+  for(std::size_t i = 0; i < n; ++i)
+  {
+    if(std::tolower((unsigned char)tail[i]) != std::tolower((unsigned char)ext[i]))
+      return false;
+  }
+  return true;
+}
+
+// Lowercased text after the last '.' in `path`, without the dot. Returns
+// an empty string when there is no dot or nothing follows it. Serves as
+// the AssetLoaderRegistry key once the built-in dispatch has missed.
+static std::string extensionLowerCI(std::string_view path)
+{
+  const auto dot = path.rfind('.');
+  if(dot != std::string_view::npos && dot + 1 < path.size())
+    return toLower(path.substr(dot + 1));
+  return {};
+}
+
+// Borrow FbxParser / GltfParser's static parsers: run `parse`, apply the
+// functor it returns to a throwaway inner loader, and hand back that
+// loader's m_raw_state. Nothing else from the inner instance survives;
+// nullptr is returned when `parse` yields an empty functor.
+//
+// The file type is pinned to halp::text_file_view (the default for every
+// halp::file_port<"..."> here). Deducing it from both the data argument
+// and the function pointer's by-value parameter would conflict
+// (FileT& vs FileT), so no deduction is attempted.
+template <typename Loader>
+static std::shared_ptr<const ossia::scene_state>
+runInnerParser(const halp::text_file_view& data,
+               std::function<void(Loader&)> (*parse)(halp::text_file_view))
+{
+  if(auto apply = parse(data))
+  {
+    Loader inner;
+    apply(inner);
+    return inner.m_raw_state;
+  }
+  return nullptr;
+}
+
+} // namespace
+
+// File-port callback. Picks a parser from the case-insensitive file
+// extension (built-ins first, then AssetLoaderRegistry) and returns a
+// functor that installs the parsed scene on the AssetLoader it is
+// applied to. An empty std::function is returned when the file name is
+// empty, no parser claims the extension, or the parser yields nothing.
+// OBJ / .splat / .spz parsers get tv.bytes; PLY / STL / OFF get the path.
+std::function<void(AssetLoader&)>
+AssetLoader::ins::asset_t::process(file_type tv)
+{
+  if(tv.filename.empty())
+    return {};
+
+  const std::string_view fname{tv.filename};
+  std::shared_ptr<const ossia::scene_state> loaded;
+
+  // Scene label = file name without its directory. Built lazily so it
+  // is only computed once a parse has actually produced something.
+  const auto label = [fname] {
+    return QFileInfo(QString::fromStdString(std::string{fname}))
+        .fileName()
+        .toStdString();
+  };
+
+  if(hasSuffixCI(fname, "fbx"))
+  {
+    loaded = runInnerParser<FbxParser>(tv, &FbxParser::ins::fbx_t::process);
+  }
+  else if(hasSuffixCI(fname, "gltf") || hasSuffixCI(fname, "glb"))
+  {
+    loaded = runInnerParser<GltfParser>(tv, &GltfParser::ins::gltf_t::process);
+  }
+  else if(hasSuffixCI(fname, "obj"))
+  {
+    Threedim::float_vec buf;
+    auto meshes = Threedim::ObjFromString(tv.bytes, buf);
+    if(!meshes.empty())
+    {
+      loaded = Threedim::sceneStateFromMeshes(
+          std::move(meshes), std::move(buf), label());
+    }
+  }
+  else if(hasSuffixCI(fname, "ply"))
+  {
+    // Sniff the header first: a PLY whose vertex element carries
+    // splat-style columns (or no face element) goes through the
+    // primitive-cloud path; everything else stays on the existing
+    // mesh path. The sniff only reads the textual header, no row data.
+    if(Threedim::PrimitiveCloud::ply_is_splat_shaped(fname))
+    {
+      auto cloud = Threedim::PrimitiveCloud::parse_ply(fname);
+      if(cloud)
+      {
+        loaded = Threedim::PrimitiveCloud::sceneStateFromCloud(
+            std::move(cloud), label());
+      }
+    }
+    else
+    {
+      Threedim::float_vec buf;
+      auto meshes = Threedim::PlyFromFile(fname, buf);
+      if(!meshes.empty())
+      {
+        loaded = Threedim::sceneStateFromMeshes(
+            std::move(meshes), std::move(buf), label());
+      }
+    }
+  }
+  else if(hasSuffixCI(fname, "stl"))
+  {
+    Threedim::float_vec buf;
+    auto meshes = Threedim::StlFromFile(fname, buf);
+    if(!meshes.empty())
+    {
+      loaded = Threedim::sceneStateFromMeshes(
+          std::move(meshes), std::move(buf), label());
+    }
+  }
+  else if(hasSuffixCI(fname, "off"))
+  {
+    Threedim::float_vec buf;
+    auto meshes = Threedim::OffFromFile(fname, buf);
+    if(!meshes.empty())
+    {
+      loaded = Threedim::sceneStateFromMeshes(
+          std::move(meshes), std::move(buf), label());
+    }
+  }
+  else if(hasSuffixCI(fname, "splat"))
+  {
+    // Antimatter15 binary .splat: 32 bytes/primitive, fixed schema.
+    auto cloud = Threedim::PrimitiveCloud::parse_splat_binary(tv.bytes);
+    if(cloud)
+    {
+      loaded = Threedim::PrimitiveCloud::sceneStateFromCloud(
+          std::move(cloud), label());
+    }
+  }
+  else if(hasSuffixCI(fname, "spz"))
+  {
+    // Niantic .spz v1-3: gzip-compressed column-grouped 3DGS data.
+    // Decoded via the vendored Niantic library (3rdparty/spz),
+    // transposed into the canonical 62-float row layout that the
+    // 3dgs.classic preset reads. v4 (NGSP-magic + ZSTD) returns
+    // nullptr — see 3rdparty/spz/CMakeLists.txt for the rationale.
+    auto cloud = Threedim::PrimitiveCloud::parse_spz(tv.bytes);
+    if(cloud)
+    {
+      loaded = Threedim::PrimitiveCloud::sceneStateFromCloud(
+          std::move(cloud), label());
+    }
+  }
+  else
+  {
+    // Built-ins all missed — consult the addon-registered parsers.
+    // score-addon-academy registers its USD loader here at module load.
+    const std::string ext = extensionLowerCI(fname);
+    if(auto fn = AssetLoaderRegistry::lookup(ext))
+      loaded = fn(tv);
+  }
+
+  if(!loaded)
+    return {};
+
+  return [state = std::move(loaded)](AssetLoader& self) mutable {
+    self.m_parsed_state = std::move(state);
+    // rebuild_format_state() refreshes m_overridden_state, invalidates the
+    // cached TRS and re-wraps, so no separate wrap pass is needed here.
+    self.rebuild_format_state();
+  };
+}
+
+void AssetLoader::rebuild_format_state()
+{
+  // Stage 1: parsed scene → format-overridden scene (override text cached).
+  m_cached_format_override = inputs.format_override.value;
+  m_overridden_state = Threedim::PrimitiveCloud::applyFormatOverride(
+      m_parsed_state, m_cached_format_override);
+  // Stage 2: the wrap is derived from stage 1, so force it to be redone.
+  m_cached_xform.valid = false;
+  rebuild_wrapped_state();
+}
+
+void AssetLoader::rebuild_wrapped_state()
+{
+  m_wrapped_state = Threedim::wrapSceneWithTransform( // m_overridden → m_wrapped
+      m_overridden_state, inputs, m_cached_xform, m_version_counter, m_xform_ref);
+}
+
+void AssetLoader::operator()()
+{
+  if(!m_parsed_state)
+  {
+    outputs.scene_out.scene.state = nullptr; // nothing loaded yet
+    outputs.scene_out.dirty = 0;
+    return;
+  }
+  if(!m_overridden_state) // reset by release(): rebuild both derived states
+    rebuild_format_state();
+  else if(Threedim::transformChanged(inputs, m_cached_xform))
+    rebuild_wrapped_state();
+  outputs.scene_out.scene.state = m_wrapped_state;
+  outputs.scene_out.dirty = ossia::scene_port::dirty_transform;
+}
+
+void AssetLoader::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  auto&& registry = r.registry();
+  if(!raw_transform_slot.valid())
+  {
+    // Claim a RawTransform slot for the root wrapping transform, then
+    // invalidate the cached TRS so the next wrap carries the fresh ref.
+    raw_transform_slot = registry.allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawTransform,
+        sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = registry.toOssiaRef(raw_transform_slot);
+    m_cached_xform.valid = false;
+  }
+  if(!raw_transform_slot.valid())
+    return; // still no usable slot: nothing to seed
+  score::gfx::RawLocalTransform seed{}; // value-initialised placeholder
+  registry.updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+}
+
+void AssetLoader::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!raw_transform_slot.valid())
+    return;
+
+  // Mirror the Position / Rotation / Scale controls into the slot.
+  const auto& euler = inputs.rotation.value;
+  const QQuaternion q = QQuaternion::fromEulerAngles(euler.x, euler.y, euler.z);
+  score::gfx::RawLocalTransform xform{};
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar();
+  xform.scale[0] = inputs.scale.value.x;
+  xform.scale[1] = inputs.scale.value.y;
+  xform.scale[2] = inputs.scale.value.z;
+  r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform));
+}
+
+void AssetLoader::release(score::gfx::RenderList& r)
+{
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot);
+  m_xform_ref = {};
+  // Drop the derived scene_states so nothing cached here outlives the
+  // registry it was built against (producer-state-drift Option A — see
+  // the matching comment in Light::release). m_parsed_state is parser
+  // output and is kept. NOTE(review): confirm operator()() rebuilds
+  // m_overridden_state, not only the wrap, after a release.
+  m_overridden_state.reset();
+  m_wrapped_state.reset();
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/AssetLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/AssetLoader.hpp
new file mode 100644
index 0000000000..4775820bee
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/AssetLoader.hpp
@@ -0,0 +1,166 @@
+#pragma once
+#include <Threedim/TinyObj.hpp>
+#include <Threedim/TransformHelper.hpp>
+#include <halp/controls.hpp>
+#include <halp/file_port.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <score_plugin_threedim_export.h>
+
+#include <memory>
+#include <string_view>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// External scene-file parser registry. Addons that ship format-specific
+// parsers (score-addon-academy's USD loader, a future Alembic loader,
+// etc.) register themselves here so AssetLoader can dispatch to them
+// without a link-time dependency from score-plugin-threedim to the addon.
+//
+// The registered callback takes the same halp::text_file_view that the
+// built-in glTF / FBX parsers receive and returns a populated
+// ossia::scene_state on success, or a null shared_ptr on failure /
+// unhandled input. AssetLoader wraps the state with the Position /
+// Rotation / Scale controls exactly as it does for the built-ins.
+//
+// Extensions are matched case-insensitively on the suffix after the
+// final '.'. Registrations that duplicate an extension replace any
+// prior one (last writer wins). Calls are thread-safe.
+class SCORE_PLUGIN_THREEDIM_EXPORT AssetLoaderRegistry
+{
+public:
+  using ParseFn = std::shared_ptr<const ossia::scene_state> (*)(
+      const halp::text_file_view&);
+
+  // Register a parser for an extension (no dot; stored lowercased). A
+  // null fn or an empty extension is ignored. Safe at static-init time —
+  // the underlying storage is a function-local Meyers singleton.
+  static void register_parser(std::string_view extension, ParseFn fn);
+
+  // Lookup by already-lowercased extension (no dot); nullptr if no match.
+  static ParseFn lookup(std::string_view extension_lower) noexcept;
+};
+
+// Unified 3D asset loader. Accepts .fbx / .gltf / .glb / .obj / .ply /
+// .stl / .off / .splat / .spz natively, plus .usd / .usda / .usdc /
+// .usdz when score-addon-academy is loaded (it registers its UsdParser
+// through AssetLoaderRegistry at module init).
+//
+// Dispatches by file extension to the appropriate parser:
+//   .fbx                    → ufbx           (FbxParser's static parser)
+//   .gltf / .glb            → fastgltf       (GltfParser's static parser)
+//   .obj                    → tinyobjloader  + sceneStateFromMeshes
+//   .ply                    → miniply        + sceneStateFromMeshes
+//   .stl / .off             → vcglib         + sceneStateFromMeshes
+//   .splat / .spz / splat-shaped .ply → PrimitiveCloud + sceneStateFromCloud
+//   .usd / .usda / .usdc / .usdz → OpenUSD   (academy UsdParser, optional)
+//   (others)                → AssetLoaderRegistry::lookup(ext)
+//
+// Position / Rotation / Scale controls wrap the loaded scene at a single
+// root TRS via TransformHelper::wrapSceneWithTransform — same convention
+// as FbxParser / GltfParser.
+//
+// For the geometry-only formats (OBJ/PLY/STL/OFF) the output is a scene
+// with one scene_node per mesh part, each containing a mesh_component
+// referencing a single shared CPU buffer. FBX/glTF retain their rich
+// scene hierarchy (lights, cameras, materials, skeletons, animations).
+class AssetLoader
+{
+public:
+  halp_meta(name, "Asset Loader")
+  halp_meta(category, "Visuals/3D")
+  halp_meta(c_name, "asset_loader")
+  halp_meta(authors, "ossia team, ufbx / fastgltf / tinyobj / miniply / vcglib")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/asset-loader.html")
+  halp_meta(uuid, "2f6a8c41-7d93-4e5b-b1c8-4e3f9a7d2c5b")
+
+  struct ins
+  {
+    struct asset_t : halp::file_port<"Asset file">
+    {
+      halp_meta(
+          extensions,
+          "3D assets (*.fbx *.gltf *.glb *.obj *.ply *.stl *.off "
+          "*.splat *.spz "
+          "*.usd *.usda *.usdc *.usdz)");
+      static std::function<void(AssetLoader&)> process(file_type data);
+    } asset;
+
+    PositionControl position;
+    RotationControl rotation;
+    ScaleControl    scale;
+
+    // Stamps every primitive_cloud_component emitted by this asset
+    // with `format_id = value` when non-empty. Empty falls back to the
+    // parser's autodetection (PLY column sniffing, .splat / .spz
+    // hardcoded). Used to route unrecognised PLY columns or addon-
+    // produced files through a FlattenedSceneFilterNode in mode 12.
+    struct format_override_t : halp::lineedit<"Format override (auto if empty)", "">
+    {
+      void update(AssetLoader& n) { n.rebuild_format_state(); }
+    } format_override;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void operator()();
+
+  // Render-thread hooks. init() claims a RawTransform slot for the
+  // single root wrapping xform this node emits (TransformHelper's
+  // scene-wrapping transform). update() uploads the current TRS.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  // Raw scene as parsed from the file — stable as long as the file
+  // doesn't change. The pipeline is:
+  //   m_parsed_state         (parser output, never mutated)
+  //   ↓ applyFormatOverride(format_override.value)
+  //   m_overridden_state     (format_id rewrites applied, or = parsed)
+  //   ↓ wrapSceneWithTransform(position/rotation/scale)
+  //   m_wrapped_state        (final, published downstream)
+  std::shared_ptr<const ossia::scene_state> m_parsed_state;
+  std::shared_ptr<const ossia::scene_state> m_overridden_state;
+  std::shared_ptr<const ossia::scene_state> m_wrapped_state;
+  std::string m_cached_format_override; // override text last applied by rebuild_format_state()
+  CachedTRS m_cached_xform; // consulted by transformChanged(); valid = false forces a re-wrap
+  int64_t m_version_counter{0}; // handed to wrapSceneWithTransform on every wrap
+
+  // Re-runs applyFormatOverride from the parsed state. Triggered by the
+  // lineedit's update() callback when the user edits the override
+  // field; also called once after parsing.
+  void rebuild_format_state();
+
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot; // allocated in init(), freed in release()
+  ossia::gpu_slot_ref m_xform_ref{}; // ref to that slot from toOssiaRef(); cleared in release()
+
+private:
+  void rebuild_wrapped_state();
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferInfo.hpp b/src/plugins/score-plugin-threedim/Threedim/BufferInfo.hpp
new file mode 100644
index 0000000000..b1374d6dc0
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/BufferInfo.hpp
@@ -0,0 +1,72 @@
+#pragma once
+#include <fmt/format.h>
+#include <halp/buffer.hpp>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <cstdint>
+#include <iterator>
+#include <string>
+
+namespace Threedim
+{
+// Tiny inspector node: takes a halp::gpu_buffer_input and exposes its
+// metadata (handle, byte size, byte offset, dirty flag) on regular
+// value-output ports plus a single human-readable summary string. Use
+// it as a debug breakpoint in any GPU buffer pipeline -- e.g.
+// SomeBufferSource -> BufferInfo -> Downstream -- to verify that the
+// buffer is actually wired up and that its size matches what the
+// downstream expects.
+//
+// Mirrors the structure of GeometryInfo: pure CPU operator(), no GPU
+// init/update/runInitialPasses needed because the framework already
+// publishes the gpu_buffer's metadata into our input port each tick.
+class BufferInfo
+{
+public:
+  halp_meta(name, "Buffer Info")
+  halp_meta(category, "Visuals/Utilities")
+  halp_meta(c_name, "buffer_info")
+  halp_meta(manual_url, "https://ossia.io/score-docs/processes/buffer-info.html")
+  halp_meta(uuid, "f1a3d6c8-2b4e-4c5d-8a9f-1e2d3c4b5a60")
+
+  struct
+  {
+    halp::gpu_buffer_input<"Buffer"> buffer;
+  } inputs;
+
+  struct
+  {
+    // Numeric metadata, exposed individually so it can be patched into
+    // other ports (size-driven UBO updates etc.).
+    halp::val_port<"Byte size", int64_t> byte_size;
+    halp::val_port<"Byte offset", int64_t> byte_offset;
+    // Raw native handle as an opaque integer. Useful only for visual
+    // identity ("did the upstream rebuild this buffer?"); the value is
+    // a QRhiBuffer* on every backend score supports today.
+    halp::val_port<"Handle", int64_t> handle;
+    halp::val_port<"Changed", bool> changed;
+    // One-line, copy-pasteable summary for tooltips / log scraping.
+    halp::val_port<"Readable", std::string> readable;
+  } outputs;
+
+  // Copies the input buffer's metadata onto the value outputs and
+  // rebuilds the one-line summary string in place (clear + append).
+  void operator()()
+  {
+    const auto& b = inputs.buffer.buffer;
+    outputs.byte_size.value = b.byte_size;
+    outputs.byte_offset.value = b.byte_offset;
+    outputs.handle.value = reinterpret_cast<std::int64_t>(b.handle);
+    outputs.changed.value = b.changed;
+
+    auto& ret = outputs.readable.value;
+    ret.clear();
+    fmt::format_to(
+        std::back_inserter(ret),
+        "handle=0x{:x}, byte_size={}, byte_offset={}, changed={}",
+        reinterpret_cast<std::uintptr_t>(b.handle), b.byte_size, b.byte_offset,
+        b.changed ? "yes" : "no");
+  }
+};
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp
index 1827dff0b7..002abbc367 100644
--- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp
@@ -108,13 +108,12 @@ void BuffersToGeometry::operator()()
   // Check if anything changed
   bool meshChanged = false;
   bool buffersChanged = false;
-  bool transformChanged = false;
-
-  // Check transform changes
-  // (Assuming PositionControl, RotationControl, ScaleControl have .value members)
-  // You'll need to compute the transform matrix and compare
-  // For now, mark as changed if any transform input changed
-  transformChanged = true; // Simplified - compute properly based on your controls
+  // Compute TRS matrix from position/rotation/scale controls. Returns
+  // true iff the matrix actually changed — the old code hard-coded
+  // `true`, firing a downstream transform rebuild every frame even
+  // when the knobs hadn't moved.
+  const bool transformChanged
+      = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS);
 
   // Check mesh configuration changes
   if(inputs.vertices.value != m_prevVertices || inputs.topology.value != m_prevTopology
diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp
index 9343514709..d15f369f0d 100644
--- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp
@@ -1,5 +1,6 @@
 #pragma once
 #include "BufferToGeometryCommon.hpp"
+#include "TransformHelper.hpp"
 
 #include <Threedim/TinyObj.hpp>
 #include <halp/buffer.hpp>
@@ -109,6 +110,7 @@ class BuffersToGeometry
   PrimitiveTopology m_prevTopology{};
   CullMode m_prevCullMode{};
   FrontFace m_prevFrontFace{};
+  CachedTRS m_cachedTRS{};
 
   struct ui
   {
diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp
index b013c969e6..eaf2bfe49a 100644
--- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp
@@ -75,10 +75,11 @@ void BuffersToGeometry2::operator()()
   // Check if anything changed
   bool meshChanged = false;
   bool buffersChanged = false;
-  bool transformChanged = false;
-
-  // Check transform changes
-  transformChanged = true; // Simplified - compute properly based on your controls
+  // Compute TRS matrix from position/rotation/scale controls; returns
+  // true iff the matrix actually changed (replaces the old hardcoded
+  // transformChanged=true which fired a downstream rebuild every frame).
+  const bool transformChanged
+      = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS);
 
   // Check mesh configuration changes
   if(inputs.vertices.value != m_prevVertices || inputs.topology.value != m_prevTopology
diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp
index d9f03fc452..69ee796759 100644
--- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp
@@ -1,5 +1,6 @@
 #pragma once
 #include "BufferToGeometryCommon.hpp"
+#include "TransformHelper.hpp"
 
 #include <Threedim/TinyObj.hpp>
 #include <halp/buffer.hpp>
@@ -108,6 +109,7 @@ class BuffersToGeometry2
   PrimitiveTopology m_prevTopology{};
   CullMode m_prevCullMode{};
   FrontFace m_prevFrontFace{};
+  CachedTRS m_cachedTRS{};
 
   struct ui
   {
diff --git a/src/plugins/score-plugin-threedim/Threedim/Camera.cpp b/src/plugins/score-plugin-threedim/Threedim/Camera.cpp
new file mode 100644
index 0000000000..dc25a61a31
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Camera.cpp
@@ -0,0 +1,104 @@
+#include "Camera.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+namespace Threedim
+{
+
+// Order invariant: called by GfxRenderer::initState BEFORE the first
+// operator()() and BEFORE processControlIn fires any rebuild() callback.
+// m_camera_ref / m_xform_ref populated here are therefore safe to read
+// in rebuild() without a guard. Adding prepare() to this node breaks the
+// invariant — see CpuFilterNode.hpp for details.
+void Camera::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  if(!raw_camera_slot.valid()) // no slot held yet: claim one RawCameraData
+  {
+    raw_camera_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawCamera,
+        sizeof(score::gfx::RawCameraData));
+    m_camera_ref = r.registry().toOssiaRef(raw_camera_slot); // stamped on the camera_component in rebuild()
+  }
+  if(raw_camera_slot.valid()) // validity re-checked before uploading
+  {
+    score::gfx::RawCameraData seed{}; // value-initialized placeholder contents
+    r.registry().updateSlot(res, raw_camera_slot, &seed, sizeof(seed));
+  }
+  if(!raw_transform_slot.valid()) // same sequence for the RawLocalTransform slot
+  {
+    raw_transform_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawTransform,
+        sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); // stamped on the scene_transform in rebuild()
+  }
+  if(raw_transform_slot.valid())
+  {
+    score::gfx::RawLocalTransform seed{}; // value-initialized; update() writes the real TRS
+    r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+  }
+}
+
+// Render-thread upload: writes the camera's local TRS to the RawTransform
+// slot and its eye / target / up / projection params to the RawCamera slot.
+void Camera::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(raw_transform_slot.valid())
+  {
+    // Local TRS of the scene_transform this camera emits. Translation
+    // mirrors the eye; rotation matches the quaternion built in rebuild()
+    // (local -Z → (target - eye), identity when eye ≈ target); scale is 1.
+    score::gfx::RawLocalTransform xform{};
+    xform.translation[0] = inputs.eye.value.x;
+    xform.translation[1] = inputs.eye.value.y;
+    xform.translation[2] = inputs.eye.value.z;
+    const QVector3D forward(
+        inputs.target.value.x - inputs.eye.value.x,
+        inputs.target.value.y - inputs.eye.value.y,
+        inputs.target.value.z - inputs.eye.value.z);
+    QQuaternion q; // default-constructed = identity, kept for a degenerate forward
+    if(forward.lengthSquared() > 1e-8f)
+      q = QQuaternion::fromDirection(
+          -forward.normalized(), QVector3D(0.f, 1.f, 0.f));
+    xform.rotation[0] = q.x();
+    xform.rotation[1] = q.y();
+    xform.rotation[2] = q.z();
+    xform.rotation[3] = q.scalar();
+    xform.scale[0] = 1.f;
+    xform.scale[1] = 1.f;
+    xform.scale[2] = 1.f;
+    r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform));
+  }
+
+  if(!raw_camera_slot.valid())
+    return;
+
+  score::gfx::RawCameraData raw{};
+  raw.eye[0] = inputs.eye.value.x;
+  raw.eye[1] = inputs.eye.value.y;
+  raw.eye[2] = inputs.eye.value.z;
+  raw.target[0] = inputs.target.value.x;
+  raw.target[1] = inputs.target.value.y;
+  raw.target[2] = inputs.target.value.z;
+  raw.up[0] = 0.f;
+  raw.up[1] = 1.f;
+  raw.up[2] = 0.f;
+  raw.yfov = inputs.fov.value * float(M_PI) / 180.f;
+  raw.znear = inputs.near_plane.value;
+  raw.zfar = inputs.far_plane.value;
+  raw.projection = 0u;  // perspective
+  r.registry().updateSlot(res, raw_camera_slot, &raw, sizeof(raw));
+}
+
+void Camera::release(score::gfx::RenderList& r)
+{
+  if(raw_camera_slot.valid())             // NOTE(review): init() re-allocates only when
+    r.registry().free(raw_camera_slot);   // !valid() -- confirm free() resets the handle.
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot);
+  m_camera_ref = {}; // drop the ossia-facing refs handed out by init()
+  m_xform_ref = {};
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Camera.hpp b/src/plugins/score-plugin-threedim/Threedim/Camera.hpp
new file mode 100644
index 0000000000..7939d533c8
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Camera.hpp
@@ -0,0 +1,216 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <cmath>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Scene-producing camera node. Emits a scene_spec containing:
+//   - a scene_node with an id derived from this node's address (so the
+//     flatten visitor can attribute the camera back to it),
+//   - a scene_transform placing the camera at eye looking at target,
+//   - a camera_component carrying yfov / znear / zfar.
+//
+// ScenePreprocessor packs every camera it collects into its Camera UBO
+// output — when merged with a scene tree this camera becomes one entry in
+// that array. active_camera_id defaults to this node's id so a single
+// Camera is picked up automatically.
+class Camera
+{
+public:
+  halp_meta(name, "Camera")
+  halp_meta(c_name, "camera_avnd")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(authors, "ossia team")
+  halp_meta(uuid, "4c91b5e2-8d76-4ab3-9f14-6e0d8b3a2c57")
+
+  struct ins
+  {
+    // Port-driven rebuild: every control carries an `update(Camera&)`
+    // callback that fires only when its value changes, triggering a
+    // `rebuild()` on the Camera. `operator()()` then just republishes
+    // the already-built m_state — no per-frame memcmp, no per-frame
+    // version bump, no merge_scenes / preprocessor thrash.
+    //
+    // halp::range only supports scalar inits (broadcast across x/y/z), so
+    // the non-uniform defaults are applied in the subclass constructor.
+    struct Eye : halp::xyz_spinboxes_f32<"Eye", halp::range{-10000., 10000., 0.}>
+    {
+      Eye() { value = {0.f, 1.f, 3.f}; }
+      void update(Camera& n) { n.rebuild(); }
+    } eye;
+    struct : halp::xyz_spinboxes_f32<"Target", halp::range{-10000., 10000., 0.}>
+    { void update(Camera& n) { n.rebuild(); } } target;
+    struct : halp::hslider_f32<"FOV", halp::range{5., 170., 60.}>
+    { void update(Camera& n) { n.rebuild(); } } fov;
+    struct : halp::hslider_f32<"Near", halp::range{0.001, 10., 0.1}>
+    { void update(Camera& n) { n.rebuild(); } } near_plane;
+    struct : halp::hslider_f32<"Far", halp::range{1., 100000., 1000.}>
+    { void update(Camera& n) { n.rebuild(); } } far_plane;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // Stable scene_node_id for this camera across frames. Set once, on the
+  // first rebuild(). Used as scene_state::active_camera_id so ScenePreprocessor
+  // picks THIS camera even when other cameras show up in merged scenes.
+  ossia::scene_node_id m_id{};
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version{0};
+  // Dirty bits to stamp on the next emission. Accumulated in rebuild()
+  // and cleared after operator()() publishes them. When no control
+  // changed this frame, operator()() republishes the same m_state with
+  // dirty=0 so the preprocessor's pointer+version comparison short-
+  // circuits the rebuild path.
+  uint8_t m_pending_dirty{ossia::scene_port::dirty_transform};
+  // Stable ids for the single scene_transform + camera_component this
+  // node emits (minted on first rebuild).
+  uint64_t m_xform_stable_id{};
+  uint64_t m_camera_stable_id{};
+
+  // Rebuild m_state from current inputs. Called from every port's
+  // `update()` callback (fires only on control changes), and once from
+  // `operator()()` on the first tick to seed m_state.
+  void rebuild()
+  {
+    if(!m_state)
+    {
+      m_state = std::make_shared<ossia::scene_state>();
+      // Deterministic, non-zero id keyed on this node's address. Non-zero
+      // so merge_scenes' active_camera_id resolution treats it as "set".
+      m_id.value = reinterpret_cast<std::uintptr_t>(this) | 0x1u;
+    }
+    if(m_camera_stable_id == 0) m_camera_stable_id = ossia::mint_stable_id();
+    if(m_xform_stable_id == 0) m_xform_stable_id = ossia::mint_stable_id();
+
+    // Rebuild as {scene_transform, camera_component} inside a scene_node.
+    auto cam = std::make_shared<ossia::camera_component>();
+    cam->stable_id = m_camera_stable_id;
+    cam->projection = ossia::camera_projection::perspective;
+    cam->yfov = inputs.fov.value * float(M_PI) / 180.f;
+    cam->znear = inputs.near_plane.value;
+    cam->zfar = inputs.far_plane.value;
+    // Propagate the RawCamera arena slot ref (populated in init()).
+    cam->raw_slot = m_camera_ref;
+
+    // Encode the world transform as TRS for the scene_transform payload.
+    ossia::scene_transform xform;
+    xform.stable_id = m_xform_stable_id;
+    xform.translation[0] = inputs.eye.value.x;
+    xform.translation[1] = inputs.eye.value.y;
+    xform.translation[2] = inputs.eye.value.z;
+    // Build a quaternion for the camera's world orientation. Qt's
+    // QQuaternion::fromDirection(direction, up) maps local +Z (NOT -Z) to
+    // `direction` — see QMatrix4x4::fromAxes in Qt source, which takes
+    // zAxis = direction. We want the camera's local +Z axis (the "back"
+    // axis of a GL camera) to point along (eye − target) so that local -Z
+    // (the GL viewing direction) points from eye toward target. Hence the
+    // -forward. Equivalently: the inverse of the TRS matches
+    // QMatrix4x4::lookAt(eye, target, up).
+    QVector3D forward(
+        inputs.target.value.x - inputs.eye.value.x,
+        inputs.target.value.y - inputs.eye.value.y,
+        inputs.target.value.z - inputs.eye.value.z);
+    if(forward.lengthSquared() > 1e-8f)
+    {
+      forward.normalize();
+      QQuaternion q = QQuaternion::fromDirection(
+          -forward, QVector3D(0.f, 1.f, 0.f));
+      xform.rotation[0] = q.x();
+      xform.rotation[1] = q.y();
+      xform.rotation[2] = q.z();
+      xform.rotation[3] = q.scalar();
+    }
+    else
+    {
+      xform.rotation[0] = 0.f;
+      xform.rotation[1] = 0.f;
+      xform.rotation[2] = 0.f;
+      xform.rotation[3] = 1.f;
+    }
+    xform.scale[0] = 1.f;
+    xform.scale[1] = 1.f;
+    xform.scale[2] = 1.f;
+    // Propagate the RawTransform slot ref (populated in init()).
+    xform.raw_slot = m_xform_ref;
+
+    auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+    children->push_back(xform);
+    children->push_back(ossia::camera_component_ptr(std::move(cam)));
+
+    auto node = std::make_shared<ossia::scene_node>();
+    node->id = m_id;
+    node->children = std::move(children);
+
+    auto roots
+        = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+    roots->push_back(std::move(node));
+
+    m_state->roots = std::move(roots);
+    m_state->active_camera_id = m_id;
+    m_version++;
+    m_state->version = m_version;
+    m_pending_dirty = ossia::scene_port::dirty_transform;
+  }
+
+  void operator()()
+  {
+    if(!m_state)
+      rebuild();
+    outputs.scene_out.scene.state = m_state;
+    outputs.scene_out.dirty = m_pending_dirty;
+    m_pending_dirty = 0;
+  }
+
+  // Render-thread hooks. init claims one RawCamera and one RawTransform
+  // slot; update packs eye / target / up / yfov / znear / zfar (and the
+  // local TRS) and uploads them; release returns both slots. The
+  // preprocessor will consume the slots in a later pass (aspect-ratio-aware
+  // matrix composition happens there); for now the scene_spec emission still
+  // drives packAndUploadCameras and the slots are producer-half plumbing.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  score::gfx::GpuResourceRegistry::Slot raw_camera_slot;
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
+
+  // Ossia-facing snapshots, stamped on the emitted components'
+  // raw_slot fields so the preprocessor can locate this camera's
+  // GPU bytes via isLive() + offset. Written once in init().
+  ossia::gpu_slot_ref m_camera_ref{};
+  ossia::gpu_slot_ref m_xform_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/CameraArray.cpp b/src/plugins/score-plugin-threedim/Threedim/CameraArray.cpp
new file mode 100644
index 0000000000..4b3d2d397d
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CameraArray.cpp
@@ -0,0 +1,133 @@
+#include "CameraArray.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <array>
+
+namespace Threedim
+{
+
+namespace
+{
+// Same face layout as CameraArray::rebuild() — keep the two definitions in
+// sync; FlattenVisitor pulls scene_transform from the scene_spec emission,
+// ScenePreprocessor will (later) consume the raw slots here.
+struct Face
+{
+  float forward[3];
+  float up[3];
+};
+constexpr std::array<Face, 6> kFaces{{
+    {{ 1.f,  0.f,  0.f}, {0.f, -1.f,  0.f}},  // +X
+    {{-1.f,  0.f,  0.f}, {0.f, -1.f,  0.f}},  // -X
+    {{ 0.f,  1.f,  0.f}, {0.f,  0.f,  1.f}},  // +Y
+    {{ 0.f, -1.f,  0.f}, {0.f,  0.f, -1.f}},  // -Y
+    {{ 0.f,  0.f,  1.f}, {0.f, -1.f,  0.f}},  // +Z
+    {{ 0.f,  0.f, -1.f}, {0.f, -1.f,  0.f}},  // -Z
+}};
+}
+
+// Order invariant: called by GfxRenderer::initState BEFORE the first
+// operator()() and BEFORE processControlIn fires any rebuild() callback.
+// m_array_ref populated here is therefore safe to read in rebuild()
+// without a guard. Adding prepare() to this node breaks the invariant —
+// see CpuFilterNode.hpp for details.
+void CameraArray::init(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  if(!raw_camera_slot.valid()) // no slot held yet: claim room for six RawCameraData
+  {
+    raw_camera_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawCamera,
+        6 * sizeof(score::gfx::RawCameraData));
+    m_array_ref = r.registry().toOssiaRef(raw_camera_slot); // base ref; rebuild() offsets it per face
+  }
+  if(raw_camera_slot.valid()) // validity re-checked before uploading
+  {
+    score::gfx::RawCameraData seed[6]{}; // value-initialized placeholder contents
+    r.registry().updateSlot(res, raw_camera_slot, &seed, sizeof(seed));
+  }
+  if(!raw_transform_slot.valid()) // same sequence for six RawLocalTransform entries
+  {
+    raw_transform_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawTransform,
+        6 * sizeof(score::gfx::RawLocalTransform));
+    m_xform_array_ref = r.registry().toOssiaRef(raw_transform_slot); // base ref; rebuild() offsets it per face
+  }
+  if(raw_transform_slot.valid())
+  {
+    score::gfx::RawLocalTransform seed[6]{}; // value-initialized; update() writes the real TRS
+    r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+  }
+}
+
+void CameraArray::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!raw_camera_slot.valid()) // NOTE(review): this also skips the transform upload below,
+    return;                    // unlike Camera::update -- confirm that is intended.
+
+  const float eye[3]{
+      inputs.origin.value.x, inputs.origin.value.y, inputs.origin.value.z};
+  const float znear = inputs.near_plane.value;
+  const float zfar = inputs.far_plane.value;
+
+  score::gfx::RawCameraData raw[6]{}; // one entry per face, in kFaces order
+  for(int i = 0; i < 6; ++i)
+  {
+    raw[i].eye[0] = eye[0];
+    raw[i].eye[1] = eye[1];
+    raw[i].eye[2] = eye[2];
+    raw[i].target[0] = eye[0] + kFaces[i].forward[0]; // target = origin + face forward
+    raw[i].target[1] = eye[1] + kFaces[i].forward[1];
+    raw[i].target[2] = eye[2] + kFaces[i].forward[2];
+    raw[i].up[0] = kFaces[i].up[0];
+    raw[i].up[1] = kFaces[i].up[1];
+    raw[i].up[2] = kFaces[i].up[2];
+    raw[i].yfov = float(M_PI) / 2.f;  // 90° per face
+    raw[i].znear = znear;
+    raw[i].zfar = zfar;
+    raw[i].projection = 0u;  // perspective
+  }
+  r.registry().updateSlot(res, raw_camera_slot, &raw, sizeof(raw)); // all six faces in one upload
+
+  if(raw_transform_slot.valid())
+  {
+    // Per-face scene_transform local TRS: translation = origin;
+    // rotation from -forward via QQuaternion::fromDirection (same as
+    // the scene_spec emission path). scale = identity.
+    score::gfx::RawLocalTransform xforms[6]{};
+    for(int i = 0; i < 6; ++i)
+    {
+      xforms[i].translation[0] = eye[0];
+      xforms[i].translation[1] = eye[1];
+      xforms[i].translation[2] = eye[2];
+      QVector3D fwd(
+          kFaces[i].forward[0], kFaces[i].forward[1], kFaces[i].forward[2]);
+      QVector3D up(kFaces[i].up[0], kFaces[i].up[1], kFaces[i].up[2]);
+      QQuaternion q = QQuaternion::fromDirection(-fwd, up);
+      xforms[i].rotation[0] = q.x(); // stored as x, y, z, w
+      xforms[i].rotation[1] = q.y();
+      xforms[i].rotation[2] = q.z();
+      xforms[i].rotation[3] = q.scalar();
+      xforms[i].scale[0] = 1.f;
+      xforms[i].scale[1] = 1.f;
+      xforms[i].scale[2] = 1.f;
+    }
+    r.registry().updateSlot(
+        res, raw_transform_slot, &xforms, sizeof(xforms));
+  }
+}
+
+void CameraArray::release(score::gfx::RenderList& r)
+{
+  if(raw_camera_slot.valid())             // NOTE(review): init() re-allocates only when
+    r.registry().free(raw_camera_slot);   // !valid() -- confirm free() resets the handle.
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot);
+  m_array_ref = {}; // drop the ossia-facing base refs handed out by init()
+  m_xform_array_ref = {};
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/CameraArray.hpp b/src/plugins/score-plugin-threedim/Threedim/CameraArray.hpp
new file mode 100644
index 0000000000..c357282a19
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CameraArray.hpp
@@ -0,0 +1,228 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>  // sizeof(score::gfx::RawCameraData) in rebuild()
+
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <array>
+#include <cmath>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Scene-producing node that emits a six-camera array laid out for cubemap
+// / multiview rendering. Each camera is a scene_node with a
+// scene_transform + camera_component payload; ScenePreprocessor's flatten
+// visitor picks them up into FlatScene::cameras, and
+// packAndUploadCameras packs them into the Camera UBO aux-buffer on
+// Geometry Out. Multiview shaders (MULTIVIEW=6) then index camera[0..5]
+// via gl_ViewIndex.
+//
+// Face convention follows the GL cubemap layout:
+//   camera[0] = +X, [1] = -X, [2] = +Y, [3] = -Y, [4] = +Z, [5] = -Z
+// Each face uses a 90° square FOV with aspect 1:1 — consumers should
+// render into a cube render target at any square resolution.
+class CameraArray
+{
+public:
+  halp_meta(name, "Camera Array")
+  halp_meta(c_name, "camera_array_avnd")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(authors, "ossia team")
+  halp_meta(uuid, "7a3e8d2f-1b94-4c6a-b7f5-8e2d0c1a4b93")
+
+  // Six GL-ordered cubemap faces at 90° FoV, aspect 1:1. Suitable as
+  // both a reflection probe array and a point-shadow cube array — the
+  // distinction is downstream (which render target / depth-only flag),
+  // not in the camera math here.
+  struct ins
+  {
+    // Port-driven rebuild: each control's update() callback fires
+    // CameraArray::rebuild() on change. operator()() republishes.
+    struct : halp::xyz_spinboxes_f32<"Origin", halp::range{-10000., 10000., 0.}>
+    { void update(CameraArray& n) { n.rebuild(); } } origin;
+    struct : halp::hslider_f32<"Near", halp::range{0.001, 10., 0.1}>
+    { void update(CameraArray& n) { n.rebuild(); } } near_plane;
+    struct : halp::hslider_f32<"Far", halp::range{1., 100000., 1000.}>
+    { void update(CameraArray& n) { n.rebuild(); } } far_plane;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // Canonical cubemap face orientations in the GL convention:
+  // { forward, up }. right = forward × up.
+  struct Face
+  {
+    float forward[3];
+    float up[3];
+  };
+
+  // Six deterministic ids rooted at this node's address — each face
+  // needs a stable, distinct scene_node_id so merge_scenes treats them
+  // as six separate cameras (same-id camera entries would collapse).
+  std::array<ossia::scene_node_id, 6> m_ids{};
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version{0};
+  uint8_t m_pending_dirty{ossia::scene_port::dirty_transform};
+
+  void rebuild()
+  {
+    if(!m_state)
+    {
+      m_state = std::make_shared<ossia::scene_state>();
+      // Seed six distinct ids from this node's address: XOR in the shifted
+      // per-face index, then set bit 0, so they're all non-zero AND distinct.
+      const auto base = reinterpret_cast<std::uintptr_t>(this);
+      for(int i = 0; i < 6; ++i)
+        m_ids[std::size_t(i)].value = (base ^ (std::uintptr_t(i + 1) << 1)) | 0x1u;
+    }
+
+    static constexpr std::array<Face, 6> kFaces{{
+        {{ 1.f,  0.f,  0.f}, {0.f, -1.f,  0.f}},  // +X
+        {{-1.f,  0.f,  0.f}, {0.f, -1.f,  0.f}},  // -X
+        {{ 0.f,  1.f,  0.f}, {0.f,  0.f,  1.f}},  // +Y
+        {{ 0.f, -1.f,  0.f}, {0.f,  0.f, -1.f}},  // -Y
+        {{ 0.f,  0.f,  1.f}, {0.f, -1.f,  0.f}},  // +Z
+        {{ 0.f,  0.f, -1.f}, {0.f, -1.f,  0.f}},  // -Z
+    }};
+
+    const float near_f = inputs.near_plane.value;
+    const float far_f = inputs.far_plane.value;
+    const float eye[3]
+        = {inputs.origin.value.x, inputs.origin.value.y,
+           inputs.origin.value.z};
+
+    auto roots
+        = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+    roots->reserve(6);
+
+    for(int i = 0; i < 6; ++i)
+    {
+      auto cam = std::make_shared<ossia::camera_component>();
+      cam->projection = ossia::camera_projection::perspective;
+      cam->yfov = float(M_PI) / 2.f;  // 90° per face for a seamless cube
+      cam->aspect_ratio = 1.f;
+      cam->znear = near_f;
+      cam->zfar = far_f;
+      // Each face owns one RawCameraData inside our single 6-wide slot.
+      // Stamp a derived ref with the face's offset — same arena /
+      // internal_index / generation, offset bumped by i entries.
+      if(m_array_ref.valid())
+      {
+        cam->raw_slot = m_array_ref;
+        cam->raw_slot.offset = m_array_ref.offset
+            + uint32_t(i * sizeof(score::gfx::RawCameraData));
+        cam->raw_slot.size = uint32_t(sizeof(score::gfx::RawCameraData));
+      }
+
+      ossia::scene_transform xform;
+      xform.translation[0] = eye[0];
+      xform.translation[1] = eye[1];
+      xform.translation[2] = eye[2];
+
+      // Same rationale as Camera.hpp: Qt's QQuaternion::fromDirection
+      // maps local +Z to `direction`, but GL cameras look along local -Z
+      // — pass the negated forward so local -Z ends up pointing along
+      // +forward (the face-direction).
+      QVector3D fwd(
+          kFaces[std::size_t(i)].forward[0], kFaces[std::size_t(i)].forward[1],
+          kFaces[std::size_t(i)].forward[2]);
+      QVector3D up(
+          kFaces[std::size_t(i)].up[0], kFaces[std::size_t(i)].up[1],
+          kFaces[std::size_t(i)].up[2]);
+      QQuaternion q = QQuaternion::fromDirection(-fwd, up);
+      xform.rotation[0] = q.x();
+      xform.rotation[1] = q.y();
+      xform.rotation[2] = q.z();
+      xform.rotation[3] = q.scalar();
+      xform.scale[0] = 1.f;
+      xform.scale[1] = 1.f;
+      xform.scale[2] = 1.f;
+      // Per-face RawTransform slot ref — same shape as the camera
+      // array ref, offset bumped to the i-th RawLocalTransform slot.
+      if(m_xform_array_ref.valid())
+      {
+        xform.raw_slot = m_xform_array_ref;
+        xform.raw_slot.offset = m_xform_array_ref.offset
+            + uint32_t(i * sizeof(score::gfx::RawLocalTransform));
+        xform.raw_slot.size
+            = uint32_t(sizeof(score::gfx::RawLocalTransform));
+      }
+
+      auto children
+          = std::make_shared<std::vector<ossia::scene_payload>>();
+      children->push_back(xform);
+      children->push_back(ossia::camera_component_ptr(std::move(cam)));
+
+      auto node = std::make_shared<ossia::scene_node>();
+      node->id = m_ids[std::size_t(i)];
+      node->children = std::move(children);
+
+      roots->push_back(std::move(node));
+    }
+
+    m_state->roots = std::move(roots);
+    // Face 0 (+X) acts as the "active" camera for non-multiview consumers
+    // that only read the first entry. Multiview shaders ignore this and
+    // index all six via gl_ViewIndex.
+    m_state->active_camera_id = m_ids[0];
+    m_version++;
+    m_state->version = m_version;
+    m_pending_dirty = ossia::scene_port::dirty_transform;
+  }
+
+  void operator()()
+  {
+    if(!m_state)
+      rebuild();
+    outputs.scene_out.scene.state = m_state;
+    outputs.scene_out.dirty = m_pending_dirty;
+    m_pending_dirty = 0;
+  }
+
+  // Render-thread hooks. A single RawCamera slot holds all six faces
+  // contiguously (6 × RawCameraData). The preprocessor will later
+  // consume this slot and compose view/projection matrices for each
+  // face with the target's aspect (1:1 for the cubemap case).
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  score::gfx::GpuResourceRegistry::Slot raw_camera_slot;
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
+
+  // Ossia-facing base refs for our 6-wide RawCamera + 6-wide
+  // RawTransform slots. Each emitted camera_component / scene_transform
+  // gets these refs with its per-face offset bumped.
+  ossia::gpu_slot_ref m_array_ref{};
+  ossia::gpu_slot_ref m_xform_array_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/CameraSwitch.hpp b/src/plugins/score-plugin-threedim/Threedim/CameraSwitch.hpp
new file mode 100644
index 0000000000..a7ba217a54
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CameraSwitch.hpp
@@ -0,0 +1,346 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <cmath>
+#include <cstdint>
+#include <memory>
+
+namespace Threedim
+{
+
+// 4-way camera switch + weighted blender.
+//
+// Operates at the scene_spec level like SceneSwitch but specialised to a
+// single purpose: select or blend between up to 4 Camera producers. Each
+// input is expected to be the output of a `Threedim::Camera` node (or any
+// scene_spec whose first root carries a scene_transform + camera_component
+// pair).
+//
+// Modes:
+//   - Select: the `index` parameter picks one of the four inputs; the other
+//             three are ignored. Equivalent to dropping SceneSwitch in front
+//             of a camera, but avoids the caveat that non-camera scene data
+//             from the unselected inputs would get dropped too.
+//   - Blend : the `weights` (x,y,z,w) parameter linearly blends the
+//             positions + FOV + near/far of the four inputs, normalise-lerps
+//             (nlerp) the orientation quaternions. Weights are auto-
+//             normalised to sum=1 internally — users can pass raw
+//             envelopes / LFO outputs directly.
+//
+// Blend semantics chosen to match what TD's Camera Blend COMP does
+// conceptually: treat each input camera as a "keyframe pose" and produce
+// a smooth in-between. nlerp is fine for small angular deltas; when you
+// need great-circle blending across wide angles, upgrade to slerp (two
+// slerps for 4-way is the standard recipe).
+//
+// Unwired inputs fall back to a zero-weight contribution. When all wired
+// inputs have zero effective weight the output is empty.
+class CameraSwitch
+{
+public:
+  halp_meta(name, "Camera Switch")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "camera_switch")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/camera-switch.html")
+  halp_meta(uuid, "d1e8c4b7-6a32-4f9e-b5d8-2c4f3a1e8b6d")
+
+  struct ins // Four camera scene inlets followed by the mode / index / weights controls.
+  {
+    struct
+    {
+      halp_meta(name, "Camera 0");
+      ossia::scene_spec scene; // rebuild() maps weights.x to this inlet
+      uint8_t dirty{0};
+    } cam0;
+    struct
+    {
+      halp_meta(name, "Camera 1");
+      ossia::scene_spec scene; // rebuild() maps weights.y to this inlet
+      uint8_t dirty{0};
+    } cam1;
+    struct
+    {
+      halp_meta(name, "Camera 2");
+      ossia::scene_spec scene; // rebuild() maps weights.z to this inlet
+      uint8_t dirty{0};
+    } cam2;
+    struct
+    {
+      halp_meta(name, "Camera 3");
+      ossia::scene_spec scene; // rebuild() maps weights.w to this inlet
+      uint8_t dirty{0};
+    } cam3;
+
+    enum CameraMode
+    {
+      Select, // operator()() forwards the inlet chosen by `index` unchanged
+      Blend   // rebuild() synthesises one camera from the weighted inlets
+    };
+    // Every control below calls CameraSwitch::rebuild() from its update() hook.
+    // Changes arriving on the camera inlets are detected in operator()() instead.
+    struct Mode : halp::enum_t<CameraMode, "Mode">
+    {
+      struct range
+      {
+        std::string_view values[2]{"Select", "Blend"};
+        CameraMode init{Select};
+      };
+      void update(CameraSwitch& n) { n.rebuild(); }
+    } mode;
+
+    struct : halp::spinbox_i32<"Index", halp::irange{0, 3, 0}>
+    { void update(CameraSwitch& n) { n.rebuild(); } } index;
+
+    // Four-channel blend weights (x,y,z,w -> cam0..cam3); rebuild() clamps negatives to zero.
+    struct : halp::xyzw_spinboxes_f32<"Weights", halp::range{-10000., 10000., 0.}>
+    { void update(CameraSwitch& n) { n.rebuild(); } } weights;
+  } inputs;
+
+  struct outs // Single scene outlet, written by operator()() on every tick.
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene; // Select: the picked inlet's state; Blend: m_state
+      uint8_t dirty{0};        // dirty bits published alongside the state
+    } scene_out;
+  } outputs;
+
+  // Stable id for the synthesised camera in Blend mode. One id kept for the
+  // whole life of the node so downstream preprocessor logic treats frames
+  // as updates to the same camera rather than a sequence of add/remove
+  // events. Assigned in rebuild() the first time m_state is allocated.
+  ossia::scene_node_id m_id{};
+  std::shared_ptr<ossia::scene_state> m_state; // Blend-mode output, reused across rebuilds
+  int64_t m_version{0};                        // bumped whenever rebuild() rewrites m_state
+  uint8_t m_pending_dirty{ossia::scene_port::dirty_transform}; // reset to 0 by operator()()
+  // Per-inlet (state pointer, version) seen on the previous tick. operator()()
+  // compares against these to detect upstream changes; version -1 = no state.
+  const ossia::scene_state* m_cached_cam_state[4]{};
+  int64_t m_cached_cam_ver[4]{-1, -1, -1, -1};
+
+  // Copies the transform and camera found among the direct children of the
+  // scene's first root into `xform` / `cam`. When several entries of a kind
+  // exist the last one wins. Returns true only if both kinds were found;
+  // on failure the out-parameters may still have been partially written.
+  static bool extractCameraPose(
+      const ossia::scene_spec& in, ossia::scene_transform& xform,
+      ossia::camera_component& cam)
+  {
+    const auto* state = in.state.get();
+    if(state == nullptr || !state->roots || state->roots->empty())
+      return false;
+    const auto& first = state->roots->front();
+    if(!first || !first->children)
+      return false;
+
+    bool haveTransform = false, haveCamera = false;
+    for(const auto& payload : *first->children)
+    {
+      if(const auto* tr = ossia::get_if<ossia::scene_transform>(&payload))
+      {
+        xform = *tr;
+        haveTransform = true;
+        continue;
+      }
+      const auto* camHolder = ossia::get_if<ossia::camera_component_ptr>(&payload);
+      if(camHolder == nullptr || !*camHolder)
+        continue;
+      cam = **camHolder;
+      haveCamera = true;
+    }
+    return haveTransform && haveCamera;
+  }
+
+  // Recomputes the node's output state from the current controls and inlets.
+  // Select: nothing is built (operator()() forwards the picked inlet); only a
+  // full dirty mask is latched. Blend: weights are clamped to >= 0, inlets
+  // without a (transform, camera) pair are ignored, the rest is normalised
+  // to sum 1; translation / scale / scalar camera parameters are weighted
+  // sums, rotation is a sign-aligned nlerp.
+  void rebuild()
+  {
+    if(inputs.mode.value == ins::CameraMode::Select)
+    {
+      m_pending_dirty = 0xFF;
+      return;
+    }
+
+    const ossia::scene_spec* inlets[4]{
+        &inputs.cam0.scene, &inputs.cam1.scene,
+        &inputs.cam2.scene, &inputs.cam3.scene};
+    const float requested[4]{
+        inputs.weights.value.x, inputs.weights.value.y,
+        inputs.weights.value.z, inputs.weights.value.w};
+
+    // Keep only positively-weighted inlets that really carry a camera.
+    ossia::scene_transform xforms[4]{};
+    ossia::camera_component cams[4]{};
+    float effWeights[4]{};
+    float wsum = 0.f;
+    for(int i = 0; i < 4; ++i)
+    {
+      if(!(requested[i] > 0.f) || !extractCameraPose(*inlets[i], xforms[i], cams[i]))
+        continue;
+      effWeights[i] = requested[i];
+      wsum += requested[i];
+    }
+
+    if(wsum <= 1e-6f)
+    {
+      // Nothing to blend: empty the cached state and flag everything dirty
+      // so consumers holding the same shared_ptr notice the transition.
+      if(m_state)
+      {
+        m_state->roots.reset();
+        m_state->active_camera_id = {};
+        m_state->version = ++m_version;
+      }
+      m_pending_dirty = 0xFF;
+      return;
+    }
+    for(float& w : effWeights) w /= wsum;
+
+    // First contributing inlet: source of the non-blendable projection mode.
+    // (cams[0] stays default-constructed when cam0 is unwired or unweighted.)
+    int lead = 0;
+    while(lead < 3 && effWeights[lead] <= 0.f)
+      ++lead;
+
+    // Accumulators are cleared explicitly: a value-initialised struct is not
+    // guaranteed to be all zeros (e.g. an identity default scale).
+    ossia::scene_transform outX{};
+    for(int k = 0; k < 3; ++k)
+      outX.translation[k] = outX.scale[k] = 0.f;
+    QQuaternion qSum(0, 0, 0, 0);
+    for(int i = 0; i < 4; ++i)
+    {
+      if(effWeights[i] <= 0.f) continue;
+      const float wi = effWeights[i];
+      for(int k = 0; k < 3; ++k)
+      {
+        outX.translation[k] += xforms[i].translation[k] * wi;
+        outX.scale[k] += xforms[i].scale[k] * wi;
+      }
+      // Double-cover handling: q and -q encode the same rotation, so flip
+      // any quaternion pointing away from the running sum before adding.
+      QQuaternion qi(
+          xforms[i].rotation[3], xforms[i].rotation[0],
+          xforms[i].rotation[1], xforms[i].rotation[2]);
+      if(QQuaternion::dotProduct(qSum, qi) < 0.f)
+        qi = -qi;
+      qSum += qi * wi;
+    }
+    // An all-zero sum cannot be normalised; fall back to the identity.
+    if(qSum.isNull())
+      qSum = QQuaternion();
+    qSum.normalize();
+    outX.rotation[0] = qSum.x();
+    outX.rotation[1] = qSum.y();
+    outX.rotation[2] = qSum.z();
+    outX.rotation[3] = qSum.scalar();
+
+    // Camera parameters: same explicit clearing, then the weighted sum.
+    ossia::camera_component outCam{};
+    outCam.projection = cams[lead].projection;
+    outCam.yfov = outCam.aspect_ratio = outCam.xmag = outCam.ymag = 0;
+    outCam.znear = outCam.zfar = 0;
+    outCam.physical.focal_length = outCam.physical.focus_distance = outCam.physical.fstop = 0;
+    for(int i = 0; i < 4; ++i)
+    {
+      if(effWeights[i] <= 0.f) continue;
+      const float wi = effWeights[i];
+      outCam.yfov += cams[i].yfov * wi;
+      outCam.aspect_ratio += cams[i].aspect_ratio * wi;
+      outCam.xmag += cams[i].xmag * wi;
+      outCam.ymag += cams[i].ymag * wi;
+      outCam.znear += cams[i].znear * wi;
+      outCam.zfar += cams[i].zfar * wi;
+      outCam.physical.focal_length += cams[i].physical.focal_length * wi;
+      outCam.physical.focus_distance += cams[i].physical.focus_distance * wi;
+      outCam.physical.fstop += cams[i].physical.fstop * wi;
+    }
+
+    // Publish a single root node holding the blended transform + camera.
+    if(!m_state)
+    {
+      m_state = std::make_shared<ossia::scene_state>();
+      m_id.value = reinterpret_cast<std::uintptr_t>(this) | 0x1u;
+    }
+    auto camPtr = std::make_shared<ossia::camera_component>(std::move(outCam));
+    auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+    children->push_back(outX);
+    children->push_back(ossia::camera_component_ptr(std::move(camPtr)));
+    auto node = std::make_shared<ossia::scene_node>();
+    node->id = m_id;
+    node->children = std::move(children);
+    auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+    roots->push_back(std::move(node));
+    m_state->roots = std::move(roots);
+    m_state->active_camera_id = m_id;
+    m_state->version = ++m_version;
+    m_pending_dirty = ossia::scene_port::dirty_transform;
+  }
+
+  // Per-tick entry point. Detects upstream changes on the four inlets by
+  // comparing each state pointer + version with the values cached on the
+  // previous tick, then publishes either the selected inlet (Select) or the
+  // blended camera (Blend). Control changes reach rebuild() through the
+  // controls' own update() hooks.
+  void operator()()
+  {
+    const ossia::scene_spec* cams[4]{
+        &inputs.cam0.scene, &inputs.cam1.scene,
+        &inputs.cam2.scene, &inputs.cam3.scene};
+    bool upstream_changed = false;
+    for(int i = 0; i < 4; ++i)
+    {
+      const auto* s = cams[i]->state.get();
+      const int64_t v = s ? s->version : -1;
+      if(m_cached_cam_state[i] != s || m_cached_cam_ver[i] != v)
+      {
+        upstream_changed = true;
+        m_cached_cam_state[i] = s;
+        m_cached_cam_ver[i] = v;
+      }
+    }
+
+    if(inputs.mode.value == ins::CameraMode::Select)
+    {
+      // Forward the picked upstream scene untouched; an out-of-range index
+      // falls back to camera 0.
+      const int idx = inputs.index.value;
+      const ossia::scene_spec& picked = *cams[(idx >= 0 && idx < 4) ? idx : 0];
+      outputs.scene_out.scene.state = picked.state;
+
+      // Two reasons to flag the output: the inlets changed upstream, or
+      // rebuild() latched a control change in m_pending_dirty. Both must be
+      // published: dropping the latched bits would hide an index or mode
+      // switch from consumers that rely on the dirty mask.
+      uint8_t dirty = m_pending_dirty;
+      if(upstream_changed && picked.state)
+        dirty = 0xFF;
+      outputs.scene_out.dirty = dirty;
+      m_pending_dirty = 0;
+      return;
+    }
+
+    // Blend: rebuild lazily on first use or whenever an inlet changed.
+    if(!m_state || upstream_changed)
+      rebuild();
+    outputs.scene_out.scene.state = m_state;
+    outputs.scene_out.dirty = m_pending_dirty;
+    m_pending_dirty = 0;
+  }
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.cpp b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.cpp
new file mode 100644
index 0000000000..6dbafa8e4c
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.cpp
@@ -0,0 +1,236 @@
+#include "ConfigurePrimitive.hpp"
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Minimal glob matcher: `*` matches within one path segment (never '/'),
+// `**` also crosses '/', `?` is one non-'/' character, anything else is
+// literal; the whole of `text` must be consumed. Two resume points are
+// kept (last `*`, last `**`): when the `*` would have to swallow a '/',
+// matching falls back to the preceding `**`, so "**/a*b" matches
+// "x/ac/ab". NOTE(review): SceneGraphFilter.cpp has its own copy of this
+// matcher — keep both in sync, or move them to a shared header.
+bool configure_glob_match(std::string_view pattern, std::string_view text) noexcept
+{
+  constexpr std::size_t none = std::string_view::npos;
+  std::size_t pi = 0, ti = 0;
+  std::size_t star_pi = none, star_ti = 0;   // resume point of the last `*`
+  std::size_t dstar_pi = none, dstar_ti = 0; // resume point of the last `**`
+
+  while(ti < text.size())
+  {
+    const bool more = pi < pattern.size();
+    if(more && pattern[pi] == '*' && pi + 1 < pattern.size() && pattern[pi + 1] == '*')
+    {
+      dstar_pi = (pi += 2);
+      dstar_ti = ti;
+      star_pi = none; // any older `*` is subsumed by this `**`
+    }
+    else if(more && pattern[pi] == '*')
+    {
+      star_pi = ++pi;
+      star_ti = ti;
+    }
+    else if(more && ((pattern[pi] == '?' && text[ti] != '/') || pattern[pi] == text[ti]))
+    {
+      ++pi;
+      ++ti;
+    }
+    else if(star_pi != none && text[star_ti] != '/')
+    {
+      pi = star_pi; // mismatch: let the `*` absorb one more character
+      ti = ++star_ti;
+    }
+    else if(dstar_pi != none)
+    {
+      star_pi = none; // the `*` is stuck on a '/': widen the `**` and rescan
+      pi = dstar_pi;
+      ti = ++dstar_ti;
+    }
+    else
+      return false;
+  }
+  while(pi < pattern.size() && pattern[pi] == '*')
+    ++pi;
+  return pi == pattern.size();
+}
+
+// True when `text` matches at least one glob in `pats`; an empty list
+// never matches.
+bool configure_any_match(
+    const std::vector<std::string>& pats, std::string_view text) noexcept
+{
+  const auto hit = [text](const std::string& glob) { return configure_glob_match(glob, text); };
+  return std::any_of(pats.begin(), pats.end(), hit);
+}
+
+// Applies one ConfigurePrimitive mode to every node whose path matches a
+// glob from `paths`, copying only the nodes that actually change.
+// Members stay in aggregate-initialisation order (mode, then paths);
+// `paths` is a borrowed reference and must outlive the walker.
+struct PrimitiveWalker
+{
+  // Flag operation applied to matching nodes.
+  ConfigurePrimitive::Mode mode;
+  // Glob patterns tested against each node's slash-joined path.
+  const std::vector<std::string>& paths;
+
+  // Copy-on-write walk over one subtree; `path` is the path of `src`.
+  //
+  // A node whose own flags and whose descendants are all unaffected comes
+  // back as the very same shared_ptr, so untouched branches keep their
+  // pointer identity (and downstream caches stay warm). Anything else is
+  // shallow-copied with its dirty_index bumped by one. A null `src` is
+  // returned as-is.
+  ossia::scene_node_ptr
+  rewrite(const ossia::scene_node_ptr& src, const std::string& path) const
+  {
+    if(!src)
+      return src;
+
+    using CP = ConfigurePrimitive;
+
+    // Decode the mode: which flags does it write, and with which value?
+    // (Set{Active,Visible,...} turn a flag on, the In-variants turn it off.)
+    // A value outside the enum writes nothing at all.
+    const bool turns_on = mode == CP::SetActive || mode == CP::SetVisible
+                          || mode == CP::SetActiveAndVisible;
+    const bool writes_active
+        = mode == CP::SetActive || mode == CP::SetInactive
+          || mode == CP::SetActiveAndVisible
+          || mode == CP::SetInactiveAndInvisible;
+    const bool writes_visible
+        = mode == CP::SetVisible || mode == CP::SetInvisible
+          || mode == CP::SetActiveAndVisible
+          || mode == CP::SetInactiveAndInvisible;
+
+    // Flags the node should end up with; only a glob match may alter them.
+    const bool hit = configure_any_match(paths, path);
+    const bool active = (hit && writes_active) ? turns_on : src->active;
+    const bool visible = (hit && writes_visible) ? turns_on : src->visible;
+    const bool flags_differ
+        = (active != src->active) || (visible != src->visible);
+
+    // Shallow copy of `src` carrying the new flags and a bumped dirty_index.
+    const auto clone = [&] {
+      auto copy = std::make_shared<ossia::scene_node>(*src);
+      copy->active = active;
+      copy->visible = visible;
+      copy->dirty_index = src->dirty_index + 1;
+      return copy;
+    };
+
+    // Leaf: either share the original or hand back the re-flagged copy.
+    if(!src->has_children())
+    {
+      if(flags_differ)
+        return clone();
+      return src;
+    }
+
+    // Interior node: rebuild the child list, recursing into node payloads.
+    auto kids = std::make_shared<std::vector<ossia::scene_payload>>();
+    kids->reserve(src->children->size());
+    bool subtree_changed = false;
+    for(const auto& payload : *src->children)
+    {
+      const auto* sub = ossia::get_if<ossia::scene_node_ptr>(&payload);
+      if(sub == nullptr || !*sub)
+      {
+        // Non-node payloads and null child slots are carried over as-is.
+        kids->push_back(payload);
+        continue;
+      }
+
+      const std::string child_path = path + '/' + (*sub)->name;
+      auto rewritten = rewrite(*sub, child_path);
+      if(rewritten.get() != sub->get())
+        subtree_changed = true;
+      kids->push_back(rewritten ? rewritten : *sub);
+    }
+
+    // Nothing changed anywhere below: drop the rebuilt list, share `src`.
+    if(!flags_differ && !subtree_changed)
+      return src;
+
+    // Otherwise publish a copy that owns the rebuilt child list.
+    auto copy = clone();
+    copy->children = std::move(kids);
+    return copy;
+  }
+};
+
+} // namespace
+
+// Rebuilds the cached output and records the upstream identity (state
+// pointer + version) it was derived from. No input: the cache is dropped.
+// Empty pattern list: the input is forwarded untouched. Otherwise a copy
+// of the input with rewritten roots is published under a fresh version.
+void ConfigurePrimitive::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  const auto* in_state = in.state.get();
+  const auto& globs = inputs.paths.value;
+
+  // Remember what this build was derived from on every path, so that
+  // operator()() never keeps comparing against a released upstream state.
+  m_cached_in_state = in_state;
+  m_cached_in_version = in_state ? in_state->version : -1;
+  m_cached_mode = inputs.mode.value;
+  m_cached_paths = globs;
+
+  if(!in_state)
+  {
+    // Signal the scene -> empty transition once; stay quiet afterwards.
+    m_pending_dirty = m_cached_out ? 0xFF : 0;
+    m_cached_out.reset();
+    return;
+  }
+
+  if(globs.empty())
+  {
+    // Nothing to match: pass the upstream state through untouched.
+    m_cached_out = in.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  PrimitiveWalker w{Mode(inputs.mode.value), globs};
+  auto new_roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  if(in_state->roots)
+  {
+    new_roots->reserve(in_state->roots->size());
+    for(const auto& r : *in_state->roots) // null roots are dropped
+      if(auto rw = w.rewrite(r, r ? ("/" + r->name) : std::string{}))
+        new_roots->push_back(std::move(rw));
+  }
+
+  auto new_state = std::make_shared<ossia::scene_state>(*in_state);
+  new_state->roots = std::move(new_roots);
+  new_state->version = ++m_version_counter;
+  new_state->dirty_index = in_state->dirty_index + 1;
+  m_cached_out = std::move(new_state);
+  m_pending_dirty = 0xFF;
+}
+
+void ConfigurePrimitive::operator()()
+{
+  // Controls call rebuild() themselves; this tick only has to notice that
+  // the upstream state object or its version differs from the cached one,
+  // or that a wired input has no output built for it yet.
+  const auto* upstream = inputs.scene_in.scene.state.get();
+  const int64_t upstream_version = upstream ? upstream->version : -1;
+  const bool same_input = m_cached_in_state == upstream && m_cached_in_version == upstream_version;
+  const bool missing_output = upstream && !m_cached_out;
+  if(!same_input || missing_output) { rebuild(); }
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+  outputs.scene_out.scene.state = m_cached_out;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.hpp b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.hpp
new file mode 100644
index 0000000000..508112f65a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.hpp
@@ -0,0 +1,108 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace Threedim
+{
+
+// Authors metadata flags on matching scene_nodes: active, visible.
+// Matches Solaris's "Configure Primitive" LOP, trimmed to the flags
+// that are meaningful for a live renderer. (USD also has `kind` and
+// `purpose` fields; we can add those later if needed — for now they
+// don't change rendering behaviour.)
+//
+// Usage pattern:
+//   glTF → ConfigurePrimitive(paths=["**/chairs"], mode="Set inactive") → ScenePreprocessor
+// disables the entire `chairs` subtree non-destructively — switching
+// the mode back re-activates it without reloading the glTF or rebuilding
+// any GPU state. (Node paths start with '/'; use `**` to reach nested nodes.)
+//
+// `visible` acts at the leaf level (hides from rendering but keeps the
+// subtree composed); `active` is stronger (skips the subtree in the
+// flatten walk entirely — no transforms applied, no data uploaded).
+class ConfigurePrimitive
+{
+public:
+  halp_meta(name, "Configure Primitive")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "configure_primitive")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/configure-primitive.html")
+  halp_meta(uuid, "4b8e9d2a-7c5f-4e3a-9b1c-3d2f5e8a7b9c")
+
+  enum Mode
+  {
+    // Applies the flags to every matching node. Non-matching nodes
+    // keep their existing flags (no change).
+    SetActive,
+    SetInactive,
+    SetVisible,
+    SetInvisible,
+    // Apply both at once — useful for "this subtree is off right now".
+    SetActiveAndVisible,
+    SetInactiveAndInvisible
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Port-driven rebuild: controls trigger rebuild() via update().
+    // scene_in pointer/version changes detected in operator()().
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[6]{
+            "Set active",    "Set inactive",
+            "Set visible",   "Set invisible",
+            "Active + visible", "Inactive + invisible"};
+        int init{0};
+      };
+      void update(ConfigurePrimitive& n) { n.rebuild(); }
+    } mode;
+
+    // Path-glob list. Same syntax as SceneGraphFilter: `*` wildcards
+    // within a segment, `**` crosses slashes, `?` single char, literal
+    // names otherwise. An empty list turns the node into a passthrough.
+    struct : halp::val_port<"Paths", std::vector<std::string>>
+    { void update(ConfigurePrimitive& n) { n.rebuild(); } } paths;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  std::shared_ptr<const ossia::scene_state> m_cached_out; // last built output; aliases the input when passing through
+  uint8_t m_pending_dirty{0xFF};                          // dirty bits published (then cleared) by operator()()
+  const ossia::scene_state* m_cached_in_state{};          // upstream state the cache was built from (identity check)
+  int64_t m_cached_in_version{-1};                        // upstream version the cache was built from; -1 = none
+  int m_cached_mode{-1};                                  // mode value snapshotted by rebuild()
+  std::vector<std::string> m_cached_paths;                // pattern list snapshotted by rebuild()
+  int64_t m_version_counter{0};                           // source of the output state's version numbers
+};
+
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/CreateCollection.cpp b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.cpp
new file mode 100644
index 0000000000..a340cabdea
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.cpp
@@ -0,0 +1,80 @@
+#include "CreateCollection.hpp"
+
+namespace Threedim
+{
+
+// Rebuilds the cached output: the input passed through unchanged (name or
+// paths still empty) or a new state carrying this node's collection.
+void CreateCollection::rebuild()
+{
+  const auto& upstream = inputs.scene_in.scene;
+  const ossia::scene_state* src = upstream.state.get();
+  // Record which upstream state / version this build is derived from.
+  m_cached_in_state = src;
+  m_cached_in_version = src ? src->version : -1;
+
+  const auto& wanted_name = inputs.name.value;
+  const auto& wanted_paths = inputs.paths.value;
+  const auto& wanted_tags = inputs.tags.value;
+
+  // Without both a name and at least one path there is nothing to author:
+  // forward the input so the node is harmless while still unconfigured.
+  const bool configured = !wanted_name.empty() && !wanted_paths.empty();
+  if(!configured)
+  {
+    m_cached_out = upstream.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  // Inherited collections first, minus any that already uses our name
+  // (each CreateCollection owns its name), then ours at the end.
+  auto all = std::make_shared<std::vector<ossia::scene_collection_ptr>>();
+  if(src && src->collections)
+  {
+    for(const auto& existing : *src->collections)
+      if(existing && existing->name != wanted_name)
+        all->push_back(existing);
+  }
+  {
+    auto mine = std::make_shared<ossia::scene_collection>();
+    mine->name = wanted_name;
+    for(const auto& path : wanted_paths)
+      mine->paths.push_back(path);
+    for(const auto& tag : wanted_tags)
+      mine->tags.push_back(tag);
+    all->push_back(std::move(mine));
+  }
+
+  // Fresh state object; the fields below are copied from the input if any.
+  auto out = std::make_shared<ossia::scene_state>();
+  if(src)
+  {
+    out->roots = src->roots;
+    out->materials = src->materials;
+    out->animations = src->animations;
+    out->cameras = src->cameras;
+    out->skeletons = src->skeletons;
+    out->environment = src->environment;
+    out->active_camera_id = src->active_camera_id;
+  }
+  out->collections = std::move(all);
+  out->version = ++m_version_counter;
+  out->dirty_index = m_version_counter;
+  m_cached_out = std::move(out);
+  m_pending_dirty = 0xFF;
+}
+
+void CreateCollection::operator()()
+{
+  // Rebuild on an upstream change, or when nothing is cached although a state could be produced.
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  const bool upstream_changed = m_cached_in_state != in_state || m_cached_in_version != in_version;
+  const bool can_build = in_state || !(inputs.name.value.empty() || inputs.paths.value.empty()); // else rebuild() only yields null
+  if(upstream_changed || (!m_cached_out && can_build)) { rebuild(); } // avoids re-raising dirty every tick when unwired
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/CreateCollection.hpp b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.hpp
new file mode 100644
index 0000000000..38a1055f92
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.hpp
@@ -0,0 +1,79 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace Threedim
+{
+
+// Authors a named collection (Solaris-style reusable path set) and
+// stamps it onto the passthrough scene_spec's collections vector.
+//
+// Collections are addressable by name anywhere downstream — a consumer
+// node that takes a collection name (e.g. a future SceneGraphFilter
+// "by collection" mode) resolves the paths at consume-time. This
+// decouples "what is the set of things I care about?" from "what am I
+// doing to them?" — the classic Solaris LIVRPS composition win.
+//
+// Multiple CreateCollection nodes can chain: each contributes its own
+// named collection to the scene (an inherited collection with the same
+// name is replaced), and downstream consumers can pick any of them by
+// name. merge_scenes concatenates collections across multi-producer merges.
+class CreateCollection
+{
+public:
+  halp_meta(name, "Create Collection")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "create_collection")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/create-collection.html")
+  halp_meta(uuid, "6c2e9b7a-4d3f-4a1c-8f5e-2b7d9e4c3a1f")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Every control calls rebuild() from its update() hook; scene_in changes are
+    // detected in operator()(). An empty Name or Paths makes the node a passthrough.
+    struct : halp::lineedit<"Name", "">
+    { void update(CreateCollection& n) { n.rebuild(); } } name;
+    struct : halp::val_port<"Paths", std::vector<std::string>>
+    { void update(CreateCollection& n) { n.rebuild(); } } paths;
+    struct : halp::val_port<"Tags", std::vector<std::string>>
+    { void update(CreateCollection& n) { n.rebuild(); } } tags;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  std::shared_ptr<const ossia::scene_state> m_cached_out; // last built output; aliases the input when passing through
+  uint8_t m_pending_dirty{0xFF};                          // dirty bits published (then cleared) by operator()()
+  const ossia::scene_state* m_cached_in_state{};          // upstream state the cache was built from (identity check)
+  int64_t m_cached_in_version{-1};                        // upstream version the cache was built from; -1 = none
+  int64_t m_version_counter{0};                           // source of the output state's version / dirty_index
+};
+
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp b/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp
index c5783a3907..c6f4fcaf43 100644
--- a/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp
@@ -6,8 +6,13 @@
 
 #include <Gfx/Graph/RenderList.hpp>
 
+#include <ossia/dataflow/geometry_port.hpp>
+
 #include <QtGui/private/qrhi_p.h>
 
+#include <cstdint>
+#include <memory>
+
 namespace Threedim
 {
 
@@ -34,12 +39,50 @@ class CubemapComposer
 
   struct
   {
-    halp::gpu_texture_output<"Cubemap"> cubemap;
+    halp::gpu_cubemap_output<"Cubemap"> cubemap;
+    // Scene-graph route: emits a scene_spec whose environment.skybox_texture
+    // points at our cube handle. See CubemapLoader for the same pattern.
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
   } outputs;
 
+  // Per-face shape cache. Drives texture-recreation when face size changes.
+  // Content-change detection uses the producer's `changed` flag instead of
+  // a bytes-pointer compare — pointer identity missed in-place buffer
+  // updates (video readback into a ring buffer reuses the same pointer
+  // address, so the old fingerprint check stayed equal across content
+  // changes and the cube never re-uploaded).
+  struct FaceFingerprint
+  {
+    int width{0};  // texture width last recorded for this face by update()
+    int height{0}; // texture height last recorded for this face by update()
+  };
+
   QRhiTexture* m_cubemapTex{};
   int m_faceSize{0};
   bool m_dirty{true};
+  FaceFingerprint m_lastFaces[6]{};
+  std::shared_ptr<ossia::scene_state> m_sceneState;
+  int64_t m_sceneVersion{0};
+  void* m_lastPublishedHandle{};
+
+  // Dtor safety net — same rationale as CubemapLoader: guarantees the
+  // VkImage is deleteLater'd even if release(RenderList&) was skipped,
+  // so QRhi's destructor drains the pending-delete list before
+  // vkDestroyDevice. Without this, Vulkan validation reports a leaked
+  // VkImage on exit.
+  ~CubemapComposer()
+  {
+    // No texture held: nothing to schedule for deletion.
+    if(m_cubemapTex == nullptr)
+      return;
+    m_cubemapTex->deleteLater(); // deferred destruction of the QRhi resource
+    m_cubemapTex = nullptr;
+  }
 
   void operator()() { }
 
@@ -52,9 +95,28 @@ class CubemapComposer
       score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
       score::gfx::Edge* e)
   {
-    // Determine face size from the largest input
+    // Determine face size from the largest input; detect content changes
+    // by reading the producer's `changed` flag (set by halp::texture's
+    // update() — see avendish texture_formats.hpp). Resetting `changed`
+    // to false after consumption keeps the next frame's check fresh.
+    // Size changes are tracked separately so a producer that resizes the
+    // face still triggers a texture recreation even when it forgot to
+    // toggle `changed`.
     int maxSize = 0;
-    auto checkFace = [&](const auto& tex) {
+    int faceIdx = 0;
+    auto checkFace = [&](auto& tex) {
+      FaceFingerprint cur{tex.texture.width, tex.texture.height};
+      const bool sizeChanged
+          = (cur.width != m_lastFaces[faceIdx].width
+             || cur.height != m_lastFaces[faceIdx].height);
+      const bool contentChanged = tex.texture.changed;
+      if(sizeChanged || contentChanged)
+      {
+        m_lastFaces[faceIdx] = cur;
+        m_dirty = true;
+      }
+      tex.texture.changed = false; // consumed; producer will set it on next update()
+      ++faceIdx;
       if(tex.texture.bytes && tex.texture.width > 0 && tex.texture.height > 0)
       {
         int s = std::max(tex.texture.width, tex.texture.height);
@@ -94,6 +156,22 @@ class CubemapComposer
       outputs.cubemap.texture.handle = m_cubemapTex;
       m_dirty = true;
     }
+
+    // Publish the cube on the Scene outlet (skybox_texture only — other
+    // environment fields are left for EnvironmentLoader / elsewhere to
+    // populate, merge_scenes overlays field-by-field).
+    if(!m_sceneState)
+      m_sceneState = std::make_shared<ossia::scene_state>();
+    if(m_lastPublishedHandle != m_cubemapTex)
+    {
+      m_sceneState->environment = {};
+      m_sceneState->environment.skybox_texture.native_handle = m_cubemapTex;
+      m_lastPublishedHandle = m_cubemapTex;
+      m_sceneVersion++;
+      m_sceneState->version = m_sceneVersion;
+      outputs.scene_out.scene.state = m_sceneState;
+      outputs.scene_out.dirty = ossia::scene_port::dirty_environment;
+    }
   }
 
   void release(score::gfx::RenderList& r)
@@ -105,6 +183,14 @@ class CubemapComposer
     }
     m_faceSize = 0;
     outputs.cubemap.texture.handle = nullptr;
+    if(m_sceneState)
+    {
+      m_sceneState->environment = {};
+      m_lastPublishedHandle = nullptr;
+      m_sceneVersion++;
+      m_sceneState->version = m_sceneVersion;
+      outputs.scene_out.dirty = ossia::scene_port::dirty_environment;
+    }
   }
 
   void runInitialPasses(
diff --git a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp
index b3d596e42e..73b6d32297 100644
--- a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp
@@ -2,17 +2,24 @@
 
 #include <Gfx/Graph/ShaderCache.hpp>
 
-#include <QFile>
-#include <QFileInfo>
+#include <QDebug>
 
 #include <cmath>
 
 namespace Threedim
 {
 
-// Fullscreen triangle vertex shader
+// Fullscreen triangle vertex shader. Applies clipSpaceCorrMatrix + the
+// non-GL conditional Y-flip — matches the engine-wide ossia convention
+// (see isf.cpp's vertexInitFunc). Guarantees v_texcoord.y=1 is the top
+// of the rendered face across GL / Vulkan / Metal / D3D.
 static const constexpr auto equirect_vs = R"_(#version 450
 
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 RENDERSIZE;
+} renderer;
+
 layout(location = 0) out vec2 v_texcoord;
 
 out gl_PerVertex { vec4 gl_Position; };
@@ -22,25 +29,39 @@ void main()
   // Fullscreen triangle
   vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);
   v_texcoord = pos;
-  gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0);
+  gl_Position = renderer.clipSpaceCorrMatrix * vec4(pos * 2.0 - 1.0, 0.0, 1.0);
+#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  gl_Position.y = -gl_Position.y;
+#endif
 }
 )_";
 
-// Fragment shader: sample equirectangular map for a specific cubemap face
-// The face index is passed via UBO
+// Fragment shader: sample equirectangular map for a specific cubemap face.
+// renderer_t (binding 0) matches the engine convention; FaceInfo moves to
+// binding 2 so it doesn't collide.
 static const constexpr auto equirect_fs = R"_(#version 450
 
+layout(std140, binding = 0) uniform renderer_t {
+  mat4 clipSpaceCorrMatrix;
+  vec2 RENDERSIZE;
+} renderer;
+
 layout(location = 0) in vec2 v_texcoord;
 layout(location = 0) out vec4 fragColor;
 
-layout(std140, binding = 0) uniform FaceInfo {
+layout(std140, binding = 2) uniform FaceInfo {
   int faceIndex;
 } face;
 
-layout(binding = 1) uniform sampler2D equirectMap;
+layout(binding = 3) uniform sampler2D equirectMap;
 
 const float PI = 3.14159265358979323846;
 
+// Face direction — v_texcoord.y=1 is the TOP of the rendered face
+// (after the vertex stage's clipSpaceCorrMatrix + non-GL flip). This
+// maps to sampled UV.y=0 in QRhi's top-left-origin UV, which per cube
+// spec corresponds to cube-spec t=-1 → direction biased toward +Y.
+// Hence the signs on `v` (flipped vs. the legacy raw-NDC form).
 vec3 faceDirection(int faceIdx, vec2 uv)
 {
   // Map UV from [0,1] to [-1,1]
@@ -50,12 +71,12 @@ vec3 faceDirection(int faceIdx, vec2 uv)
   // QRhi cubemap face order: +X, -X, +Y, -Y, +Z, -Z
   switch(faceIdx)
   {
-    case 0: return vec3( 1.0,   -v,   -u); // +X
-    case 1: return vec3(-1.0,   -v,    u); // -X
-    case 2: return vec3(   u,  1.0,    v); // +Y
-    case 3: return vec3(   u, -1.0,   -v); // -Y
-    case 4: return vec3(   u,   -v,  1.0); // +Z
-    case 5: return vec3(  -u,   -v, -1.0); // -Z
+    case 0: return vec3( 1.0,    v,   -u); // +X
+    case 1: return vec3(-1.0,    v,    u); // -X
+    case 2: return vec3(   u,  1.0,   -v); // +Y
+    case 3: return vec3(   u, -1.0,    v); // -Y
+    case 4: return vec3(   u,    v,  1.0); // +Z
+    case 5: return vec3(  -u,    v, -1.0); // -Z
     default: return vec3(0.0);
   }
 }
@@ -64,44 +85,43 @@ void main()
 {
   vec3 dir = normalize(faceDirection(face.faceIndex, v_texcoord));
 
-  // Convert direction to equirectangular UV
-  float theta = atan(dir.z, dir.x);       // [-PI, PI]
-  float phi   = asin(clamp(dir.y, -1.0, 1.0)); // [-PI/2, PI/2]
+  // Convert direction to equirectangular UV.
+  // Longitude: atan2(z, x) ∈ [-π, π] → u ∈ [0, 1].
+  // Latitude:  asin(y)    ∈ [-π/2, π/2].
+  //
+  // Y flip: QRhi normalizes texture sampling to top-left-origin UV
+  // (UV.y = 0 at the top of the stored image — uniform across
+  // backends, see qrhi.cpp + QRhi::isYUpInFramebuffer). QImage
+  // uploads via uploadTexture(QImage) land scanline 0 at the
+  // texture's UV.y = 0, so sky (image top) is at UV.y = 0 and
+  // ground (image bottom) at UV.y = 1. The raw formula
+  // `v = phi/π + 0.5` would put sky at UV.y = 1 — wrong. Flip.
+  //
+  // LearnOpenGL uses the unflipped formula and works because GL's
+  // bottom-left-origin UV cancels the inversion — QRhi's top-left
+  // convention doesn't cancel it, so we flip explicitly.
+  //
+  // (Cube-face rendering side: the vertex stage above applies
+  // clipSpaceCorrMatrix plus the non-GL Y-flip, so v_texcoord.y = 1
+  // is intended to be the top of the rendered face on every
+  // backend, and `faceDirection()` above is written against that
+  // convention. Other shaders in the IBL / test-cube family may
+  // still write raw NDC without clipSpaceCorrMatrix (not visible
+  // here); if so, their output stays backend-dependent and needs
+  // the same treatment, or a conditional v_texcoord flip based on
+  // isYUpInFramebuffer, to match this shader.
+  // Normalising those is out of scope for this edit.)
+  float theta = atan(dir.z, dir.x);
+  float phi   = asin(clamp(dir.y, -1.0, 1.0));
 
   vec2 equirectUV;
   equirectUV.x = theta / (2.0 * PI) + 0.5;
-  equirectUV.y = phi / PI + 0.5;
+  equirectUV.y = 0.5 - phi / PI;
 
   fragColor = texture(equirectMap, equirectUV);
 }
 )_";
 
-void CubemapLoader::loadImage()
-{
-  const auto& path = inputs.image.value;
-  if(path.empty())
-  {
-    m_loadedImage = QImage{};
-    return;
-  }
-
-  QString qpath = QString::fromStdString(path);
-  if(!QFileInfo::exists(qpath))
-  {
-    m_loadedImage = QImage{};
-    return;
-  }
-
-  QImage img(qpath);
-  if(img.isNull())
-  {
-    m_loadedImage = QImage{};
-    return;
-  }
-
-  m_loadedImage = img.convertToFormat(QImage::Format_RGBA8888);
-}
-
 QImage CubemapLoader::extractFace(int faceIndex) const
 {
   if(m_loadedImage.isNull())
@@ -179,6 +199,23 @@ void CubemapLoader::createCubemapTexture(QRhi& rhi, int faceSize)
   m_cubemapTex->create();
 
   outputs.cubemap.texture.handle = m_cubemapTex;
+
+  // Publish the cube on the Scene outlet too: one shared_ptr-stable
+  // scene_state whose environment.skybox_texture.native_handle points at
+  // our QRhiTexture. Version bumps only when the handle actually changes
+  // so merge_scenes / ScenePreprocessor short-circuit unchanged frames.
+  if(!m_sceneState)
+    m_sceneState = std::make_shared<ossia::scene_state>();
+  if(m_lastPublishedHandle != m_cubemapTex)
+  {
+    m_sceneState->environment = {};  // only skybox_texture is ours to touch
+    m_sceneState->environment.skybox_texture.native_handle = m_cubemapTex;
+    m_lastPublishedHandle = m_cubemapTex;
+    m_sceneVersion++;
+    m_sceneState->version = m_sceneVersion;
+    outputs.scene_out.scene.state = m_sceneState;
+    outputs.scene_out.dirty = ossia::scene_port::dirty_environment;
+  }
 }
 
 void CubemapLoader::releaseCubemapTexture()
@@ -204,9 +241,20 @@ void CubemapLoader::releaseCubemapTexture()
   }
   m_faceSize = 0;
   outputs.cubemap.texture.handle = nullptr;
+
+  // Clear the scene outlet too: downstream merge_scenes will stop
+  // contributing a skybox_texture from us once the handle goes null.
+  if(m_sceneState)
+  {
+    m_sceneState->environment = {};
+    m_lastPublishedHandle = nullptr;
+    m_sceneVersion++;
+    m_sceneState->version = m_sceneVersion;
+    outputs.scene_out.dirty = ossia::scene_port::dirty_environment;
+  }
 }
 
-void CubemapLoader::releaseEquirectResources()
+void CubemapLoader::releaseEquirectResources(score::gfx::RenderList* renderer)
 {
   if(m_equirectPipeline)
   {
@@ -220,7 +268,10 @@ void CubemapLoader::releaseEquirectResources()
   }
   if(m_equirectUbo)
   {
-    m_equirectUbo->deleteLater();
+    if(renderer)
+      renderer->releaseBuffer(m_equirectUbo);
+    else
+      m_equirectUbo->deleteLater();
     m_equirectUbo = nullptr;
   }
   if(m_equirectSampler)
@@ -235,7 +286,10 @@ void CubemapLoader::releaseEquirectResources()
   }
   if(m_quadVbuf)
   {
-    m_quadVbuf->deleteLater();
+    if(renderer)
+      renderer->releaseBuffer(m_quadVbuf);
+    else
+      m_quadVbuf->deleteLater();
     m_quadVbuf = nullptr;
   }
 }
@@ -247,6 +301,7 @@ void CubemapLoader::setupEquirectPipeline(score::gfx::RenderList& renderer)
   // UBO for face index
   m_equirectUbo = rhi.newBuffer(
       QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(int32_t) * 4);
+  m_equirectUbo->setName("CubemapLoader::equirect_ubo");
   m_equirectUbo->create();
 
   // Sampler for equirectangular source
@@ -255,15 +310,25 @@ void CubemapLoader::setupEquirectPipeline(score::gfx::RenderList& renderer)
       QRhiSampler::Repeat, QRhiSampler::ClampToEdge);
   m_equirectSampler->create();
 
-  // SRB
+  // SRB — matches the new shader layout:
+  //   binding 0: renderer_t (shared engine UBO with clipSpaceCorrMatrix)
+  //   binding 2: FaceInfo (our per-face index)
+  //   binding 3: equirectangular source sampler
+  // Binding 1 is reserved for the engine's process_t UBO convention
+  // (not used here, but skipped to avoid future collisions).
   m_equirectSrb = rhi.newShaderResourceBindings();
   m_equirectSrb->setBindings(
       {QRhiShaderResourceBinding::uniformBuffer(
            0,
+           QRhiShaderResourceBinding::VertexStage
+               | QRhiShaderResourceBinding::FragmentStage,
+           &renderer.outputUBO()),
+       QRhiShaderResourceBinding::uniformBuffer(
+           2,
            QRhiShaderResourceBinding::FragmentStage,
            m_equirectUbo),
        QRhiShaderResourceBinding::sampledTexture(
-           1,
+           3,
            QRhiShaderResourceBinding::FragmentStage,
            m_equirectTex,
            m_equirectSampler)});
@@ -312,14 +377,31 @@ void CubemapLoader::update(
     score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
     score::gfx::Edge* e)
 {
-  if(!m_imageChanged)
-    return;
-
-  loadImage();
+  // No-op on the render thread. The decode runs on the halp file-port
+  // worker (see image_t::process in CubemapLoader.hpp) which delivers
+  // the decoded QImage to m_loadedImage and sets m_imageChanged.
+  // runInitialPasses() picks that up and uploads + transcodes the cube.
 }
 
 void CubemapLoader::release(score::gfx::RenderList& r)
 {
+  releaseEquirectResources(&r);
+  releaseCubemapTexture();
+}
+
+CubemapLoader::~CubemapLoader()
+{
+  // Safety net for when release(RenderList&) never ran. Intended to be
+  // idempotent: the helpers null the pointers they free, so a second
+  // pass after a normal release is a no-op. No RenderList is passed
+  // here, so the equirect buffers take the deleteLater() path.
+  if(m_cubemapTex || m_equirectTex)
+  {
+    qDebug() << "[BUFTRACE] ~CubemapLoader FALLBACK this=" << (void*)this
+             << " m_cubemapTex=" << (void*)m_cubemapTex
+             << " m_equirectTex=" << (void*)m_equirectTex
+             << " (release(RenderList&) was never called — leaked textures)";
+  }
   releaseEquirectResources();
   releaseCubemapTexture();
 }
@@ -390,14 +472,21 @@ void CubemapLoader::renderEquirectangular(
   }
   else
   {
-    // Update SRB if equirect texture changed
+    // Update SRB if equirect texture changed. Mirror the slot layout
+    // from setupEquirectPipeline: binding 0 = engine renderer_t,
+    // binding 2 = FaceInfo, binding 3 = equirect sampler.
     m_equirectSrb->setBindings(
         {QRhiShaderResourceBinding::uniformBuffer(
              0,
+             QRhiShaderResourceBinding::VertexStage
+                 | QRhiShaderResourceBinding::FragmentStage,
+             &renderer.outputUBO()),
+         QRhiShaderResourceBinding::uniformBuffer(
+             2,
              QRhiShaderResourceBinding::FragmentStage,
              m_equirectUbo),
          QRhiShaderResourceBinding::sampledTexture(
-             1,
+             3,
              QRhiShaderResourceBinding::FragmentStage,
              m_equirectTex,
              m_equirectSampler)});
diff --git a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp
index 26d0a0ddf4..d1bdd65d19 100644
--- a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp
@@ -1,15 +1,22 @@
 #pragma once
 
 #include <halp/controls.hpp>
+#include <halp/file_port.hpp>
 #include <halp/meta.hpp>
 #include <halp/texture.hpp>
 
 #include <Gfx/Graph/RenderList.hpp>
 
+#include <ossia/dataflow/geometry_port.hpp>
+
 #include <QtGui/private/qrhi_p.h>
 
 #include <QImage>
 
+#include <cstdint>
+#include <functional>
+#include <memory>
+
 namespace Threedim
 {
 
@@ -35,9 +42,37 @@ class CubemapLoader
 
   struct ins
   {
-    struct : halp::lineedit<"Image", "">
+    // File-port boilerplate — same pattern as ImageLoader. process()
+    // runs on the file-load worker thread, decodes the image off the
+    // render thread, returns a lambda that stages the decoded QImage
+    // onto the node from the execution thread. See diagnostic 041 —
+    // the previous lineedit<…> path called QImage(qpath) from update()
+    // on the render thread, blocking command recording for many frames
+    // on a large cube cross / equirect HDR.
+    struct image_t : halp::file_port<"Image", halp::mmap_file_view>
     {
-      void update(CubemapLoader& self) { self.m_imageChanged = true; }
+      halp_meta(extensions,
+          "Images (*.png *.jpg *.jpeg *.bmp *.tga *.webp *.tif *.tiff *.hdr *.exr)");
+      // Worker-thread decode: returns a closure that hands the image to the node.
+      static std::function<void(CubemapLoader&)> process(file_type data)
+      {
+        QImage img;
+        // loadFromData takes an int length: skip payloads that don't fit.
+        if(!data.bytes.empty() && data.bytes.size() <= 0x7fffffffu)
+          img.loadFromData(
+              reinterpret_cast<const uchar*>(data.bytes.data()),
+              (int)data.bytes.size());
+        // Path fallback; explicit length, the name may not be NUL-terminated.
+        if(img.isNull() && !data.filename.empty())
+          img = QImage(QString::fromUtf8(
+              data.filename.data(), (qsizetype)data.filename.size()));
+        if(!img.isNull() && img.format() != QImage::Format_RGBA8888)
+          img = img.convertToFormat(QImage::Format_RGBA8888);
+        return [img = std::move(img)](CubemapLoader& self) mutable {
+          self.m_loadedImage = std::move(img);
+          self.m_imageChanged = true;
+        };
+      }
     } image;
 
     struct : halp::enum_t<CubemapLayout, "Layout">
@@ -53,9 +88,32 @@ class CubemapLoader
 
   struct
   {
-    halp::gpu_texture_output<"Cubemap"> cubemap;
+    // Raw cube texture — kept for consumers that want the handle
+    // directly (e.g. a bare-skybox rendering shader). Tagged via the
+    // new halp::gpu_cubemap_output so sinks know to grab-from-source
+    // rather than allocate a 2D render target.
+    halp::gpu_cubemap_output<"Cubemap"> cubemap;
+
+    // Scene-graph output: a scene_spec whose scene_environment has only
+    // skybox_texture.native_handle populated (no ambient / fog / etc.,
+    // no roots). Lets users wire the cubemap into a scene without a
+    // side-channel cable — merge_scenes's per-field env overlay folds
+    // it together with an EnvironmentLoader's params independent of
+    // wiring order.
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
   } outputs;
 
+  // Stable scene_state identity so downstream scene-identity caches
+  // (ScenePreprocessor, merge_scenes passthrough) stay hot across frames.
+  std::shared_ptr<ossia::scene_state> m_sceneState;
+  int64_t m_sceneVersion{0};
+  void* m_lastPublishedHandle{};
+
   // GPU resources
   QRhiTexture* m_cubemapTex{};
   QRhiTexture* m_equirectTex{};
@@ -79,6 +137,16 @@ class CubemapLoader
 
   void operator()() { }
 
+  // Dtor safety net: if the renderer framework's release(RenderList&)
+  // path was skipped (e.g. a reconcile path that deletes the renderer
+  // without first calling release — or any future code that drops the
+  // GfxRenderer's shared_ptr<CubemapLoader> without going through
+  // CpuFilterNode::releaseState), any still-live textures and GPU
+  // resources go to deleteLater here so QRhi's destructor can collect
+  // them before vkDestroyDevice. Without this the Vulkan validation
+  // layer flags "VkImage has not been destroyed" on app exit.
+  ~CubemapLoader();
+
   void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res);
   void update(
       score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
@@ -89,10 +157,16 @@ class CubemapLoader
       QRhiResourceUpdateBatch*& res, score::gfx::Edge& edge);
 
 private:
-  void loadImage();
   void createCubemapTexture(QRhi& rhi, int faceSize);
   void releaseCubemapTexture();
-  void releaseEquirectResources();
+  // `renderer` is optional: when non-null QRhiBuffers go through
+  // RenderList::releaseBuffer (the project-wide lifetime invariant);
+  // when null (dtor fallback, after the RenderList itself may have
+  // already been destroyed) we fall back to direct deleteLater.
+  // Textures always deleteLater directly — they're not tracked in
+  // RenderList::m_vertexBuffers, so the double-free risk only applies
+  // to buffers.
+  void releaseEquirectResources(score::gfx::RenderList* renderer = nullptr);
 
   void uploadCrossOrStrip(QRhiResourceUpdateBatch* res);
   void renderEquirectangular(
diff --git a/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.cpp
new file mode 100644
index 0000000000..055bf9d0cb
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.cpp
@@ -0,0 +1,188 @@
+#include "EnvironmentLoader.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <cmath>
+#include <cstring>
+#include <type_traits>
+
+namespace Threedim
+{
+
+// Recomputes the published scene_environment from the current control
+// values. Lazily creates the shared scene_state, then bumps its version
+// and queues dirty_environment for the next operator()() call.
+void EnvironmentLoader::rebuild()
+{
+  if(!m_state)
+  {
+    m_state = std::make_shared<ossia::scene_state>();
+    m_state->roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  }
+
+  auto& env = m_state->environment;
+  // Reset: this node only sets the ambient / exposure / gamma / fog
+  // groups. It does NOT touch skybox_texture / IBL handles — those
+  // come from cube-texture producers (CubemapLoader, …) that emit
+  // their own scene_spec with only the relevant fields populated.
+  // merge_scenes overlays field-by-field using the params_set mask.
+  env = {};
+
+  env.ambient_color[0] = inputs.ambient_color.value.x;
+  env.ambient_color[1] = inputs.ambient_color.value.y;
+  env.ambient_color[2] = inputs.ambient_color.value.z;
+  env.ambient_intensity = inputs.ambient_intensity.value;
+  // Photographic exposure: EV100 is the scene anchor, exposure_stops is
+  // the user-facing fine-tune (analogous to a camera's ±EV dial). The
+  // standard formula is `mul = stops_gain / (K * 2^EV100)`; we use K=1
+  // so EV100 = 0, stops = 0 leaves `env.exposure = 1` (preserving
+  // backward compat with scenes from before EV100 existed). Switch to
+  // the photometric K=1.2 (Frostbite/UE/Filament) once tone-mapping
+  // post-processes are the norm — at that point a non-unit default
+  // multiplier stops being surprising.
+  constexpr float K = 1.0f;
+  env.exposure = std::exp2(inputs.exposure_stops.value)
+               / (K * std::exp2(inputs.ev100.value));
+  env.gamma = inputs.gamma.value;
+  env.fog.enabled = inputs.fog_enabled.value;
+  env.fog.color[0] = inputs.fog_color.value.x;
+  env.fog.color[1] = inputs.fog_color.value.y;
+  env.fog.color[2] = inputs.fog_color.value.z;
+  env.fog.start = inputs.fog_start.value;
+  env.fog.end = inputs.fog_end.value;
+
+  env.params_set = ossia::scene_environment::params_ambient
+                   | ossia::scene_environment::params_exposure_gamma
+                   | ossia::scene_environment::params_fog;
+
+  // Render target size: only publish the overlay when both dimensions
+  // are positive. 0,0 (the default) means "let downstream fall back to
+  // the RenderList swap-chain size" — don't stamp the bit so other
+  // branches with legitimate sizes can still win the merge.
+  if(inputs.render_target_size.value.x > 0
+     && inputs.render_target_size.value.y > 0)
+  {
+    env.render_target_size[0] = (uint32_t)inputs.render_target_size.value.x;
+    env.render_target_size[1] = (uint32_t)inputs.render_target_size.value.y;
+    env.params_set |= ossia::scene_environment::params_render_target_size;
+  }
+
+  // Propagate the Env arena slot ref so the preprocessor can resolve
+  // our slot via ossia::gpu_slot_ref. m_env_ref is set by init() and
+  // cleared by release(); if neither has run yet it is still the
+  // value-initialised (invalid) ref. operator()() re-stamps the field
+  // whenever m_env_ref later diverges from what is stored here.
+  env.raw_slot = m_env_ref;
+
+  m_version++;
+  m_state->version = m_version;
+  m_pending_dirty = ossia::scene_port::dirty_environment;
+}
+
+// Publishes m_state on the scene outlet and hands over the pending
+// dirty bits (cleared afterwards). Also refreshes raw_slot when it no
+// longer matches m_env_ref: init() / release() replace m_env_ref
+// without any control changing, so relying on rebuild() alone could
+// leave a stale — possibly still-zero — slot ref published until the
+// next control edit.
+void EnvironmentLoader::operator()()
+{
+  if(!m_state)
+    rebuild();
+
+  auto& published = m_state->environment.raw_slot;
+  // Byte-wise compare / copy: the header documents gpu_slot_ref as a
+  // trivially-copyable POD, and no operator== is assumed to exist.
+  static_assert(std::is_trivially_copyable_v<ossia::gpu_slot_ref>);
+  static_assert(sizeof(published) == sizeof(m_env_ref));
+  if(std::memcmp(&published, &m_env_ref, sizeof(m_env_ref)) != 0)
+  {
+    // memcpy (not assignment) so padding bytes match on the next tick.
+    std::memcpy(&published, &m_env_ref, sizeof(m_env_ref));
+    m_version++;
+    m_state->version = m_version;
+    m_pending_dirty |= ossia::scene_port::dirty_environment;
+  }
+
+  outputs.scene_out.scene.state = m_state;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+// Render-thread hook: claims (once) and seeds this node's Env arena slot.
+void EnvironmentLoader::init(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  // Claim one slot in the Env arena for this node's lifetime. Kept in
+  // env_slot; released in release() below. The slot's offset + buffer
+  // are stable — consumer shaders bind r.registry().buffer(Env) with
+  // registry.slotOffset(env_slot) as the range base.
+  if(!env_slot.valid())
+  {
+    env_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::Env,
+        sizeof(score::gfx::EnvParamsUBO));
+    m_env_ref = r.registry().toOssiaRef(env_slot);
+  }
+  // Seed the slot with default-constructed bytes so downstream consumers
+  // that sample the slot before operator()() has ever run see a sane
+  // neutral environment rather than undefined memory.
+  if(env_slot.valid())
+  {
+    score::gfx::EnvParamsUBO seed{};
+    r.registry().updateSlot(res, env_slot, &seed, sizeof(seed));
+  }
+}
+
+// Render-thread hook: packs m_state->environment into an EnvParamsUBO
+// and uploads it to env_slot. No-op until both slot and state exist.
+void EnvironmentLoader::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+    score::gfx::Edge*)
+{
+  // Render-thread path: pack the current CPU-side scene_environment into
+  // the EnvParamsUBO layout and upload to our slot. CpuFilterNode runs
+  // processControlIn before calling us, so `inputs.*.value` already
+  // reflects the latest control state — and operator()() has already
+  // run this frame, so m_state->environment holds the freshest data.
+  if(!env_slot.valid() || !m_state)
+    return;
+
+  const auto& env = m_state->environment;
+  score::gfx::EnvParamsUBO gpu{};
+  gpu.ambient[0] = env.ambient_color[0];
+  gpu.ambient[1] = env.ambient_color[1];
+  gpu.ambient[2] = env.ambient_color[2];
+  gpu.ambient[3] = env.ambient_intensity;
+  // fog.density and fog.mode are not assigned by rebuild(); they carry
+  // whatever `env = {}` left in them.
+  gpu.fog_color_density[0] = env.fog.color[0];
+  gpu.fog_color_density[1] = env.fog.color[1];
+  gpu.fog_color_density[2] = env.fog.color[2];
+  gpu.fog_color_density[3] = env.fog.density;
+  gpu.fog_range[0] = env.fog.start;
+  gpu.fog_range[1] = env.fog.end;
+  gpu.fog_range[2] = float(env.fog.mode);
+  gpu.fog_range[3] = env.fog.enabled ? 1.f : 0.f;
+  gpu.exposure_gamma[0] = env.exposure;
+  gpu.exposure_gamma[1] = env.gamma;
+  gpu.exposure_gamma[2] = 0.f;
+  gpu.exposure_gamma[3] = 0.f;
+  r.registry().updateSlot(res, env_slot, &gpu, sizeof(gpu));
+}
+
+// Render-thread hook: returns the Env slot and drops the published
+// state so the next operator()() rebuilds from scratch.
+void EnvironmentLoader::release(score::gfx::RenderList& r)
+{
+  // NOTE(review): assumes registry().free() invalidates env_slot so a
+  // later init() re-allocates — confirm against GpuResourceRegistry.
+  if(env_slot.valid())
+    r.registry().free(env_slot);
+  m_env_ref = {};
+  // Producer-state-drift Option A — see Light::release.
+  m_state.reset();
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.hpp
new file mode 100644
index 0000000000..5cf8e8d5a1
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.hpp
@@ -0,0 +1,146 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <cstdint>
+#include <memory>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Scene-producing node that defines the environment of a scene:
+// ambient light, exposure, gamma, fog.
+//
+// Pairs with the project-wide scene_spec merge rule: environment is
+// merged field-by-field using the params_set bitmask. This node sets
+// only ambient / exposure-gamma / fog bits — skybox texture and IBL
+// handles are owned by CubemapLoader / CubemapComposer (and a future
+// EnvironmentPrecompute for real IBL) and overlay cleanly via
+// merge_scenes.
+//
+// Downstream pipeline:
+//   - `ossia::merge_scenes` overlays this environment onto the merged
+//     scene_state — field groups without matching bits pass through
+//     from whichever producer set them.
+//   - ScenePreprocessor packs scene_environment fields into an Environment
+//     Params UBO (auto-bound as aux buffer on Geometry Out).
+//   - classic_pbr_ibl shaders read the UBO for ambient / exposure / fog.
+class EnvironmentLoader
+{
+public:
+  halp_meta(name, "Environment")
+  halp_meta(c_name, "environment_loader")
+  halp_meta(category, "Visuals/3D")
+  halp_meta(authors, "ossia team")
+  halp_meta(uuid, "d3f5a8c1-8b47-4e91-9c2d-6f1a9b5e3c82")
+
+  struct ins
+  {
+    // Port-driven rebuild: each control's update() callback fires only
+    // on real change, triggering EnvironmentLoader::rebuild().
+    struct : halp::xyz_spinboxes_f32<"Ambient Color", halp::range{0., 1., 0.03}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } ambient_color;
+    struct : halp::hslider_f32<"Ambient Intensity", halp::range{0., 8., 1.}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } ambient_intensity;
+
+    // Photographic exposure value at ISO 100. Describes the scene's
+    // expected brightness in photometric terms; downstream shaders
+    // compensate so brighter scenes (higher EV100) display darker
+    // without manual rebalancing. Reference values:
+    //   EV100 ≈ -3   moonlit night
+    //   EV100 ≈  3   indoor lighting
+    //   EV100 ≈ 12   midday outdoor
+    //   EV100 ≈ 16   direct sunlight
+    // EV100 = 0 leaves the linear multiplier at 1× (combined with the
+    // default exposure_stops below it), preserving backward
+    // compatibility with scenes authored before EV100 existed.
+    struct : halp::hslider_f32<"Exposure EV100", halp::range{-6., 18., 0.}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } ev100;
+
+    // Fine-tune compensation atop EV100, in stops (±EV). Same role as
+    // a photographer's "exposure compensation" dial: ev100 sets the
+    // photographic anchor, exposure_stops biases above/below.
+    struct : halp::hslider_f32<"Exposure (stops)", halp::range{-8., 8., 0.}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } exposure_stops;
+    struct : halp::hslider_f32<"Gamma", halp::range{1., 3., 2.2}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } gamma;
+
+    struct : halp::toggle<"Fog">
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_enabled;
+    struct : halp::xyz_spinboxes_f32<"Fog Color", halp::range{0., 1., 0.8}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_color;
+    struct : halp::hslider_f32<"Fog Start", halp::range{0., 1000., 10.}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_start;
+    struct : halp::hslider_f32<"Fog End", halp::range{0., 10000., 100.}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_end;
+
+    // Downstream render-target dimensions (width, height). Stamped on
+    // scene_environment::render_target_size + params_render_target_size
+    // bit when both values > 0. Overrides the preprocessor's default
+    // derivation from the RenderList swap chain.
+    struct : halp::xy_spinboxes_i32<"Render target size", halp::range{0, 16384, 0}>
+    { void update(EnvironmentLoader& n) { n.rebuild(); } } render_target_size;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // Rebuild m_state from current inputs. Invoked by each port's
+  // update() callback on real control changes. operator()() just
+  // republishes m_state, so the emitted shared_ptr + version stay
+  // stable when nothing changed — keeps every downstream cache hot.
+  void rebuild();
+  void operator()();
+
+  // Render-thread GPU hooks, invoked by CpuFilterNode. init allocates a
+  // slot in the Env arena once; update rebuilds the EnvParamsUBO bytes
+  // and uploads them into the slot (ScenePreprocessor will later pick
+  // these up directly instead of repacking the CPU struct — producer
+  // half only for now); release returns the slot.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  // Invariant identity for the shared scene_environment struct we emit —
+  // holding one stable scene_state across frames lets downstream
+  // `scene.state.get()` comparisons short-circuit the no-op case. We
+  // mutate the state's environment in place on parameter changes.
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version{0};
+  uint8_t m_pending_dirty{ossia::scene_port::dirty_environment};
+
+  // Slot in RenderList::registry().buffer(Env). Allocated in init(),
+  // written in update(), freed in release().
+  score::gfx::GpuResourceRegistry::Slot env_slot;
+
+  // Ossia-facing snapshot of env_slot, copied onto scene_state::
+  // environment.raw_slot on the execution thread (see rebuild() in
+  // EnvironmentLoader.cpp) so the preprocessor can resolve our slot
+  // via isLive(). Written in init() and cleared in release() on the
+  // render thread; value-initialised here so reads that happen before
+  // init() see what the .cpp treats as an invalid ref.
+  ossia::gpu_slot_ref m_env_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.cpp b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.cpp
new file mode 100644
index 0000000000..7046e3ebf2
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.cpp
@@ -0,0 +1,407 @@
+#include "ExtractBuffer2.hpp"
+
+#include <Threedim/Debug.hpp>
+
+#include <QDebug>
+
+#include <charconv>
+#include <string_view>
+
+namespace Threedim
+{
+namespace
+{
+// Parses `n` as a base-10, non-negative int; the whole view must be consumed.
+// Yields -1 on any miss (empty view, stray characters, overflow, negative).
+[[nodiscard]] int parseInt(std::string_view n) noexcept
+{
+  int parsed = 0;
+  const char* const begin = n.data();
+  const char* const end = begin + n.size();
+  const auto res = std::from_chars(begin, end, parsed);
+  const bool valid = res.ec == std::errc{} && res.ptr == end && parsed >= 0;
+  return valid ? parsed : -1;
+}
+
+// Translates a user-facing attribute name into a halp::attribute_semantic.
+// Unknown names yield nullopt, in which case the caller falls back to the
+// custom-name lookup against geometry_attribute::name.
+[[nodiscard]] std::optional<halp::attribute_semantic>
+nameToSemantic(std::string_view n) noexcept
+{
+  using S = halp::attribute_semantic;
+  struct Alias
+  {
+    std::string_view key;
+    S semantic;
+  };
+  // FIXME add all the others
+  static constexpr Alias aliases[] = {
+      {"position", S::position},
+      {"pos", S::position},
+      {"normal", S::normal},
+      {"norm", S::normal},
+      {"tangent", S::tangent},
+      {"bitangent", S::bitangent},
+      {"uv", S::texcoord0},
+      {"texcoord", S::texcoord0},
+      {"texcoord0", S::texcoord0},
+      {"texcoord1", S::texcoord1},
+      {"texcoord2", S::texcoord2},
+      {"texcoord3", S::texcoord3},
+      {"color", S::color0},
+      {"color0", S::color0},
+      {"color1", S::color1},
+      {"color2", S::color2},
+      {"color3", S::color3},
+      {"joints", S::joints0},
+      {"joints0", S::joints0},
+      {"joints1", S::joints1},
+      {"weights", S::weights0},
+      {"weights0", S::weights0},
+      {"weights1", S::weights1},
+      {"velocity", S::velocity},
+  };
+  for(const Alias& a : aliases)
+    if(a.key == n)
+      return a.semantic;
+  return std::nullopt;
+}
+}
+
+ExtractBuffer2::ExtractBuffer2() = default;
+
+std::optional<attribute_lookup> ExtractBuffer2::resolveAttribute(
+    const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept
+{
+  if(n.empty())
+    return std::nullopt;
+
+  // 1. A non-negative integer selects the Nth attribute slot.
+  const int slot = parseInt(n);
+  if(slot >= 0)
+    return findAttribute(mesh, slot);
+
+  // 2. A well-known semantic name.
+  const auto semantic = nameToSemantic(n);
+  if(semantic)
+    return findAttribute(mesh, *semantic);
+
+  // 3. A custom name, compared against geometry_attribute::name.
+  for(int i = 0, count = (int)mesh.attributes.size(); i < count; ++i)
+    if(mesh.attributes[i].name == n)
+      return findAttribute(mesh, i);
+
+  return std::nullopt;
+}
+
+ExtractBuffer2::BufferRef ExtractBuffer2::resolveBuffer(
+    const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept
+{
+  if(n.empty())
+    return {};
+
+  // "index" -> the index buffer. NOTE(review): sized from mesh.vertices — confirm.
+  if(n == "index")
+  {
+    if(mesh.index.buffer < 0 || mesh.index.buffer >= (int)mesh.buffers.size())
+      return {};
+    int64_t bytes = 0;
+    switch(mesh.index.format)
+    {
+      case halp::index_format::uint16:
+        bytes = (int64_t)mesh.vertices * 2;
+        break;
+      case halp::index_format::uint32:
+        bytes = (int64_t)mesh.vertices * 4;
+        break;
+    }
+    return {
+        .buffer_index = mesh.index.buffer,
+        .byte_offset = mesh.index.byte_offset,
+        .byte_size = bytes};
+  }
+
+  // Numeric -> Nth buffer in mesh.buffers[]
+  if(const int idx = parseInt(n); idx >= 0)
+  {
+    if(idx >= (int)mesh.buffers.size())
+      return {};
+    return {
+        .buffer_index = idx,
+        .byte_offset = 0,
+        .byte_size = mesh.buffers[idx].byte_size};
+  }
+
+  // Named auxiliary buffer (scene_lights, scene_materials, model_matrices, ...)
+  // attached by producers such as ScenePreprocessor. Tried before the attribute
+  // fallback below, so a user-chosen aux name shadows a same-named attribute.
+  for(const auto& aux : mesh.auxiliary)
+  {
+    if(aux.name == n)
+    {
+      if(aux.buffer < 0 || aux.buffer >= (int)mesh.buffers.size())
+        return {};
+      const int64_t size
+          = aux.byte_size > 0 ? aux.byte_size : mesh.buffers[aux.buffer].byte_size;
+      return {
+          .buffer_index = aux.buffer,
+          .byte_offset = aux.byte_offset,
+          .byte_size = size};
+    }
+  }
+
+  // Otherwise: try to resolve as an attribute name and walk to the
+  // backing buffer.
+  if(const auto lk = resolveAttribute(mesh, n); lk && lk->input)
+  {
+    const int bidx = lk->input->buffer;
+    if(bidx >= 0 && bidx < (int)mesh.buffers.size())
+    {
+      return {
+          .buffer_index = bidx,
+          .byte_offset = 0,
+          .byte_size = mesh.buffers[bidx].byte_size};
+    }
+  }
+
+  return {};
+}
+
+void ExtractBuffer2::initStrategy(score::gfx::RenderList& renderer)
+{
+  // An empty mesh has nothing to extract: fall back to the empty strategy.
+  const auto& geom = inputs.geometry.mesh;
+  if(geom.vertices == 0)
+  {
+    m_strategy = std::monostate{};
+    return;
+  }
+
+  // Remember the selectors in use; update() compares them to the live inputs.
+  m_currentPadToVec4 = inputs.pad_to_vec4.value;
+  m_currentName = inputs.name.value;
+  m_currentMode = inputs.mode.value;
+
+  QRhi& rhi = *renderer.state.rhi;
+
+  // Buffer mode: reference a raw byte range of one of the mesh buffers.
+  if(!(inputs.mode.value == Attribute))
+  {
+    const auto range = resolveBuffer(geom, m_currentName);
+    if(range.buffer_index < 0 || range.byte_size <= 0)
+    {
+      qWarning() << this << "ExtractBuffer2: buffer not found:"
+                 << QString::fromStdString(m_currentName);
+      m_strategy = std::monostate{};
+      return;
+    }
+    auto& strat = m_strategy.emplace<DirectBufferReferenceStrategy>();
+    if(!strat.init(
+           renderer.state, rhi, geom, range.buffer_index, range.byte_offset,
+           range.byte_size))
+    {
+      qWarning() << this << "ExtractBuffer2: DirectBufferReferenceStrategy failed";
+      m_strategy = std::monostate{};
+    }
+    return;
+  }
+
+  // Attribute mode: the name must resolve to an attribute with a usable buffer.
+  const auto attr = resolveAttribute(geom, m_currentName);
+  if(!attr)
+  {
+    qWarning() << this << "ExtractBuffer2: attribute not found:"
+               << QString::fromStdString(m_currentName);
+    m_strategy = std::monostate{};
+    return;
+  }
+  if(!attr->buffer || !attr->buffer->handle)
+  {
+    qWarning() << this << "ExtractBuffer2: source buffer is null";
+    m_strategy = std::monostate{};
+    return;
+  }
+
+  // Indexed meshes take the indexed path; otherwise direct, else compute.
+  const bool indexed = geom.index.buffer >= 0;
+  const bool direct = attr->canDirectReference() && !indexed;
+
+  bool initialized = false;
+  if(indexed)
+    initialized = m_strategy.emplace<IndexedExtractionStrategy>().init(
+        renderer.state, rhi, geom, *attr, m_currentPadToVec4);
+  else if(direct)
+    initialized = m_strategy.emplace<DirectReferenceStrategy>().init(
+        renderer.state, rhi, geom, *attr, m_currentPadToVec4);
+  else
+    initialized = m_strategy.emplace<ComputeExtractionStrategy>().init(
+        renderer.state, rhi, geom, *attr, m_currentPadToVec4);
+  if(!initialized)
+  {
+    qWarning() << this << "ExtractBuffer2: strategy init failed";
+    m_strategy = std::monostate{};
+  }
+}
+
+void ExtractBuffer2::init(
+    score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& /*res*/)
+{
+  initStrategy(renderer); // pick and build the strategy for the current inputs
+  updateOutput();         // then mirror its buffer view onto the outlet
+}
+
+void ExtractBuffer2::update(
+    score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
+    score::gfx::Edge* /*e*/)
+{
+  // Per-frame refresh: rebuild the strategy when a selector or the mesh
+  // layout changed, otherwise refresh it in place, then republish the
+  // outlet. Nothing is touched while the incoming mesh has no vertices.
+  const auto& mesh = inputs.geometry.mesh;
+  if(mesh.vertices == 0)
+    return;
+
+  // Selector or pad change -> tear down and rebuild from scratch. The
+  // strategies are cheap to recreate (they own at most one compute
+  // pipeline) so this keeps the update path simple.
+  const bool modeChanged = (inputs.mode.value != m_currentMode);
+  const bool nameChanged = (inputs.name.value != m_currentName);
+  const bool padChanged = (inputs.pad_to_vec4.value != m_currentPadToVec4);
+  if(modeChanged || nameChanged || padChanged)
+  {
+    release(renderer);
+    initStrategy(renderer);
+    updateOutput();
+    return;
+  }
+
+  // Drain dirty flags so the upstream knows we picked them up. We
+  // always re-check the source buffer pointers below regardless.
+  for(auto& buf : inputs.geometry.mesh.buffers)
+    buf.dirty = false;
+  inputs.geometry.dirty_mesh = false;
+
+  if(inputs.mode.value == Attribute)
+  {
+    const auto lookup = resolveAttribute(mesh, m_currentName);
+    if(!lookup)
+      return;
+
+    // Strategy class may need to change if the upstream changed its
+    // index/binding layout (e.g. went from non-indexed to indexed).
+    const bool hasIndexBuffer = mesh.index.buffer >= 0;
+    const bool canDirectRef = lookup->canDirectReference() && !hasIndexBuffer;
+
+    const bool needsIndexed = hasIndexBuffer;
+    const bool needsDirect = canDirectRef && !hasIndexBuffer;
+    const bool needsCompute = !canDirectRef && !hasIndexBuffer;
+    const bool isIndexed = std::holds_alternative<IndexedExtractionStrategy>(m_strategy);
+    const bool isDirect = std::holds_alternative<DirectReferenceStrategy>(m_strategy);
+    const bool isCompute = std::holds_alternative<ComputeExtractionStrategy>(m_strategy);
+
+    if((needsIndexed && !isIndexed) || (needsDirect && !isDirect)
+       || (needsCompute && !isCompute))
+    {
+      release(renderer);
+      initStrategy(renderer);
+      updateOutput();
+      return;
+    }
+
+    QRhi& rhi = *renderer.state.rhi;
+    std::visit(
+        [&](auto& strategy) {
+      using T = std::decay_t<decltype(strategy)>;
+      if constexpr(!std::is_same_v<T, std::monostate>)
+        strategy.update(rhi, mesh, *lookup, m_currentPadToVec4);
+        },
+        m_strategy);
+  }
+  else // Buffer
+  {
+    auto* strat = std::get_if<DirectBufferReferenceStrategy>(&m_strategy);
+    if(!strat)
+    {
+      release(renderer);
+      initStrategy(renderer);
+      updateOutput();
+      return;
+    }
+
+    // Re-resolve and re-init in place: even if the user-visible name
+    // hasn't changed, the upstream may have rebuilt the QRhiBuffer*
+    // (resize, format change). DirectBufferReferenceStrategy is
+    // pointer-only state, so this is effectively just a re-fetch.
+    const auto ref = resolveBuffer(mesh, m_currentName);
+    QRhi& rhi = *renderer.state.rhi;
+    if(ref.buffer_index < 0 || ref.byte_size <= 0)
+    {
+      // The target vanished: drop the strategy. No early return here, so
+      // the updateOutput() below clears the outlet instead of leaving the
+      // previous (now stale) buffer handle published downstream.
+      release(renderer);
+    }
+    else if(!strat->init(
+                renderer.state, rhi, mesh, ref.buffer_index, ref.byte_offset,
+                ref.byte_size))
+    {
+      qWarning() << this << "ExtractBuffer2: re-init failed in update";
+      release(renderer); // same as above: fall through to clear the outlet
+    }
+  }
+
+  updateOutput();
+}
+
+void ExtractBuffer2::release(score::gfx::RenderList& /*renderer*/)
+{
+  // Tear down whichever strategy is active; monostate has nothing to release.
+  const auto teardown = [](auto& active) {
+    if constexpr(!std::is_same_v<std::decay_t<decltype(active)>, std::monostate>)
+      active.release();
+  };
+  std::visit(teardown, m_strategy);
+  // Back to the empty state, for which updateOutput() resets the outlet.
+  m_strategy = std::monostate{};
+}
+
+void ExtractBuffer2::runInitialPasses(
+    score::gfx::RenderList& renderer, QRhiCommandBuffer& commands,
+    QRhiResourceUpdateBatch*& res, score::gfx::Edge& /*edge*/)
+{
+  // Only strategies whose needsCompute() is true get runCompute() invoked;
+  // the empty (monostate) alternative is skipped at compile time.
+  QRhi& rhi = *renderer.state.rhi;
+  const auto dispatch = [&rhi, &commands, &res](auto& active) {
+    using Strategy = std::decay_t<decltype(active)>;
+    if constexpr(std::is_same_v<Strategy, std::monostate>)
+      return;
+    else if constexpr(Strategy::needsCompute())
+      active.runCompute(rhi, commands, res);
+  };
+  std::visit(dispatch, m_strategy);
+}
+
+void ExtractBuffer2::updateOutput()
+{
+  // Mirror the active strategy's buffer view onto the outlet; with no
+  // strategy (monostate) the outlet is reset to its default state.
+  auto& port = outputs.buffer.buffer;
+  const auto publish = [&port](const auto& active) {
+    using Strategy = std::decay_t<decltype(active)>;
+    if constexpr(std::is_same_v<Strategy, std::monostate>)
+      port = {};
+    else
+    {
+      const gpu_buffer_view view = active.output();
+      port.handle = view.buffer;
+      port.byte_size = view.size;
+      port.byte_offset = view.offset;
+    }
+  };
+  std::visit(publish, m_strategy);
+}
+
+void ExtractBuffer2::operator()() { } // empty: the work is done in init() / update()
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.hpp b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.hpp
new file mode 100644
index 0000000000..9eda1a5c62
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.hpp
@@ -0,0 +1,134 @@
+#pragma once
+#include <Threedim/GeometryToBufferStrategies.hpp>
+
+#include <halp/buffer.hpp>
+#include <halp/controls.hpp>
+#include <halp/geometry.hpp>
+#include <halp/meta.hpp>
+
+namespace Threedim
+{
+// Name-based version of Threedim::ExtractBuffer.
+//
+// The original ExtractBuffer enumerates a fixed list of attribute slots
+// (Position / TexCoord / Normal / ... / Buffer_0..Buffer_8) and selects
+// one through a combobox. The Buffer_i path was wrong (the source buffer
+// pointer was not refreshed in update(), and there was no way to refer
+// to a buffer through anything other than its raw index in the mesh's
+// buffer list, which is brittle whenever the upstream geometry rebuilds
+// its buffer layout).
+//
+// This version takes:
+//   * a Mode enum  -- Attribute or Buffer
+//   * a name       -- a free-form string interpreted differently per mode
+//
+// Mode == Attribute: extract a single per-vertex attribute (one vec lane)
+//   * "position" / "pos" / "normal" / "norm" / "tangent" / "bitangent" /
+//     "velocity" / "texcoord[0-3]" / "color[0-3]" / "joints[0-1]" /
+//     "weights[0-1]" (bare name = index 0; "uv" aliases texcoord0) :
+//        match against halp::attribute_semantic
+//   * "<integer>" :  Nth entry in mesh.attributes[]
+//   * anything else: custom-name lookup in mesh.attributes[].name
+//   The output is one of the existing extraction strategies
+//   (Direct / Compute / Indexed) just like ExtractBuffer.
+//
+// Mode == Buffer: extract a whole raw buffer (all bytes)
+//   * "<integer>" :  the Nth entry in mesh.buffers[] (the index path
+//     ExtractBuffer's combobox tried to expose)
+//   * "index"     :  the buffer mesh.index points at
+//   * a name matching one of `mesh.auxiliary[].name` (checked before the
+//     attribute fallback, so an aux name shadows a same-named attribute):
+//     returns the auxiliary's backing buffer + its byte_offset /
+//     byte_size. This is how ScenePreprocessor's per-frame auxiliaries
+//     (camera, camera_prev, env, scene_lights, scene_materials,
+//     per_draws, indirect_draw_cmds, scene_counts, and every
+//     scene_data_ptr name) can be pulled out onto a standalone
+//     gpu_buffer outlet for downstream consumers that don't want to
+//     auto-bind via try_bind_from_geometry.
+//   * anything else: look up an attribute by semantic / custom name
+//     and return the buffer it lives in (via attribute -> input ->
+//     buffer).
+//
+// On every update() the source buffer handle is re-fetched from the
+// mesh, so an upstream that rebuilds its QRhiBuffer (resize / new
+// allocation) is reflected on the next frame instead of leaving us
+// holding a stale pointer.
+class ExtractBuffer2
+{
+public:
+  halp_meta(name, "Extract buffer (by name)")
+  halp_meta(category, "Visuals/Utilities")
+  halp_meta(c_name, "extract_buffer_by_name")
+  halp_meta(
+      manual_url, "https://ossia.io/score-docs/processes/extract-buffer.html")
+  halp_meta(uuid, "3c9d6c2b-1f04-4f7d-9bc2-a4b1d7c8e5f0")
+
+  enum Mode
+  {
+    Attribute,
+    Buffer
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Geometry");
+      halp::dynamic_gpu_geometry mesh;
+      float transform[16]{};
+      bool dirty_mesh = false;
+      bool dirty_transform = false;
+    } geometry;
+
+    halp::combobox_t<"Mode", Mode> mode;
+    struct : halp::lineedit<"Name / index", "position">
+    {
+      halp_meta(symbol, "name")
+    } name;
+    halp::toggle<"Pad vec3 to vec4"> pad_to_vec4;
+  } inputs;
+
+  struct
+  {
+    halp::gpu_buffer_output<"Buffer"> buffer;
+  } outputs;
+
+  ExtractBuffer2();
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+  void runInitialPasses(
+      score::gfx::RenderList& renderer, QRhiCommandBuffer& commands,
+      QRhiResourceUpdateBatch*& res, score::gfx::Edge& edge);
+  void operator()();
+
+private:
+  // Resolve the user's name string to an attribute_lookup, taking the
+  // active mesh into account. Returns nullopt on miss.
+  [[nodiscard]] static std::optional<attribute_lookup>
+  resolveAttribute(const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept;
+
+  // Resolve the user's name string to a (buffer index, byte_offset, byte_size)
+  // triple suitable for DirectBufferReferenceStrategy. buffer_index is -1 on miss.
+  struct BufferRef
+  {
+    int buffer_index{-1};
+    int64_t byte_offset{};
+    int64_t byte_size{};
+  };
+  [[nodiscard]] static BufferRef
+  resolveBuffer(const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept;
+
+  // (Re)initialise m_strategy based on the current inputs and mesh.
+  void initStrategy(score::gfx::RenderList& renderer);
+  void updateOutput();
+
+  ExtractionStrategyVariant m_strategy;
+  Mode m_currentMode{Attribute};
+  std::string m_currentName{};
+  bool m_currentPadToVec4{false};
+};
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.cpp b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.cpp
new file mode 100644
index 0000000000..2aeea8b4f5
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.cpp
@@ -0,0 +1,124 @@
+#include "ExtractSceneBuffer.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+
+namespace Threedim
+{
+
+namespace
+{
+// Maps the (kind, index) selectors to the slot ref stamped on the matching
+// scene component. A miss yields an all-zero (invalid) ref, which the
+// caller's isLive() check rejects without needing a separate null check.
+inline ossia::gpu_slot_ref
+pickSlotRef(const ossia::scene_state& state, ExtractSceneBuffer::Kind kind,
+            int index) noexcept
+{
+  // Shared lookup for the indexed component lists (cameras, materials):
+  // guards the list pointer, the index range and the element pointer.
+  const auto nth = [index](const auto& list) -> ossia::gpu_slot_ref {
+    if(!list)
+      return {};
+
+    const auto& items = *list;
+    if(index < 0 || std::size_t(index) >= items.size())
+      return {};
+
+    const auto& item = items[index];
+    if(!item)
+      return {};
+
+    return item->raw_slot;
+  };
+
+  switch(kind)
+  {
+    case ExtractSceneBuffer::Environment:
+      return state.environment.raw_slot;
+
+    case ExtractSceneBuffer::Camera:
+      return nth(state.cameras);
+
+    case ExtractSceneBuffer::Material:
+      return nth(state.materials);
+  }
+  return {};
+}
+
+// Reinterprets the raw arena tag carried by a slot ref as a registry Arena.
+inline score::gfx::GpuResourceRegistry::Arena arenaOf(uint32_t tag) noexcept
+{
+  return static_cast<score::gfx::GpuResourceRegistry::Arena>(tag);
+}
+}
+
+void ExtractSceneBuffer::operator()()
+{
+  // Execution thread — no GPU work here; the scene_spec on the port is
+  // left untouched for update() to read. Only the dirty flag is cleared,
+  // so that upstream knows the tick was observed.
+  inputs.scene_in.dirty = 0;
+}
+
+void ExtractSceneBuffer::init(
+    score::gfx::RenderList&, QRhiResourceUpdateBatch&)
+{
+  // Nothing to allocate (the node only reads through the registry): just clear the outlet.
+  outputs.buffer.buffer = {};
+}
+
+void ExtractSceneBuffer::update(
+    score::gfx::RenderList& renderer, QRhiResourceUpdateBatch&,
+    score::gfx::Edge*)
+{
+  auto& out = outputs.buffer.buffer;
+
+  // Without a scene there is nothing to resolve: clear the outlet, so
+  // that downstream consumers see a null handle and can fall back to
+  // whatever default they define.
+  const auto& state = inputs.scene_in.scene.state;
+  if(!state)
+  {
+    out = {};
+    return;
+  }
+
+  // isLive() is the single authoritative gate: in one compare it rejects
+  // stale refs (producer released), default-constructed refs (no slot
+  // stamped), refs from a different registry (different RenderList) and
+  // mismatched-arena refs. Only then is the arena's buffer fetched.
+  const auto slot
+      = pickSlotRef(*state, Kind(inputs.kind.value), inputs.index.value);
+  QRhiBuffer* backing = nullptr;
+  if(renderer.registry().isLive(slot))
+    backing = renderer.registry().buffer(arenaOf(slot.arena));
+  if(!backing)
+  {
+    out = {};
+    return;
+  }
+
+  // Snapshot what is currently published before overwriting it.
+  const void* const old_handle = out.handle;
+  const int64_t old_offset = out.byte_offset;
+  const int64_t old_size = out.byte_size;
+  const int64_t new_offset = (int64_t)slot.offset;
+  const int64_t new_size = (int64_t)slot.size;
+
+  out.handle = backing;
+  out.byte_offset = new_offset;
+  out.byte_size = new_size;
+
+  // Raise `changed` only when something downstream-observable actually
+  // moved — most frames the slot is stable, and downstream rebinds
+  // should short-circuit on identity.
+  out.changed
+      = old_handle != backing || old_offset != new_offset || old_size != new_size;
+}
+
+void ExtractSceneBuffer::release(score::gfx::RenderList&)
+{
+  outputs.buffer.buffer = {}; // the only thing done on release: clear the outlet
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.hpp b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.hpp
new file mode 100644
index 0000000000..86ba08eb13
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.hpp
@@ -0,0 +1,116 @@
+#pragma once
+#include <halp/buffer.hpp>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <cstdint>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Scene-level buffer extractor. Takes a scene_spec in, picks one of the
+// GPU arena slots stamped on the scene's components, and republishes
+// the backing `{QRhiBuffer*, byte_offset, byte_size}` triple on a
+// halp::gpu_buffer outlet.
+//
+// Unlike Threedim::ExtractBuffer2 — which extracts from a flattened
+// geometry's aux list downstream of a ScenePreprocessor — this node
+// works directly on a raw scene_spec. Useful when:
+//
+//   - A custom compute shader wants to consume a producer's Raw arena
+//     slot without paying the cost of a preprocessor flatten.
+//   - The downstream pipeline has no preprocessor (e.g. a pure
+//     data-probing tool inspecting the environment's bytes).
+//
+// Source resolution uses the `raw_slot` field on each component type:
+//
+//   - Environment: scene.state->environment.raw_slot
+//   - Camera(N):   (*scene.state->cameras)[N]->raw_slot
+//   - Material(N): (*scene.state->materials)[N]->raw_slot
+//
+// The registry's isLive() check guards every read. Stale refs (producer
+// released, mismatched generation) clear the outlet rather than handing
+// a dangling QRhiBuffer* downstream.
+//
+// Lights aren't exposed here because the light tree isn't a flat
+// scene_state.lights vector (lights live as scene_payload children).
+// Walking the tree to find the Nth light by preorder index is a
+// reasonable future addition if the use case shows up; for now,
+// extract light data downstream of a ScenePreprocessor via
+// ExtractBuffer2(name="scene_lights").
+class ExtractSceneBuffer
+{
+public:
+  halp_meta(name, "Extract Scene Buffer")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "extract_scene_buffer")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/extract-scene-buffer.html")
+  halp_meta(uuid, "5f2b8e1c-4a7d-4e9b-b0f1-3c6e8d2a5b74")
+
+  enum Kind
+  {
+    Environment,
+    Camera,
+    Material
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    struct : halp::combobox_t<"Kind", Kind>
+    {
+      struct range
+      {
+        std::string_view values[3]{"Environment", "Camera", "Material"};
+        int init{0};
+      };
+    } kind;
+
+    // Index inside scene.state->cameras / ->materials. Ignored when
+    // Kind == Environment (the environment is a singleton on scene_state).
+    halp::spinbox_i32<"Index", halp::irange{0, 1024, 0}> index;
+  } inputs;
+
+  struct outs
+  {
+    halp::gpu_buffer_output<"Buffer"> buffer;
+  } outputs;
+
+  // Execution-thread tick. No heavy work here — it only clears the
+  // port's dirty flag. Slot resolution needs the registry (render
+  // thread) so it happens in update().
+  void operator()();
+
+  // Render-thread hooks. update() resolves the slot ref against the
+  // renderer's GpuResourceRegistry, validates via isLive(), and
+  // publishes the buffer handle + offset + size on the outlet. init()
+  // and release() only clear the outlet — the node owns no GPU state.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.cpp b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.cpp
new file mode 100644
index 0000000000..d7b288c977
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.cpp
@@ -0,0 +1,173 @@
+#include "ExtractTexture.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+
+#include <QtGui/private/qrhi_p.h>
+
+namespace Threedim
+{
+
+void ExtractTexture::init(
+    score::gfx::RenderList& /*renderer*/, QRhiResourceUpdateBatch& /*res*/)
+{ // Nothing to set up here: the texture is resolved by name in update().
+}
+
+void ExtractTexture::update(
+    score::gfx::RenderList& /*renderer*/, QRhiResourceUpdateBatch& /*res*/,
+    score::gfx::Edge* /*e*/)
+{
+  const auto& mesh = inputs.geometry.mesh;
+  const auto& target_name = inputs.name.value;
+
+  // Resolve by name. aux names are producer-chosen (ScenePreprocessor
+  // uses "skybox", "irradiance_map", "camera", "base_color_array",
+  // …); if the target is missing we hand out a null handle so the
+  // downstream binding drops to its empty-placeholder.
+  void* resolved = nullptr;
+  void* resolved_sampler = nullptr;
+  for(const auto& aux : mesh.auxiliary_textures)
+  {
+    if(aux.name == target_name)
+    {
+      resolved = aux.handle;
+      resolved_sampler = aux.sampler_handle;
+      break;
+    }
+  }
+
+  // Short-circuit identical-state updates: re-emitting texture metadata
+  // trips downstream SRB rebuilds, so publish only on a handle or name change.
+  // NOTE(review): a sampler-only change or same-pointer resize goes unseen.
+  if(resolved == m_lastHandle && target_name == m_lastName)
+    return;
+  m_lastHandle = resolved;
+  m_lastName = target_name;
+
+  outputs.texture.texture.handle = resolved;
+  // Forward the producer-side sampler if any. ScenePreprocessor's per-
+  // bucket sampler split (per-glTF wrap/filter mode) ships a sampler
+  // alongside each material texture array — passing it through here
+  // lets downstream sampler-config-sensitive nodes (anisotropy, custom
+  // wrap mode) honour it. Null = downstream falls back to its own.
+  outputs.texture.texture.sampler_handle = resolved_sampler;
+
+  if(!resolved)
+  {
+    outputs.texture.texture.width = 0;
+    outputs.texture.texture.height = 0;
+    outputs.texture.texture.layers_or_depth = 1;
+    outputs.texture.texture.kind = halp::texture_kind::texture_2d;
+    return;
+  }
+
+  // Detect the texture shape from the live QRhiTexture's flags +
+  // dimensions. Order matters: CubeMap and ThreeDimensional are
+  // mutually exclusive by construction, but check CubeMap first as
+  // some backends may happen to set both bits on edge-case allocations.
+  auto* tex = static_cast<QRhiTexture*>(resolved);
+  const auto flags = tex->flags();
+  const QSize px = tex->pixelSize();
+
+  outputs.texture.texture.width = px.width();
+  outputs.texture.texture.height = px.height();
+
+  if(flags.testFlag(QRhiTexture::CubeMap))
+  {
+    outputs.texture.texture.kind = halp::texture_kind::cubemap;
+    outputs.texture.texture.layers_or_depth = 6;
+  }
+  else if(flags.testFlag(QRhiTexture::ThreeDimensional))
+  {
+    outputs.texture.texture.kind = halp::texture_kind::texture_3d;
+    // QRhiTexture::depth() is 0 for non-3D textures, set on allocation
+    // for 3D. Default to 1 when the backend returns 0 on a 3D texture
+    // that hasn't been filled yet — avoids an illegal 0-depth probe
+    // binding downstream.
+    outputs.texture.texture.layers_or_depth = std::max(1, tex->depth());
+  }
+  else if(flags.testFlag(QRhiTexture::TextureArray))
+  {
+    outputs.texture.texture.kind = halp::texture_kind::texture_array;
+    outputs.texture.texture.layers_or_depth = std::max(1, tex->arraySize());
+  }
+  else
+  {
+    outputs.texture.texture.kind = halp::texture_kind::texture_2d;
+    outputs.texture.texture.layers_or_depth = 1;
+  }
+
+  // Format reporting — halp's gpu_texture format taxonomy now mirrors
+  // QRhi's color + integer set, so downstream nodes that branch on
+  // format (HDR-ness, integer-vs-float for atomic-image consumers,
+  // sRGB inference) get a faithful answer instead of the previous
+  // "everything not in the float subset → RGBA8" silent miscast.
+  //
+  // QRhi version availability:
+  //   - RGBA8 / BGRA8 / R8 / RG8 / R16 / RG16 / float family / depth →
+  //     present since QRhi went public-ish (Qt 6.2 private API).
+  //   - RGB10A2 added in Qt 6.4.
+  //   - Integer family (R8UI / R32UI / RG32UI / RGBA32UI / *SI variants)
+  //     added in Qt 6.10. Guard so older Qt builds compile.
+  switch(tex->format())
+  {
+    // 8-bit unorm — Qt 6.2+
+    case QRhiTexture::RGBA8:    outputs.texture.texture.format = halp::gpu_texture::RGBA8;    break;
+    case QRhiTexture::BGRA8:    outputs.texture.texture.format = halp::gpu_texture::BGRA8;    break;
+    case QRhiTexture::R8:       outputs.texture.texture.format = halp::gpu_texture::R8;       break;
+    case QRhiTexture::RG8:      outputs.texture.texture.format = halp::gpu_texture::RG8;      break;
+
+    // 16-bit unorm — Qt 6.2+
+    case QRhiTexture::R16:      outputs.texture.texture.format = halp::gpu_texture::R16;      break;
+    case QRhiTexture::RG16:     outputs.texture.texture.format = halp::gpu_texture::RG16;     break;
+
+    // float — Qt 6.2+
+    case QRhiTexture::RGBA16F:  outputs.texture.texture.format = halp::gpu_texture::RGBA16F;  break;
+    case QRhiTexture::RGBA32F:  outputs.texture.texture.format = halp::gpu_texture::RGBA32F;  break;
+    case QRhiTexture::R16F:     outputs.texture.texture.format = halp::gpu_texture::R16F;     break;
+    case QRhiTexture::R32F:     outputs.texture.texture.format = halp::gpu_texture::R32F;     break;
+
+#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
+    // 10/10/10/2 packed — Qt 6.4+
+    case QRhiTexture::RGB10A2:  outputs.texture.texture.format = halp::gpu_texture::RGB10A2;  break;
+#endif
+
+#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0)
+    // Unsigned integer — Qt 6.10+. REQUIRED to be reported as such for
+    // atomic-image consumers (voxelizer occupancy grids, histogram
+    // targets, …). A miscast here would tell downstream "this is RGBA8,
+    // sample as float" and break uimage / usampler bindings on Vulkan
+    // validation.
+    case QRhiTexture::R8UI:     outputs.texture.texture.format = halp::gpu_texture::R8UI;     break;
+    case QRhiTexture::R32UI:    outputs.texture.texture.format = halp::gpu_texture::R32UI;    break;
+    case QRhiTexture::RG32UI:   outputs.texture.texture.format = halp::gpu_texture::RG32UI;   break;
+    case QRhiTexture::RGBA32UI: outputs.texture.texture.format = halp::gpu_texture::RGBA32UI; break;
+
+    // Signed integer — Qt 6.10+
+    case QRhiTexture::R8SI:     outputs.texture.texture.format = halp::gpu_texture::R8SI;     break;
+    case QRhiTexture::R32SI:    outputs.texture.texture.format = halp::gpu_texture::R32SI;    break;
+    case QRhiTexture::RG32SI:   outputs.texture.texture.format = halp::gpu_texture::RG32SI;   break;
+    case QRhiTexture::RGBA32SI: outputs.texture.texture.format = halp::gpu_texture::RGBA32SI; break;
+#endif
+
+    default:
+      // Depth, compressed, or anything halp's enum doesn't cover —
+      // safest fallback is RGBA8 so the downstream sampler binding
+      // doesn't trip a type-mismatch validation error. Downstream
+      // explicit consumers should branch on `kind` first.
+      outputs.texture.texture.format = halp::gpu_texture::RGBA8;
+      break;
+  }
+}
+
+void ExtractTexture::release(score::gfx::RenderList& /*r*/)
+{
+  m_lastHandle = nullptr; // forget the cached selection, then clear the outlet
+  m_lastName.clear();
+  outputs.texture.texture.handle = nullptr;
+  outputs.texture.texture.sampler_handle = nullptr; // no stale sampler either
+  outputs.texture.texture.width = outputs.texture.texture.height = 0;
+  outputs.texture.texture.layers_or_depth = 1;
+  outputs.texture.texture.kind = halp::texture_kind::texture_2d;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.hpp b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.hpp
new file mode 100644
index 0000000000..3373fd8cf1
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.hpp
@@ -0,0 +1,79 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/geometry.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <Gfx/Graph/RenderList.hpp>
+
+namespace Threedim
+{
+
+// Sibling to ExtractBuffer2 (name-based buffer extractor) but for
+// texture auxiliaries. Reads `inputs.geometry.mesh.auxiliary_textures`
+// (populated by the halp/ossia bridge from `ossia::geometry::
+// auxiliary_textures` — which ScenePreprocessor fills with skybox,
+// irradiance_map, prefiltered_map, brdf_lut, shadow_map_array,
+// base_color_array, metal_rough_array, normal_array, emissive_array,
+// *_Dyn0..N, and any producer-injected texture) and re-publishes the
+// named entry on a standalone gpu_texture_output.
+//
+// Runtime-detects the texture shape (2D / TextureArray / Cubemap /
+// 3D) from QRhiTexture::flags() and stamps it into the output port's
+// `kind` field so downstream nodes / shader bindings know how to bind
+// (sampler2D / sampler2DArray / samplerCube / sampler3D). Width,
+// height, and layer-or-depth count come along from pixelSize() /
+// arraySize() / depth().
+//
+// Primary use case: post-processing shaders that depend on scene
+// aux textures without going through the scene cable themselves. E.g.
+// the shaderlib/depth set wants `camera` + `camera_prev` UBOs
+// (extract via ExtractBuffer2) and sometimes a depth-texture aux
+// (this node).
+class ExtractTexture
+{
+public:
+  halp_meta(name, "Extract texture (by name)")
+  halp_meta(category, "Visuals/Utilities")
+  halp_meta(c_name, "extract_texture_by_name")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url, "https://ossia.io/score-docs/processes/extract-texture.html")
+  halp_meta(uuid, "4d8f2a6b-7c19-4e05-a3d8-1b6f5e9c2a48")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Geometry");
+      halp::dynamic_gpu_geometry mesh;
+      float transform[16]{};
+      bool dirty_mesh = false;
+      bool dirty_transform = false;
+    } geometry; // incoming geometry; its mesh carries the auxiliary textures
+
+    struct : halp::lineedit<"Name", "skybox">
+    {
+      halp_meta(symbol, "name")
+    } name; // auxiliary texture to look up ("skybox" is presumably the initial text — TODO confirm)
+  } inputs;
+
+  struct
+  {
+    halp::gpu_texture_output<"Texture"> texture;
+  } outputs; // the named auxiliary texture, re-published as a standalone port
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r); // clears the cache below and resets the output texture
+  void operator()() { } // intentionally empty
+
+private:
+  // Last-known resolved values — used to skip work when nothing changed.
+  void* m_lastHandle{}; // reset to nullptr by release()
+  std::string m_lastName; // cleared by release()
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/FbxParser.cpp b/src/plugins/score-plugin-threedim/Threedim/FbxParser.cpp
new file mode 100644
index 0000000000..10a0f2885c
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/FbxParser.cpp
@@ -0,0 +1,1073 @@
+#include "FbxParser.hpp"
+
+#include "TangentUtils.hpp"
+
+#include <ossia/detail/hash.hpp>
+
+#include <ufbx.h>
+
+#include <QQuaternion>
+
+#include <cmath>
+#include <cstring>
+#include <unordered_map>
+
+namespace Threedim
+{
+
+// Transform a position by a ufbx 3x4 matrix (translation included), then
+// narrow each component of the result to float (double -> float).
+static void transform_point(
+    const ufbx_matrix& m, const ufbx_vec3& v, float& ox, float& oy, float& oz)
+{
+  const ufbx_vec3 p = ufbx_transform_position(&m, v);
+  ox = float(p.x); oy = float(p.y); oz = float(p.z);
+}
+
+// Transform a direction by the 3x3 part of a ufbx matrix (translation is
+// ignored) and normalize it. Near-zero results fall back to +Y.
+static void transform_normal(
+    const ufbx_matrix& m, const ufbx_vec3& v, float& ox, float& oy, float& oz)
+{
+  const ufbx_vec3 d = ufbx_transform_direction(&m, v);
+  const float dx = float(d.x), dy = float(d.y), dz = float(d.z);
+  const float norm = std::sqrt(dx * dx + dy * dy + dz * dz);
+
+  // Too short (or NaN) to normalize: report the up axis instead.
+  if(!(norm > 1e-8f))
+  {
+    ox = 0.0f; oy = 1.0f; oz = 0.0f;
+    return;
+  }
+  const float scale = 1.0f / norm;
+  ox = dx * scale; oy = dy * scale; oz = dz * scale;
+}
+
+// =============================================================================
+// Scene extractor — builds FbxParser::m_scene_nodes (hierarchical) using the
+// node's local_transform (NOT pre-transformed to world). Vertex data lives in
+// per-attribute shared buffers owned by ScenePart.
+// =============================================================================
+struct FbxSceneExtractor
+{
+  std::vector<FbxParser::SceneNode>& nodes;
+  std::vector<std::shared_ptr<ossia::material_component>>& materials;
+  std::shared_ptr<ossia::skeleton_component>& skeleton;
+  std::unordered_map<const ufbx_material*, int> material_index;
+  // bone_node → joint index within the global skeleton.
+  std::unordered_map<const ufbx_node*, int> joint_index_of;
+  std::vector<uint32_t> tri_indices;
+
+  // Look up (or create) the skeleton joint for a ufbx bone node and return
+  // its index; a null bone yields -1. New joints get parent_index -1, which
+  // link_joint_parents() is meant to resolve in a later pass.
+  int register_joint(const ufbx_node* bone)
+  {
+    if(!bone)
+      return -1;
+    if(const auto known = joint_index_of.find(bone);
+       known != joint_index_of.end())
+      return known->second;
+
+    if(!skeleton)
+      skeleton = std::make_shared<ossia::skeleton_component>();
+
+    ossia::skeleton_joint joint;
+    joint.name = std::string(bone->name.data, bone->name.length);
+    // Local TRS of the bone node, narrowed to float component by component.
+    const auto& local = bone->local_transform;
+    const ufbx_real trs[10]
+        = {local.translation.x, local.translation.y, local.translation.z,
+           local.rotation.x,    local.rotation.y,    local.rotation.z,
+           local.rotation.w,    local.scale.x,       local.scale.y,
+           local.scale.z};
+    for(int c = 0; c < 3; ++c)
+      joint.translation[c] = float(trs[c]);
+    for(int c = 0; c < 4; ++c)
+      joint.rotation[c] = float(trs[3 + c]);
+    for(int c = 0; c < 3; ++c)
+      joint.scale[c] = float(trs[7 + c]);
+
+    // No parent yet; identity bind matrix (diagonal at 0, 5, 10, 15) for now.
+    joint.parent_index = -1;
+    for(int k = 0; k < 16; ++k)
+      joint.inverse_bind_matrix[k] = (k % 5 == 0) ? 1.f : 0.f;
+
+    const int slot = (int)skeleton->joints.size();
+    skeleton->joints.push_back(joint);
+    joint_index_of.emplace(bone, slot);
+    return slot;
+  }
+
+  // Second pass, run once every bone is registered: point each joint at its
+  // nearest ancestor node that is itself a registered joint. Joints without
+  // such an ancestor are left untouched (parent_index stays as registered).
+  void link_joint_parents()
+  {
+    if(!skeleton)
+      return;
+
+    for(auto& [node, idx] : joint_index_of)
+    {
+      // Climb the ufbx hierarchy, skipping nodes that are not joints.
+      for(const ufbx_node* up = node->parent; up; up = up->parent)
+      {
+        const auto hit = joint_index_of.find(up);
+        if(hit == joint_index_of.end())
+          continue;
+        skeleton->joints[idx].parent_index = hit->second;
+        break;
+      }
+    }
+  }
+
+  // Convert a ufbx_material to a material_component (factors + texture refs).
+  // Returns the index in `materials`, registering it on first sight; -1 if null.
+  int register_material(const ufbx_material* m)
+  {
+    if(!m)
+      return -1;
+    auto it = material_index.find(m);
+    if(it != material_index.end())
+      return it->second;
+
+    auto mc = std::make_shared<ossia::material_component>();
+    mc->tag = std::string(m->name.data, m->name.length);
+
+    // ufbx exposes both classical (Phong/Lambert) and PBR maps. Prefer PBR
+    // values when present; fall back to FBX classical fields otherwise.
+    const auto& pbr = m->pbr;
+    const auto& fbx = m->fbx;
+
+    // Base color
+    if(pbr.base_color.has_value)
+    {
+      mc->base_color_factor[0] = float(pbr.base_color.value_vec4.x);
+      mc->base_color_factor[1] = float(pbr.base_color.value_vec4.y);
+      mc->base_color_factor[2] = float(pbr.base_color.value_vec4.z);
+      mc->base_color_factor[3] = float(pbr.base_color.value_vec4.w);
+    }
+    else if(fbx.diffuse_color.has_value)
+    {
+      mc->base_color_factor[0] = float(fbx.diffuse_color.value_vec3.x);
+      mc->base_color_factor[1] = float(fbx.diffuse_color.value_vec3.y);
+      mc->base_color_factor[2] = float(fbx.diffuse_color.value_vec3.z);
+      mc->base_color_factor[3] = 1.0f;
+    }
+
+    // Apply scalar diffuse factor as multiplier on RGB if present.
+    if(pbr.base_factor.has_value)
+    {
+      const float k = float(pbr.base_factor.value_real);
+      mc->base_color_factor[0] *= k;
+      mc->base_color_factor[1] *= k;
+      mc->base_color_factor[2] *= k;
+    }
+
+    // Metallic / Roughness
+    mc->metallic_factor
+        = pbr.metalness.has_value ? float(pbr.metalness.value_real) : 0.0f;
+    mc->roughness_factor
+        = pbr.roughness.has_value ? float(pbr.roughness.value_real) : 0.5f;
+
+    // Emissive
+    if(pbr.emission_color.has_value)
+    {
+      mc->emissive_factor[0] = float(pbr.emission_color.value_vec3.x);
+      mc->emissive_factor[1] = float(pbr.emission_color.value_vec3.y);
+      mc->emissive_factor[2] = float(pbr.emission_color.value_vec3.z);
+    }
+    else if(fbx.emission_color.has_value)
+    {
+      mc->emissive_factor[0] = float(fbx.emission_color.value_vec3.x);
+      mc->emissive_factor[1] = float(fbx.emission_color.value_vec3.y);
+      mc->emissive_factor[2] = float(fbx.emission_color.value_vec3.z);
+    }
+    mc->emissive_strength = pbr.emission_factor.has_value
+        ? float(pbr.emission_factor.value_real) : 1.0f;
+
+    // Alpha / opacity
+    if(pbr.opacity.has_value)
+    {
+      const float op = float(pbr.opacity.value_real);
+      mc->base_color_factor[3] *= op;
+      if(op < 0.999f)
+        mc->alpha = ossia::alpha_mode::blend;
+    }
+
+    // Material features. Two-sided shading from FBX is uncommon; default false.
+    mc->double_sided = false;
+    mc->unlit = false;
+
+    // Texture extraction. ufbx_material_map.texture (when non-null) carries
+    // either an absolute filename, a relative one (resolved against the FBX
+    // file dir), or an embedded blob (`content`). We populate texture_ref
+    // with `source` so the renderer's TextureCache can lazily upload on the
+    // render thread. The `source` member is never null when a texture is
+    // present, even if the file/blob is later unreadable.
+    auto fill_texture
+        = [](ossia::texture_ref& tr, const ufbx_material_map& map) {
+            if(!map.texture)
+              return;
+            const ufbx_texture* tex = map.texture;
+            auto src = std::make_shared<ossia::texture_source>();
+            // Prefer absolute filename when present (more robust); fall back
+            // to the original "filename" field, then to the relative one.
+            if(tex->absolute_filename.length > 0)
+              src->file_path = std::string(
+                  tex->absolute_filename.data, tex->absolute_filename.length);
+            else if(tex->filename.length > 0)
+              src->file_path = std::string(tex->filename.data, tex->filename.length);
+            else if(tex->relative_filename.length > 0)
+              src->file_path = std::string(
+                  tex->relative_filename.data, tex->relative_filename.length);
+
+            if(tex->content.size > 0)
+            {
+              auto blob = std::make_shared<std::vector<uint8_t>>(
+                  reinterpret_cast<const uint8_t*>(tex->content.data),
+                  reinterpret_cast<const uint8_t*>(tex->content.data) + tex->content.size);
+              src->embedded_data = blob;
+              // ufbx exposes the file extension via the texture name path —
+              // best-effort sniff for a MIME hint. The TextureLoader uses
+              // QImage::loadFromData with this hint and falls back to header
+              // sniffing when empty/wrong.
+              auto ext_hint = [](std::string_view path) -> std::string {
+                auto dot = path.rfind('.');
+                if(dot == std::string_view::npos)
+                  return {};
+                std::string e(path.substr(dot + 1));
+                for(auto& c : e) c = (char)std::tolower((unsigned char)c);
+                if(e == "jpg" || e == "jpeg") return "image/jpeg";
+                if(e == "png")               return "image/png";
+                if(e == "tga")               return "image/tga";
+                if(e == "tif" || e == "tiff") return "image/tiff";
+                if(e == "bmp")               return "image/bmp";
+                return {};
+              };
+              src->mime_type = ext_hint(src->file_path);
+            }
+
+            // Plan 09 S1: stamp the content hash so the preprocessor's
+            // decode cache (Gfx::AssetTable) can skip re-decoding the
+            // same image across multiple outputs / scene reloads.
+            // Prefer embedded bytes (authoritative) over path (stable
+            // fallback when the file is an external reference).
+            if(src->embedded_data && !src->embedded_data->empty())
+            {
+              src->content_hash = ossia::hash_bytes(
+                  src->embedded_data->data(),
+                  src->embedded_data->size());
+            }
+            else if(!src->file_path.empty())
+            {
+              src->content_hash = ossia::hash_bytes(
+                  src->file_path.data(), src->file_path.size());
+            }
+
+            tr.source = std::move(src);
+            tr.texcoord_set = 0;
+          };
+
+    fill_texture(mc->base_color_texture,
+                 pbr.base_color.texture ? pbr.base_color : fbx.diffuse_color);
+    fill_texture(mc->metallic_roughness_texture, pbr.metalness);
+    fill_texture(mc->normal_texture,
+                 pbr.normal_map.texture ? pbr.normal_map : fbx.normal_map);
+    fill_texture(mc->occlusion_texture, pbr.ambient_occlusion);
+    fill_texture(mc->emissive_texture,
+                 pbr.emission_color.texture ? pbr.emission_color : fbx.emission_color);
+
+    // --- OpenPBR / Arnold StandardSurface extensions --------------------
+    // ufbx exposes the full Arnold-family PBR parameter set (coat / sheen
+    // / transmission / subsurface / thin-film / anisotropic specular) on
+    // ufbx_material_pbr_maps — the same fields OpenPBR aggregates under
+    // its coat / fuzz / transmission / subsurface / thin-film lobes. The
+    // FBX PBR extension (Autodesk Standard Surface) is the predecessor of
+    // OpenPBR, so the mapping is 1:1 name-wise.
+    //
+    // Each `ufbx_material_map.has_value` tells us whether the DCC
+    // actually wrote that channel; if not we leave the material_component
+    // field at its spec default.
+
+    auto scalar = [](const ufbx_material_map& map, float fallback) -> float {
+      return map.has_value ? float(map.value_real) : fallback;
+    };
+    auto color3 = [](const ufbx_material_map& map, float (&out)[3],
+                     float fx, float fy, float fz) {
+      if(map.has_value)
+      {
+        out[0] = float(map.value_vec3.x);
+        out[1] = float(map.value_vec3.y);
+        out[2] = float(map.value_vec3.z);
+      }
+      else
+      {
+        out[0] = fx; out[1] = fy; out[2] = fz;
+      }
+    };
+
+    // Coat (KHR_materials_clearcoat equivalent).
+    mc->clearcoat.factor = scalar(pbr.coat_factor, 0.0f);
+    mc->clearcoat.roughness_factor = scalar(pbr.coat_roughness, 0.0f);
+    fill_texture(mc->clearcoat.texture,           pbr.coat_factor);
+    fill_texture(mc->clearcoat.roughness_texture, pbr.coat_roughness);
+    fill_texture(mc->clearcoat.normal_texture,    pbr.coat_normal);
+
+    // Sheen (fuzz in OpenPBR; KHR_materials_sheen).
+    mc->sheen.roughness_factor = scalar(pbr.sheen_roughness, 0.0f);
+    color3(pbr.sheen_color, mc->sheen.color_factor, 0.f, 0.f, 0.f);
+    fill_texture(mc->sheen.color_texture,     pbr.sheen_color);
+    fill_texture(mc->sheen.roughness_texture, pbr.sheen_roughness);
+
+    // Transmission (KHR_materials_transmission). The FBX path tracks
+    // thick-walled volume via transmission_depth / scatter / dispersion
+    // which we don't carry yet on material_component (see usd-openpbr
+    // analysis — volume-depth / scatter / dispersion are listed as the
+    // missing fields for full OpenPBR coverage).
+    mc->transmission.factor = scalar(pbr.transmission_factor, 0.0f);
+    fill_texture(mc->transmission.texture, pbr.transmission_factor);
+
+    // Volume (KHR_materials_volume) — attenuation color ≈ transmission_color.
+    // ufbx has no direct thicknessFactor; infer from transmission_depth.
+    mc->volume.thickness_factor = scalar(pbr.transmission_depth, 0.0f);
+    color3(
+        pbr.transmission_color, mc->volume.attenuation_color, 1.f, 1.f, 1.f);
+
+    // Specular (KHR_materials_specular) — Arnold specular_factor +
+    // specular_color; anisotropy separately.
+    mc->specular.factor = scalar(pbr.specular_factor, 1.0f);
+    color3(pbr.specular_color, mc->specular.color_factor, 1.f, 1.f, 1.f);
+    fill_texture(mc->specular.texture,       pbr.specular_factor);
+    fill_texture(mc->specular.color_texture, pbr.specular_color);
+
+    // IOR (KHR_materials_ior). Falls back to the spec default 1.5 when
+    // the FBX didn't write one.
+    mc->ior = scalar(pbr.specular_ior, 1.5f);
+
+    // Anisotropy (KHR_materials_anisotropy). ufbx splits anisotropy
+    // magnitude (specular_anisotropy) and rotation (specular_rotation).
+    mc->anisotropy.strength = scalar(pbr.specular_anisotropy, 0.0f);
+    mc->anisotropy.rotation = scalar(pbr.specular_rotation, 0.0f);
+    fill_texture(mc->anisotropy.texture, pbr.specular_anisotropy);
+
+    // Iridescence (KHR_materials_iridescence). ufbx's thin_film_*
+    // covers the same physics; min == max when ufbx provides only a
+    // single thickness value.
+    mc->iridescence.factor = scalar(pbr.thin_film_factor, 0.0f);
+    const float tf_thickness = scalar(pbr.thin_film_thickness, 400.0f);
+    mc->iridescence.thickness_min = tf_thickness;
+    mc->iridescence.thickness_max = tf_thickness;
+    mc->iridescence.ior = scalar(pbr.thin_film_ior, 1.3f);
+    fill_texture(mc->iridescence.texture, pbr.thin_film_factor);
+
+    // Subsurface as diffuse_transmission approximation. OpenPBR-style
+    // subsurface fields (weight / color / radius) aren't on our
+    // material_component yet, but we map the scalar factor +
+    // subsurface_color into diffuse_transmission as the closest
+    // available representation so the glTF-side KHR_materials_diffuse_
+    // transmission and FBX-side subsurface_factor land in the same slot.
+    mc->diffuse_transmission.factor = scalar(pbr.subsurface_factor, 0.0f);
+    color3(
+        pbr.subsurface_color, mc->diffuse_transmission.color_factor,
+        1.f, 1.f, 1.f);
+    fill_texture(mc->diffuse_transmission.texture,       pbr.subsurface_factor);
+    fill_texture(mc->diffuse_transmission.color_texture, pbr.subsurface_color);
+
+    // Thin-walled flag — Arnold exposes this as a material feature on
+    // the FBX side; mirror it to material_component for consumer
+    // shaders that want to switch back-side transmission on / off.
+    if(m->features.thin_walled.enabled)
+    {
+      // No dedicated `thin_walled` bool on material_component today;
+      // surface it via the generic property map so downstream shaders
+      // can opt-in. Key kept stable to match OpenPBR_ResolvedInputs
+      // field name.
+      mc->properties["thin_walled"] = true;
+    }
+
+    // Stable id — the ufbx element_id when it is non-zero, else a freshly
+    // minted id (ossia::mint_stable_id). Re-reads of the same asset may
+    // still mint different ids, but within-session fingerprinting stays
+    // pointer-independent. `m` is known non-null here (checked on entry).
+    mc->stable_id = m->element.element_id
+                        ? (uint64_t)m->element.element_id
+                        : ossia::mint_stable_id();
+    const int idx = (int)materials.size();
+    materials.push_back(mc);
+    material_index.emplace(m, idx);
+    return idx;
+  }
+
+  // Expand one vertex attribute of `part` into a newly allocated, triangle-
+  // ordered float buffer: three entries per triangle, `floats_per_vertex`
+  // floats per entry. `read(dst, index)` must write the attribute found at
+  // mesh index `index` to `dst`. `tris` is scratch storage for triangulation.
+  template <typename Read>
+  static std::shared_ptr<std::vector<float>> extract_attribute(
+      const ufbx_mesh* umesh, const ufbx_mesh_part& part,
+      int floats_per_vertex, std::vector<uint32_t>& tris,
+      Read&& read)
+  {
+    const size_t vertex_total = size_t(int64_t(part.num_triangles) * 3);
+    auto buffer = std::make_shared<std::vector<float>>(
+        vertex_total * floats_per_vertex);
+    float* cursor = buffer->data();
+
+    for(size_t f = 0; f < part.num_faces; ++f)
+    {
+      const ufbx_face face = umesh->faces.data[part.face_indices.data[f]];
+      tris.resize(face.num_indices * 3);
+      const uint32_t tri_count
+          = ufbx_triangulate_face(tris.data(), tris.size(), umesh, face);
+      // Emit the triangulated corners in order, three per triangle.
+      for(uint32_t corner = 0; corner < tri_count * 3; ++corner)
+      {
+        read(cursor, tris[corner]);
+        cursor += floats_per_vertex;
+      }
+    }
+    return buffer;
+  }
+
+  // Build a ScenePart for one (mesh, material_part) pair. Vertex data is in
+  // mesh-local space — node hierarchy carries the transform.
+  FbxParser::ScenePart extract_part(
+      const ufbx_node* node, const ufbx_mesh* umesh,
+      const ufbx_mesh_part& part)
+  {
+    FbxParser::ScenePart sp;
+    sp.vertex_count = uint32_t(part.num_triangles) * 3;
+    if(sp.vertex_count == 0)
+      return sp;
+
+    const bool has_normals = umesh->vertex_normal.exists;
+    const bool has_uv = umesh->vertex_uv.exists;
+    const bool has_colors = umesh->vertex_color.exists;
+    const bool has_tangents = umesh->vertex_tangent.exists;
+
+    sp.positions = extract_attribute(
+        umesh, part, 3, tri_indices, [umesh](float* dst, uint32_t idx) {
+          ufbx_vec3 p = umesh->vertex_position.values.data[
+              umesh->vertex_position.indices.data[idx]];
+          dst[0] = float(p.x); dst[1] = float(p.y); dst[2] = float(p.z);
+        });
+    // Local-space AABB for per-draw GPU culling. Walk the just-extracted
+    // positions once. ~10 ns/vertex — negligible at load time.
+    if(sp.positions && !sp.positions->empty())
+      sp.bounds = ossia::compute_aabb_from_positions(
+          sp.positions->data(), sp.vertex_count);
+
+    if(has_normals)
+    {
+      sp.normals = extract_attribute(
+          umesh, part, 3, tri_indices, [umesh](float* dst, uint32_t idx) {
+            ufbx_vec3 n = umesh->vertex_normal.values.data[
+                umesh->vertex_normal.indices.data[idx]];
+            float len = float(std::sqrt(n.x * n.x + n.y * n.y + n.z * n.z));
+            if(len > 1e-8f)
+            {
+              float inv = 1.f / len;
+              dst[0] = float(n.x) * inv;
+              dst[1] = float(n.y) * inv;
+              dst[2] = float(n.z) * inv;
+            }
+            else
+            {
+              dst[0] = 0.f; dst[1] = 1.f; dst[2] = 0.f;
+            }
+          });
+    }
+
+    if(has_uv)
+    {
+      sp.texcoords = extract_attribute(
+          umesh, part, 2, tri_indices, [umesh](float* dst, uint32_t idx) {
+            ufbx_vec2 uv = umesh->vertex_uv.values.data[
+                umesh->vertex_uv.indices.data[idx]];
+            dst[0] = float(uv.x); dst[1] = float(uv.y);
+          });
+    }
+
+    if(has_colors)
+    {
+      sp.colors = extract_attribute(
+          umesh, part, 4, tri_indices, [umesh](float* dst, uint32_t idx) {
+            ufbx_vec4 c = umesh->vertex_color.values.data[
+                umesh->vertex_color.indices.data[idx]];
+            dst[0] = float(c.x); dst[1] = float(c.y);
+            dst[2] = float(c.z); dst[3] = float(c.w);
+          });
+    }
+
+    if(has_tangents)
+    {
+      sp.tangents = extract_attribute(
+          umesh, part, 4, tri_indices, [umesh, has_normals](float* dst, uint32_t idx) {
+            ufbx_vec3 t = umesh->vertex_tangent.values.data[
+                umesh->vertex_tangent.indices.data[idx]];
+            float len = float(std::sqrt(t.x * t.x + t.y * t.y + t.z * t.z));
+            if(len > 1e-8f)
+            {
+              float inv = 1.f / len;
+              dst[0] = float(t.x) * inv;
+              dst[1] = float(t.y) * inv;
+              dst[2] = float(t.z) * inv;
+            }
+            else
+            {
+              dst[0] = 1.f; dst[1] = 0.f; dst[2] = 0.f;
+            }
+            // Handedness = sign(dot(cross(n, t), b)); needs normal + bitangent.
+            if(has_normals && umesh->vertex_bitangent.exists)
+            {
+              ufbx_vec3 n = umesh->vertex_normal.values.data[
+                  umesh->vertex_normal.indices.data[idx]];
+              ufbx_vec3 b = umesh->vertex_bitangent.values.data[
+                  umesh->vertex_bitangent.indices.data[idx]];
+              float cx = float(n.y * t.z - n.z * t.y);
+              float cy = float(n.z * t.x - n.x * t.z);
+              float cz = float(n.x * t.y - n.y * t.x);
+              float d = cx * float(b.x) + cy * float(b.y) + cz * float(b.z);
+              dst[3] = d < 0.f ? -1.f : 1.f;
+            }
+            else
+            {
+              dst[3] = 1.f;
+            }
+          });
+    }
+    else if(has_normals && has_uv)
+    {
+      // FBX mesh has no TANGENT channel — synthesize tangents from
+      // position / normal / UV via mikktspace so normal maps work.
+      // Extracted attributes here are already triangle-unindexed
+      // (each triangle has 3 unique vertices), so no index buffer is
+      // needed and mikktspace's contract is satisfied naturally.
+      sp.tangents = Threedim::generate_tangents_mikktspace(
+          sp.positions, sp.normals, sp.texcoords,
+          /*indices=*/nullptr, sp.vertex_count);
+    }
+
+    // Skinning: if the mesh has a skin deformer, pull top-4 (cluster, weight)
+    // pairs per vertex. ufbx sorts weights descending, so we can truncate to
+    // 4 safely. Joint indices map through register_joint into the global
+    // skeleton. The per-triangle expansion mirrors the position walk: one
+    // output entry per (face_index, triangulated_vertex).
+    if(umesh->skin_deformers.count > 0)
+    {
+      const ufbx_skin_deformer* skin = umesh->skin_deformers.data[0];
+
+      // Register all clusters' bones up front so register_joint is a plain
+      // lookup in the hot per-vertex loop below.
+      std::vector<int> cluster_to_joint(skin->clusters.count, -1);
+      for(size_t ci = 0; ci < skin->clusters.count; ci++)
+      {
+        const ufbx_skin_cluster* cl = skin->clusters.data[ci];
+        if(!cl || !cl->bone_node)
+          continue;
+        int j = register_joint(cl->bone_node);
+        cluster_to_joint[ci] = j;
+
+        // The cluster's geometry_to_bone IS the inverse-bind matrix (glTF
+        // convention): vertices in geometry-local space → bone-local. Store
+        // as column-major 4x4 (ufbx_matrix is row-major 3x4; we transpose).
+        const ufbx_matrix& m = cl->geometry_to_bone;
+        float* ibm = skeleton->joints[j].inverse_bind_matrix;
+        // Column 0: (m00, m10, m20, 0), col 1, col 2, col 3 (translation)
+        ibm[0] = float(m.m00); ibm[1] = float(m.m10); ibm[2] = float(m.m20); ibm[3] = 0.f;
+        ibm[4] = float(m.m01); ibm[5] = float(m.m11); ibm[6] = float(m.m21); ibm[7] = 0.f;
+        ibm[8] = float(m.m02); ibm[9] = float(m.m12); ibm[10] = float(m.m22); ibm[11] = 0.f;
+        ibm[12] = float(m.m03); ibm[13] = float(m.m13); ibm[14] = float(m.m23); ibm[15] = 1.f;
+      }
+
+      // Allocate joints0/weights0 per-triangle-vertex buffers. ufbx indexes
+      // skin_vertices by the base vertex (not the triangulated index), so
+      // we resolve via umesh->vertex_position.indices — same pattern as the
+      // attribute extraction above.
+      const int64_t num_verts = int64_t(part.num_triangles) * 3;
+      auto joints_buf = std::make_shared<std::vector<uint16_t>>(size_t(num_verts) * 4);
+      auto weights_buf = std::make_shared<std::vector<float>>(size_t(num_verts) * 4);
+      uint16_t* jdst = joints_buf->data();
+      float*    wdst = weights_buf->data();
+
+      for(size_t fi = 0; fi < part.num_faces; fi++)
+      {
+        const uint32_t face_idx = part.face_indices.data[fi];
+        const ufbx_face face = umesh->faces.data[face_idx];
+        tri_indices.resize(face.num_indices * 3);
+        uint32_t num_tris = ufbx_triangulate_face(
+            tri_indices.data(), tri_indices.size(), umesh, face);
+        for(uint32_t ti = 0; ti < num_tris; ti++)
+        {
+          for(int vi = 0; vi < 3; vi++)
+          {
+            uint32_t idx = tri_indices[ti * 3 + vi];
+            uint32_t base_vtx = umesh->vertex_position.indices.data[idx];
+            const ufbx_skin_vertex sv = skin->vertices.data[base_vtx];
+
+            // Pick up to 4 weights (already sorted descending by weight).
+            float w[4] = {0, 0, 0, 0};
+            uint16_t j[4] = {0, 0, 0, 0};
+            const uint32_t n = std::min<uint32_t>(sv.num_weights, 4);
+            for(uint32_t k = 0; k < n; ++k)
+            {
+              const ufbx_skin_weight sw = skin->weights.data[sv.weight_begin + k];
+              if(sw.cluster_index < cluster_to_joint.size()
+                 && cluster_to_joint[sw.cluster_index] >= 0)
+              {
+                j[k] = uint16_t(cluster_to_joint[sw.cluster_index]);
+                w[k] = float(sw.weight);
+              }
+            }
+            // Renormalise — ufbx doesn't guarantee the top-4 sum to 1.
+            float sum = w[0] + w[1] + w[2] + w[3];
+            if(sum > 1e-6f)
+            {
+              float inv = 1.f / sum;
+              w[0] *= inv; w[1] *= inv; w[2] *= inv; w[3] *= inv;
+            }
+            jdst[0] = j[0]; jdst[1] = j[1]; jdst[2] = j[2]; jdst[3] = j[3];
+            wdst[0] = w[0]; wdst[1] = w[1]; wdst[2] = w[2]; wdst[3] = w[3];
+            jdst += 4;
+            wdst += 4;
+          }
+        }
+      }
+
+      sp.joints0 = std::move(joints_buf);
+      sp.weights0 = std::move(weights_buf);
+      sp.skin_joint_count = int(skeleton ? skeleton->joints.size() : 0);
+    }
+
+    // Material assignment — prefer the per-instance node->materials list
+    // (FBX allows different node instances to override mesh materials), then
+    // fall back to the mesh's own list. No match leaves material_index at -1.
+    const ufbx_material* mat = nullptr;
+    if(part.index < node->materials.count)
+      mat = node->materials.data[part.index];
+    if(!mat && part.index < umesh->materials.count)
+      mat = umesh->materials.data[part.index];
+    sp.material_index = register_material(mat);
+
+    return sp;
+  }
+
+  // Convert a ufbx_light to a populated light_component. Caller takes
+  // ownership. Returns nullptr for a null or non-emitting light, and for
+  // types that aren't representable (e.g. ufbx VOLUME).
+  static std::shared_ptr<ossia::light_component> to_light(const ufbx_light* l)
+  {
+    // `l->cast_light` (bool) is the "is this light emitting at all"
+    // gate in FBX. ossia::light_component has no direct equivalent —
+    // a disabled light would be culled upstream (scene_filter by visibility
+    // or a dedicated filter). Dropping a non-emitting light here, before
+    // anything is allocated, keeps the RawLight arena free of dead slots.
+    if(!l || !l->cast_light)
+      return {};
+
+    auto lc = std::make_shared<ossia::light_component>();
+    switch(l->type)
+    {
+      case UFBX_LIGHT_DIRECTIONAL:
+        lc->type = ossia::light_type::directional; break;
+      case UFBX_LIGHT_POINT:
+        lc->type = ossia::light_type::point; break;
+      case UFBX_LIGHT_SPOT:
+        lc->type = ossia::light_type::spot; break;
+      case UFBX_LIGHT_AREA:
+        // Rectangle area lights map to rect_area; every other ufbx area
+        // shape is treated as sphere_area (close enough at v1).
+        lc->type = (l->area_shape == UFBX_LIGHT_AREA_SHAPE_RECTANGLE)
+            ? ossia::light_type::rect_area
+            : ossia::light_type::sphere_area;
+        break;
+      default: // UFBX_LIGHT_VOLUME and any future types — skip.
+        return {};
+    }
+    switch(l->decay)
+    {
+      case UFBX_LIGHT_DECAY_NONE:      lc->decay = ossia::light_decay::none; break;
+      case UFBX_LIGHT_DECAY_LINEAR:    lc->decay = ossia::light_decay::linear; break;
+      case UFBX_LIGHT_DECAY_QUADRATIC: lc->decay = ossia::light_decay::quadratic; break;
+      case UFBX_LIGHT_DECAY_CUBIC:     lc->decay = ossia::light_decay::cubic; break;
+      default: break;
+    }
+    lc->color[0] = float(l->color.x);
+    lc->color[1] = float(l->color.y);
+    lc->color[2] = float(l->color.z);
+    lc->intensity = float(l->intensity);
+    // Cone angles: degrees -> radians (local pi; M_PI is a non-standard macro).
+    constexpr float pi = 3.14159265358979323846f;
+    lc->inner_cone_angle = float(l->inner_angle) * pi / 180.f;
+    lc->outer_cone_angle = float(l->outer_angle) * pi / 180.f;
+    lc->shadow.enabled = l->cast_shadows;
+
+    // Range: FBX doesn't expose falloff distance as a first-class
+    // ufbx_light field, but the underlying FBX property `FarAttenuationEnd`
+    // (the distance past which the light contributes nothing) maps
+    // cleanly onto score's `range`. 0 = infinite, which is the ossia
+    // light_component convention for "no cutoff."  Read via the generic
+    // props accessor since ufbx pins it in `l->props`, not in the
+    // ufbx_light struct fields.
+    lc->range = float(ufbx_find_real(&l->props, "FarAttenuationEnd", 0.0));
+
+    // Area-light dimensions: FBX has no standard area_width / area_height
+    // fields in ufbx_light. Authoring tools encode area size through
+    // the node's own scale; we leave lc->width / height / radius at
+    // their defaults and let a future shader-side area sampler derive
+    // effective dimensions from the node transform when needed.
+
+    return lc;
+  }
+
+  // Convert a ufbx_camera to a camera_component. ufbx reports field-of-view
+  // in degrees; the vertical (.y) component is converted to radians for yfov.
+  static std::shared_ptr<ossia::camera_component> to_camera(const ufbx_camera* c)
+  {
+    if(!c)
+      return {};
+    auto cc = std::make_shared<ossia::camera_component>();
+    cc->projection = (c->projection_mode == UFBX_PROJECTION_MODE_ORTHOGRAPHIC)
+        ? ossia::camera_projection::orthographic
+        : ossia::camera_projection::perspective;
+    cc->yfov = float(c->field_of_view_deg.y) * float(M_PI) / 180.f;
+    cc->aspect_ratio = float(c->aspect_ratio > 0 ? c->aspect_ratio : 1.0);
+    cc->xmag = float(c->orthographic_size.x);
+    cc->ymag = float(c->orthographic_size.y);
+    cc->znear = float(c->near_plane);
+    cc->zfar  = float(c->far_plane);
+    cc->physical.focal_length = float(c->focal_length_mm);
+    cc->physical.horizontal_aperture = float(c->aperture_size_inch.x * 25.4);
+    cc->physical.vertical_aperture   = float(c->aperture_size_inch.y * 25.4);
+    return cc;
+  }
+
+  void extract_node(const ufbx_node* node, int parent_index)
+  {
+    FbxParser::SceneNode sn;
+    sn.name = std::string(node->name.data, node->name.length);
+    sn.parent_index = parent_index;
+    sn.light = to_light(node->light);
+    sn.camera = to_camera(node->camera);
+
+    // Decompose local_transform — ufbx already gives us TRS.
+    const auto& lt = node->local_transform;
+    sn.local_transform.translation[0] = float(lt.translation.x);
+    sn.local_transform.translation[1] = float(lt.translation.y);
+    sn.local_transform.translation[2] = float(lt.translation.z);
+    sn.local_transform.rotation[0] = float(lt.rotation.x);
+    sn.local_transform.rotation[1] = float(lt.rotation.y);
+    sn.local_transform.rotation[2] = float(lt.rotation.z);
+    sn.local_transform.rotation[3] = float(lt.rotation.w);
+    sn.local_transform.scale[0] = float(lt.scale.x);
+    sn.local_transform.scale[1] = float(lt.scale.y);
+    sn.local_transform.scale[2] = float(lt.scale.z);
+
+    // Extract mesh parts if this node holds a mesh.
+    if(node->mesh)
+    {
+      const ufbx_mesh* umesh = node->mesh;
+      if(umesh->material_parts.count > 0)
+      {
+        for(size_t pi = 0; pi < umesh->material_parts.count; pi++)
+        {
+          auto sp = extract_part(node, umesh, umesh->material_parts.data[pi]);
+          if(sp.vertex_count > 0)
+            sn.parts.push_back(std::move(sp));
+        }
+      }
+      else
+      {
+        ufbx_mesh_part whole{};
+        whole.num_faces = umesh->num_faces;
+        whole.num_triangles = umesh->num_triangles;
+        std::vector<uint32_t> all_faces(umesh->num_faces);
+        for(size_t i = 0; i < umesh->num_faces; i++)
+          all_faces[i] = uint32_t(i);
+        whole.face_indices.data = all_faces.data();
+        whole.face_indices.count = all_faces.size();
+        auto sp = extract_part(node, umesh, whole);
+        if(sp.vertex_count > 0)
+          sn.parts.push_back(std::move(sp));
+      }
+    }
+
+    const int self_index = (int)nodes.size();
+    nodes.push_back(std::move(sn));
+
+    // Recurse into children.
+    for(size_t ci = 0; ci < node->children.count; ci++)
+      extract_node(node->children.data[ci], self_index);
+  }
+
+  void extract_scene(const ufbx_scene* scene)
+  {
+    // Skip the synthetic root node; emit its children as actual roots.
+    if(!scene->root_node)
+      return;
+    for(size_t ci = 0; ci < scene->root_node->children.count; ci++)
+      extract_node(scene->root_node->children.data[ci], -1);
+  }
+};
+
+// =============================================================================
+// rebuild_scene — walk m_scene_nodes, build hierarchical scene_spec with
+// mesh_primitive[] (modern path; ScenePreprocessor handles both this and the
+// legacy_geometry path).
+// =============================================================================
+
+// Wrap a per-attribute float buffer as a buffer_resource_ptr suitable for
+// mesh_primitive::vertex_buffers. The data lifetime is held by the shared
+// pointer aliasing — no extra copy.
+static ossia::buffer_resource_ptr make_buffer_resource(
+    std::shared_ptr<std::vector<float>> floats)
+{
+  if(!floats || floats->empty())
+    return {};
+  auto br = std::make_shared<ossia::buffer_resource>();
+  ossia::buffer_data bd;
+  // Aliasing constructor: the resulting shared_ptr keeps `floats` alive but
+  // exposes a `const void*` pointing at the contiguous data.
+  bd.data = std::shared_ptr<const void>(floats, floats->data());
+  bd.byte_size = int64_t(floats->size() * sizeof(float));
+  bd.usage_hint = ossia::buffer_data::usage::vertex_buffer;
+  br->resource = std::move(bd);
+  br->dirty_index = 1;
+  return br;
+}
+
+// Build one mesh_primitive from a ScenePart. Each present attribute lives in
+// its own buffer (one buffer_index per attribute, one binding per attribute).
+static ossia::mesh_primitive part_to_primitive(
+    const FbxParser::ScenePart& part,
+    const std::vector<std::shared_ptr<ossia::material_component>>& mats)
+{
+  ossia::mesh_primitive mp;
+  mp.stable_id = ossia::mint_stable_id();
+  mp.topology = ossia::primitive_topology::triangles;
+  mp.index_type = ossia::index_format::none;
+  mp.vertex_count = part.vertex_count;
+  mp.index_count = 0;
+  mp.first_vertex = 0;
+  mp.first_index = 0;
+  mp.vertex_offset = 0;
+  mp.bounds = part.bounds;
+  if(part.material_index >= 0
+     && std::size_t(part.material_index) < mats.size())
+    mp.material = mats[part.material_index];
+
+  uint32_t buffer_idx = 0;
+  auto add = [&](std::shared_ptr<std::vector<float>> data, int floats_per_vertex,
+                 ossia::attribute_semantic sem, ossia::vertex_format fmt) {
+    if(!data || data->empty())
+      return;
+    mp.vertex_buffers.push_back(make_buffer_resource(std::move(data)));
+    ossia::vertex_attribute attr;
+    attr.semantic = sem;
+    attr.format = fmt;
+    attr.buffer_index = buffer_idx;
+    attr.byte_offset = 0;
+    attr.byte_stride = uint32_t(floats_per_vertex) * sizeof(float);
+    attr.rate = ossia::vertex_attribute::input_rate::per_vertex;
+    mp.attributes.push_back(attr);
+    ++buffer_idx;
+  };
+
+  add(part.positions, 3,
+      ossia::attribute_semantic::position, ossia::vertex_format::float3);
+  add(part.normals, 3,
+      ossia::attribute_semantic::normal, ossia::vertex_format::float3);
+  add(part.texcoords, 2,
+      ossia::attribute_semantic::texcoord0, ossia::vertex_format::float2);
+  add(part.colors, 4,
+      ossia::attribute_semantic::color0, ossia::vertex_format::float4);
+  add(part.tangents, 4,
+      ossia::attribute_semantic::tangent, ossia::vertex_format::float4);
+
+  // Skinning attributes. joints0 is uint16x4 (halves per-vertex storage vs
+  // uint32x4); weights0 is float4. Only emitted when the mesh has skinning.
+  if(part.joints0 && !part.joints0->empty())
+  {
+    auto joint_br = std::make_shared<ossia::buffer_resource>();
+    ossia::buffer_data bd;
+    bd.data = std::shared_ptr<const void>(part.joints0, part.joints0->data());
+    bd.byte_size = int64_t(part.joints0->size() * sizeof(uint16_t));
+    bd.usage_hint = ossia::buffer_data::usage::vertex_buffer;
+    joint_br->resource = std::move(bd);
+    joint_br->dirty_index = 1;
+    mp.vertex_buffers.push_back(joint_br);
+
+    ossia::vertex_attribute attr;
+    attr.semantic = ossia::attribute_semantic::joints0;
+    attr.format = ossia::vertex_format::uint16x4;
+    attr.buffer_index = buffer_idx++;
+    attr.byte_offset = 0;
+    attr.byte_stride = 4 * sizeof(uint16_t);
+    attr.rate = ossia::vertex_attribute::input_rate::per_vertex;
+    mp.attributes.push_back(attr);
+  }
+  if(part.weights0 && !part.weights0->empty())
+  {
+    mp.vertex_buffers.push_back(make_buffer_resource(part.weights0));
+    ossia::vertex_attribute attr;
+    attr.semantic = ossia::attribute_semantic::weights0;
+    attr.format = ossia::vertex_format::float4;
+    attr.buffer_index = buffer_idx++;
+    attr.byte_offset = 0;
+    attr.byte_stride = 4 * sizeof(float);
+    attr.rate = ossia::vertex_attribute::input_rate::per_vertex;
+    mp.attributes.push_back(attr);
+  }
+
+  return mp;
+}
+
+void FbxParser::rebuild_scene()
+{
+  if(m_scene_nodes.empty())
+    return;
+
+  // Allocate scene_node + children list shells in flat arrays first, then
+  // wire children using parent_index. Two-pass keeps the code simple and
+  // avoids any std::shared_ptr<scene_node> circular-ownership concerns.
+  const std::size_t N = m_scene_nodes.size();
+  std::vector<std::shared_ptr<ossia::scene_node>> nodes;
+  std::vector<std::shared_ptr<std::vector<ossia::scene_payload>>> children_lists;
+  nodes.reserve(N);
+  children_lists.reserve(N);
+  for(std::size_t i = 0; i < N; ++i)
+  {
+    auto n = std::make_shared<ossia::scene_node>();
+    n->name = m_scene_nodes[i].name;
+    n->visible = true;
+    nodes.push_back(std::move(n));
+    children_lists.push_back(
+        std::make_shared<std::vector<ossia::scene_payload>>());
+  }
+
+  // Per-node payload list: first the local transform (so it applies to all
+  // subsequent siblings, matching FlattenVisitor's convention), then the mesh,
+  // light and camera components (if any). Child nodes are pushed afterwards.
+  for(std::size_t i = 0; i < N; ++i)
+  {
+    auto& src = m_scene_nodes[i];
+    auto& lst = *children_lists[i];
+
+    lst.push_back(src.local_transform);
+
+    if(!src.parts.empty())
+    {
+      auto mc = std::make_shared<ossia::mesh_component>();
+      mc->primitives.reserve(src.parts.size());
+      bool any_skinned = false;
+      for(const auto& part : src.parts)
+      {
+        mc->primitives.push_back(part_to_primitive(part, m_materials));
+        if(part.skin_joint_count > 0)
+          any_skinned = true;
+      }
+      // Attach the global skeleton when any part of this mesh is skinned.
+      if(any_skinned && m_skeleton)
+        mc->skin = ossia::skeleton_component_ptr(m_skeleton);
+      mc->dirty_index = 1;
+      lst.push_back(ossia::mesh_component_ptr(std::move(mc)));
+    }
+    if(src.light)
+      lst.push_back(ossia::light_component_ptr(src.light));
+    if(src.camera)
+      lst.push_back(ossia::camera_component_ptr(src.camera));
+  }
+
+  // Wire children (parent_index references earlier entries).
+  for(std::size_t i = 0; i < N; ++i)
+  {
+    int p = m_scene_nodes[i].parent_index;
+    if(p >= 0 && p < int(N))
+      children_lists[p]->push_back(ossia::scene_node_ptr(nodes[i]));
+  }
+  for(std::size_t i = 0; i < N; ++i)
+    nodes[i]->children = children_lists[i];
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  for(std::size_t i = 0; i < N; ++i)
+    if(m_scene_nodes[i].parent_index < 0)
+      roots->push_back(ossia::scene_node_ptr(nodes[i]));
+
+  // Materials: publish the registered list. Const conversion happens via
+  // material_component_ptr (shared_ptr<const material_component>).
+  auto mat_list = std::make_shared<std::vector<ossia::material_component_ptr>>();
+  mat_list->reserve(m_materials.size());
+  for(auto& m : m_materials)
+    mat_list->push_back(ossia::material_component_ptr(m));
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(roots);
+  state->materials = std::move(mat_list);
+  if(m_skeleton && !m_skeleton->joints.empty())
+  {
+    auto skins = std::make_shared<std::vector<ossia::skeleton_component_ptr>>();
+    skins->push_back(ossia::skeleton_component_ptr(m_skeleton));
+    state->skeletons = std::move(skins);
+  }
+  state->version = 1;
+  state->dirty_index = 1;
+
+  // AssetLoader wraps m_raw_state in a TRS payload externally; we
+  // publish only the raw scene here.
+  m_raw_state = std::move(state);
+}
+
+// Parse the FBX at tv.filename; returns an apply-lambda, or {} on any failure.
+std::function<void(FbxParser&)> FbxParser::ins::fbx_t::process(file_type tv)
+{
+  if(tv.filename.empty())
+    return {};
+
+  ufbx_load_opts opts{};
+  opts.generate_missing_normals = true;
+  opts.normalize_normals = true;
+  opts.normalize_tangents = true;
+  // Convert to OpenGL coordinate system: +X right, +Y up, +Z front (= -Z forward)
+  opts.target_axes.right = UFBX_COORDINATE_AXIS_POSITIVE_X;
+  opts.target_axes.up = UFBX_COORDINATE_AXIS_POSITIVE_Y;
+  opts.target_axes.front = UFBX_COORDINATE_AXIS_POSITIVE_Z;
+  opts.target_unit_meters = 1.0;
+
+  // Bake "geometric transforms" (the non-inherited per-attachment offset) into
+  // the vertex data: node->geometry_transform is identity afterward and vertex
+  // positions are in the node's local frame, as the hierarchical output needs.
+  opts.geometry_transform_handling = UFBX_GEOMETRY_TRANSFORM_HANDLING_MODIFY_GEOMETRY;
+  opts.space_conversion = UFBX_SPACE_CONVERSION_ADJUST_TRANSFORMS;
+  opts.use_blender_pbr_material = true;
+
+  // Nothing guarantees filename.data() is NUL-terminated: pass the length too.
+  ufbx_error error{};
+  ufbx_scene* scene
+      = ufbx_load_file_len(tv.filename.data(), tv.filename.size(), &opts, &error);
+  if(!scene)
+    return {};
+
+  // Extract hierarchical scene (drives rebuild_scene).
+  std::vector<FbxParser::SceneNode> scene_nodes;
+  std::vector<std::shared_ptr<ossia::material_component>> materials;
+  std::shared_ptr<ossia::skeleton_component> skeleton;
+  FbxSceneExtractor scene_ex{scene_nodes, materials, skeleton, {}, {}, {}};
+  scene_ex.extract_scene(scene);
+  scene_ex.link_joint_parents();
+  ufbx_free_scene(scene);
+
+  if(scene_nodes.empty())
+    return {};
+
+  return [scene_nodes = std::move(scene_nodes),
+          materials = std::move(materials),
+          skeleton = std::move(skeleton)](FbxParser& o) mutable {
+    std::swap(o.m_scene_nodes, scene_nodes);
+    std::swap(o.m_materials, materials);
+    o.m_skeleton = std::move(skeleton);
+    o.rebuild_scene();
+  };
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/FbxParser.hpp b/src/plugins/score-plugin-threedim/Threedim/FbxParser.hpp
new file mode 100644
index 0000000000..3cd0bc8349
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/FbxParser.hpp
@@ -0,0 +1,95 @@
+#pragma once
+#include <halp/file_port.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+namespace Threedim
+{
+
+// Internal FBX parsing class — drives ufbx to load an FBX file by path and
+// builds an ossia::scene_state from it. Not a halp node in its own right (the
+// user-facing entry point is AssetLoader). AssetLoader calls the static
+// `ins::fbx_t::process` to obtain an apply-lambda, applies it against a
+// throwaway FbxParser instance, then copies out `m_raw_state`.
+class FbxParser
+{
+public:
+  struct ins
+  {
+    struct fbx_t : halp::file_port<"FBX file">
+    {
+      static std::function<void(FbxParser&)> process(file_type data);
+    } fbx;
+  } inputs;
+
+  void rebuild_scene();
+
+  // -- Rich scene staging (drives rebuild_scene) -----------------------------
+  // Built once per `process()` call. Lives on the execution thread; rebuilt
+  // into ossia::scene_spec by rebuild_scene().
+  struct ScenePart
+  {
+    // Per-attribute CPU buffers, one shared_ptr per stream. Each spans
+    // vertex_count elements of the matching format. Empty pointers indicate
+    // the attribute is absent on this part.
+    std::shared_ptr<std::vector<float>> positions;  // 3 floats per vertex (always present)
+    std::shared_ptr<std::vector<float>> normals;    // 3 floats per vertex
+    std::shared_ptr<std::vector<float>> texcoords;  // 2 floats per vertex
+    std::shared_ptr<std::vector<float>> colors;     // 4 floats per vertex (RGBA)
+    std::shared_ptr<std::vector<float>> tangents;   // 4 floats per vertex
+
+    // Skinning: top-4 joints + weights per vertex. joints holds uint16 per
+    // component (4 per vertex); weights holds float (4 per vertex). Both
+    // are populated iff the mesh has a skin deformer.
+    std::shared_ptr<std::vector<uint16_t>> joints0;
+    std::shared_ptr<std::vector<float>>    weights0;
+
+    uint32_t vertex_count{0};
+
+    // Index into FbxParser::m_materials. -1 = no material assigned.
+    int material_index{-1};
+
+    // Joint count of the skeleton this part is skinned against (a count,
+    // not an index). rebuild_scene attaches FbxParser::m_skeleton to the
+    // mesh_component when any part has a non-zero count. 0 = no skin.
+    int skin_joint_count{0};
+
+    // Local-space AABB over `positions`. Computed once by extract_part
+    // (or whoever fills ScenePart) and carried into mesh_primitive by
+    // part_to_primitive. Empty aabb = "not yet computed"; downstream
+    // GPU culling treats empty as infinite.
+    ossia::aabb bounds{};
+  };
+
+  struct SceneNode
+  {
+    std::string name;
+    ossia::scene_transform local_transform;  // node's local TRS
+    int parent_index{-1};                    // index into m_scene_nodes (-1 = root)
+    std::vector<ScenePart> parts;            // 0..N mesh parts (one per material)
+
+    // Optional attached components — populated during extraction when the
+    // ufbx_node carries them. `rebuild_scene` adds them as scene_payloads.
+    std::shared_ptr<ossia::light_component> light;
+    std::shared_ptr<ossia::camera_component> camera;
+  };
+
+  std::vector<SceneNode> m_scene_nodes;
+  std::vector<std::shared_ptr<ossia::material_component>> m_materials;
+
+  // One global skeleton built from all skin clusters encountered. Published
+  // to scene_state.skeletons[0] and attached as mesh_component::skin on any
+  // mesh that has a skinned part. Empty if the FBX has no skinning.
+  std::shared_ptr<ossia::skeleton_component> m_skeleton;
+
+  // Rich scene state emitted by rebuild_scene — full hierarchy with
+  // materials, lights, cameras, skeletons. AssetLoader consumes this
+  // via the apply-lambda returned by ins::fbx_t::process.
+  std::shared_ptr<const ossia::scene_state> m_raw_state;
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.cpp
new file mode 100644
index 0000000000..42f2b35f4d
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.cpp
@@ -0,0 +1,81 @@
+#include "Executor.hpp"
+
+#include <Gfx/GfxApplicationPlugin.hpp>
+#include <Gfx/GfxContext.hpp>
+#include <Gfx/GfxExecNode.hpp>
+#include <Gfx/Graph/FlattenedSceneFilterNode.hpp>
+#include <Process/Dataflow/Port.hpp>
+#include <Process/ExecutionContext.hpp>
+#include <Threedim/FlattenedSceneFilter/Process.hpp>
+
+#include <ossia/dataflow/port.hpp>
+
+#include <score/document/DocumentContext.hpp>
+
+namespace Gfx::FlattenedSceneFilter
+{
+class flattened_scene_filter_exec_node final : public gfx_exec_node
+{
+public:
+  flattened_scene_filter_exec_node(GfxExecutionAction& ctx)
+      : gfx_exec_node{ctx}
+  {
+  }
+
+  void init()
+  {
+    auto node = std::make_unique<score::gfx::FlattenedSceneFilterNode>();
+    id = exec_context->ui->register_node(std::move(node));
+  }
+
+  ~flattened_scene_filter_exec_node()
+  {
+    exec_context->ui->unregister_node(id);
+  }
+
+  std::string label() const noexcept override
+  {
+    return "Gfx::FlattenedSceneFilter_node";
+  }
+};
+
+ProcessExecutorComponent::ProcessExecutorComponent(
+    Gfx::FlattenedSceneFilter::Model& element,
+    const Execution::Context& ctx,
+    QObject* parent)
+    : ProcessComponent_T{element, ctx, "flattenedSceneFilterComponent", parent}
+{
+  auto n = ossia::make_node<flattened_scene_filter_exec_node>(
+      *ctx.execState, ctx.doc.plugin<DocumentPlugin>().exec);
+
+  // Port 0: geometry input
+  n->add_geometry();
+
+  // Ports 1-3: Mode + Match (int) + Match (string) controls
+  for(std::size_t i = 1; i <= 3; i++)
+  {
+    auto ctrl = qobject_cast<Process::ControlInlet*>(element.inlets()[i]);
+    auto& p = n->add_control();
+    ctrl->setupExecution(*n->root_inputs().back(), this);
+    p->value = ctrl->value();
+    QObject::connect(
+        ctrl,
+        &Process::ControlInlet::valueChanged,
+        this,
+        con_unvalidated{ctx, i, 0, n});
+  }
+
+  // Port 0: geometry output
+  n->add_geometry_out();
+
+  n->init();
+
+  this->node = n;
+  m_ossia_process = std::make_shared<ossia::node_process>(n);
+}
+
+void ProcessExecutorComponent::cleanup()
+{
+  ProcessComponent_T::cleanup();
+}
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.hpp
new file mode 100644
index 0000000000..922d1060fa
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.hpp
@@ -0,0 +1,22 @@
+#pragma once
+#include <Process/Execution/ProcessComponent.hpp>
+
+#include <ossia/dataflow/node_process.hpp>
+
+namespace Gfx::FlattenedSceneFilter
+{
+class Model;
+class ProcessExecutorComponent final
+    : public Execution::
+          ProcessComponent_T<Gfx::FlattenedSceneFilter::Model, ossia::node_process>
+{
+  COMPONENT_METADATA("b6c8e2d4-9a1f-4e7b-8d3c-2f5a1b7e9c4d")
+public:
+  ProcessExecutorComponent(
+      Model& element, const Execution::Context& ctx, QObject* parent);
+  void cleanup() override;
+};
+
+using ProcessExecutorComponentFactory
+    = Execution::ProcessComponentFactory_T<ProcessExecutorComponent>;
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Metadata.hpp
new file mode 100644
index 0000000000..6f144e83f7
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Metadata.hpp
@@ -0,0 +1,22 @@
+#pragma once
+#include <Process/ProcessMetadata.hpp>
+
+namespace Gfx::FlattenedSceneFilter
+{
+class Model;
+}
+
+PROCESS_METADATA(
+    , Gfx::FlattenedSceneFilter::Model, "7a1b3c5d-2e4f-4a6b-8c9d-1e2f3a4b5c6e",
+    "flattenedscenefilter",
+    "Flattened Scene Filter",
+    Process::ProcessCategory::Visual,
+    "Visuals/3D/Scene",
+    "Filter a flattened scene by tag or material index, per pass",
+    "ossia team",
+    (QStringList{"gfx", "scene", "filter", "3d"}),
+    {},
+    {},
+    QUrl{},
+    Process::ProcessFlags::SupportsAll
+)
diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.cpp
new file mode 100644
index 0000000000..d552fd9852
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.cpp
@@ -0,0 +1,97 @@
+#include "Process.hpp"
+
+#include <score/application/ApplicationComponents.hpp>
+
+#include <Process/Dataflow/Port.hpp>
+#include <Process/Dataflow/WidgetInlets.hpp>
+
+#include <Gfx/Graph/Node.hpp>
+#include <Gfx/TexturePort.hpp>
+
+#include <wobjectimpl.h>
+
+W_OBJECT_IMPL(Gfx::FlattenedSceneFilter::Model)
+namespace Gfx::FlattenedSceneFilter
+{
+
+Model::Model(
+    const TimeVal& duration, const Id<Process::ProcessModel>& id, QObject* parent)
+    : Process::ProcessModel{duration, id, "gfxProcess", parent}
+{
+  metadata().setInstanceName(*this);
+  init();
+}
+
+Model::~Model() = default;
+
+void Model::init()
+{
+  if(m_inlets.empty() && m_outlets.empty())
+  {
+    m_inlets.push_back(new GeometryInlet{"Geometry In", Id<Process::Port>(0), this});
+
+    m_inlets.push_back(new Process::ComboBox{
+        std::vector<std::pair<QString, ossia::value>>{
+            {QStringLiteral("tag == match"),                 0},
+            {QStringLiteral("tag != match"),                 1},
+            {QStringLiteral("material_index == match"),      2},
+            {QStringLiteral("material_index != match"),      3},
+            {QStringLiteral("blend == match"),               4},
+            {QStringLiteral("blend != match"),               5},
+            {QStringLiteral("depth_write == match"),         6},
+            {QStringLiteral("depth_write != match"),         7},
+            {QStringLiteral("cull_mode == match"),           8},
+            {QStringLiteral("cull_mode != match"),           9},
+            {QStringLiteral("topology == match"),            10},
+            {QStringLiteral("topology != match"),            11},
+            {QStringLiteral("format_id == match_str"),       12},
+            {QStringLiteral("format_id != match_str"),       13}},
+        0, "Mode", Id<Process::Port>(1), this});
+
+    m_inlets.push_back(new Process::IntSpinBox{
+        -1, 2147483647, 0, "Match", Id<Process::Port>(2), this});
+
+    // Modes 12/13 read this string; other modes ignore it.
+    m_inlets.push_back(new Process::LineEdit{
+        QString{}, "Format ID", Id<Process::Port>(3), this});
+
+    m_outlets.push_back(new GeometryOutlet{"Geometry Out", Id<Process::Port>(0), this});
+  }
+}
+
+QString Model::prettyName() const noexcept
+{
+  return tr("Flattened Scene Filter");
+}
+
+}
+
+template <>
+void DataStreamReader::read(const Gfx::FlattenedSceneFilter::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets);
+  insertDelimiter();
+}
+
+template <>
+void DataStreamWriter::write(Gfx::FlattenedSceneFilter::Model& proc)
+{
+  writePorts(
+      *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
+      proc.m_outlets, &proc);
+  checkDelimiter();
+}
+
+template <>
+void JSONReader::read(const Gfx::FlattenedSceneFilter::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets);
+}
+
+template <>
+void JSONWriter::write(Gfx::FlattenedSceneFilter::Model& proc)
+{
+  writePorts(
+      *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
+      proc.m_outlets, &proc);
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.hpp
similarity index 52%
rename from src/plugins/score-plugin-threedim/Threedim/Splat/Process.hpp
rename to src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.hpp
index 38d68691d4..9efaec1896 100644
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.hpp
@@ -1,25 +1,22 @@
 #pragma once
-#include <Process/Drop/ProcessDropHandler.hpp>
+#include <Gfx/CommandFactory.hpp>
+#include <Threedim/FlattenedSceneFilter/Metadata.hpp>
 #include <Process/GenericProcessFactory.hpp>
 #include <Process/Process.hpp>
 
-#include <Gfx/CommandFactory.hpp>
-#include <Gfx/Graph/ImageNode.hpp>
-#include <Library/LibraryInterface.hpp>
-
-#include <score/command/PropertyCommand.hpp>
-
-#include <Threedim/Splat/Metadata.hpp>
-namespace Gfx::Splat
+namespace Gfx::FlattenedSceneFilter
 {
 class Model final : public Process::ProcessModel
 {
   SCORE_SERIALIZE_FRIENDS
-  PROCESS_METADATA_IMPL(Gfx::Splat::Model)
+  PROCESS_METADATA_IMPL(Gfx::FlattenedSceneFilter::Model)
   W_OBJECT(Model)
 
 public:
-  Model(const TimeVal& duration, const Id<Process::ProcessModel>& id, QObject* parent);
+  Model(
+      const TimeVal& duration,
+      const Id<Process::ProcessModel>& id,
+      QObject* parent);
 
   template <typename Impl>
   Model(Impl& vis, QObject* parent)
@@ -36,6 +33,5 @@ class Model final : public Process::ProcessModel
   QString prettyName() const noexcept override;
 };
 
-using ProcessFactory = Process::ProcessFactory_T<Gfx::Splat::Model>;
-
+using ProcessFactory = Process::ProcessFactory_T<Gfx::FlattenedSceneFilter::Model>;
 }
diff --git a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.cpp
similarity index 80%
rename from src/plugins/score-plugin-threedim/Threedim/ObjLoader.cpp
rename to src/plugins/score-plugin-threedim/Threedim/GeometryLoader.cpp
index ed2f199f05..2196a2dc2d 100644
--- a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.cpp
@@ -1,15 +1,16 @@
-#include "ObjLoader.hpp"
+#include "GeometryLoader.hpp"
 
 #include <QMatrix4x4>
 #include <QString>
 
 #include <Threedim/Debug.hpp>
 #include <Threedim/Ply.hpp>
+#include <Threedim/VcgImporters.hpp>
 
 namespace Threedim
 {
 
-void ObjLoader::rebuild_geometry()
+void GeometryLoader::rebuild_geometry()
 {
   std::vector<mesh>& new_meshes = this->meshinfo;
 
@@ -211,14 +212,21 @@ static bool check_file_extension(std::string_view filename, std::string_view exp
   return true;
 }
 
-std::function<void(ObjLoader&)> ObjLoader::ins::obj_t::process(file_type tv)
+std::function<void(GeometryLoader&)> GeometryLoader::ins::geom_t::process(file_type tv)
 {
+  // Dispatch by extension. Each parser returns the mesh list and fills the
+  // shared float buffer `buf`. Empty mesh list = unsupported / failed
+  // parse → we return {} so the halp runtime leaves the current geometry
+  // intact rather than wiping it.
+  //
+  // The returned lambda (captured mesh list + flat float buffer) runs on
+  // the execution thread and swaps into the loader instance's members,
+  // then triggers rebuild_geometry to populate the dynamic_geometry
+  // output.
   auto upload = [](auto&& mesh, auto&& buf) {
-    return [mesh = std::move(mesh), buf = std::move(buf)](ObjLoader& o) mutable {
-      // This part happens in the execution thread
+    return [mesh = std::move(mesh), buf = std::move(buf)](GeometryLoader& o) mutable {
       std::swap(o.meshinfo, mesh);
       std::swap(o.complete, buf);
-
       o.rebuild_geometry();
     };
   };
@@ -226,20 +234,35 @@ std::function<void(ObjLoader&)> ObjLoader::ins::obj_t::process(file_type tv)
   Threedim::float_vec buf;
   if(check_file_extension(tv.filename, "obj"))
   {
-    // This part happens in a separate thread
     if(auto mesh = Threedim::ObjFromString(tv.bytes, buf); !mesh.empty())
-    {
       return upload(std::move(mesh), std::move(buf));
-    }
   }
   else if(check_file_extension(tv.filename, "ply"))
   {
-    // This part happens in a separate thread
     if(auto mesh = Threedim::PlyFromFile(tv.filename, buf); !mesh.empty())
-    {
       return upload(std::move(mesh), std::move(buf));
-    }
+  }
+  else if(check_file_extension(tv.filename, "stl"))
+  {
+    if(auto mesh = Threedim::StlFromFile(tv.filename, buf); !mesh.empty())
+      return upload(std::move(mesh), std::move(buf));
+  }
+  else if(check_file_extension(tv.filename, "off"))
+  {
+    if(auto mesh = Threedim::OffFromFile(tv.filename, buf); !mesh.empty())
+      return upload(std::move(mesh), std::move(buf));
   }
   return {};
 }
+
+void GeometryLoader::operator()()
+{
+  // Rebuild the TRS matrix from the position / rotation / scale inputs,
+  // writing it into the geometry output's transform. The helper's result
+  // becomes dirty_transform, so it is raised only when the matrix actually
+  // changed and downstream skips its transform rebuild on idle frames.
+  auto& geom = outputs.geometry;
+  geom.dirty_transform = computeTRSMatrix(inputs, geom.transform, m_cachedTRS);
+}
+
 }
diff --git a/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.hpp
new file mode 100644
index 0000000000..278619d183
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.hpp
@@ -0,0 +1,72 @@
+#pragma once
+#include "TransformHelper.hpp"
+
+#include <Threedim/TinyObj.hpp>
+#include <halp/controls.hpp>
+#include <halp/file_port.hpp>
+#include <halp/geometry.hpp>
+#include <halp/meta.hpp>
+#include <ossia/detail/mutex.hpp>
+
+namespace Threedim
+{
+
+// Geometry-only file loader. Dispatches by extension to the right parser
+// and emits a halp::dynamic_geometry output — one draw-ready mesh per
+// file part, no scene graph, no materials, no lights. Use AssetLoader
+// for the full-scene variant (FBX / glTF also go through a
+// geometry+materials+hierarchy scene_spec pipeline there).
+//
+// Supported extensions: .obj, .ply, .stl, .off. STL + OFF go through
+// the vcglib importers; OBJ + PLY through tinyobj / miniply. All four
+// funnel into the same `Threedim::mesh` + `float_vec` representation
+// so `rebuild_geometry` sees one uniform input format.
+//
+// This is the TD-equivalent of a geometry-specific SOP-style loader —
+// simpler output, no material / skeleton / animation carry-along. When
+// users want the full content (PBR materials, skeletons, anim clips)
+// they reach for AssetLoader instead.
+class GeometryLoader
+{
+public:
+  halp_meta(name, "Geometry Loader")
+  halp_meta(category, "Visuals/Meshes")
+  halp_meta(c_name, "geometry_loader")
+  halp_meta(
+      authors,
+      "Jean-Michaël Celerier, TinyOBJ authors, miniPLY authors, vcglib authors, Eigen authors")
+  halp_meta(manual_url, "https://ossia.io/score-docs/processes/meshes.html#geometry-loader")
+  halp_meta(uuid, "5df71765-505f-4ab7-98c1-f305d10a01ef")
+
+  struct ins
+  {
+    struct geom_t : halp::file_port<"3D file">
+    {
+      halp_meta(extensions, "3D files (*.obj *.ply *.stl *.off)");
+      static std::function<void(GeometryLoader&)> process(file_type data); // parses the file; empty result = failure
+    } geom;
+    PositionControl position; // position / rotation / scale feed the TRS matrix built in operator()
+    RotationControl rotation;
+    ScaleControl scale;
+  } inputs;
+
+  struct
+  {
+    struct : halp::mesh
+    {
+      halp_meta(name, "Geometry");
+      std::vector<halp::dynamic_geometry> mesh;
+    } geometry;
+  } outputs;
+
+  void rebuild_geometry(); // called by the functor returned from process() after it swaps in new data
+  void operator()();       // refreshes the output transform and its dirty flag
+
+  std::vector<mesh> meshinfo{}; // mesh list from the last successful parse
+  float_vec complete;           // flat float buffer from the last successful parse
+
+  // Per-frame TRS matrix cache (see TransformHelper.hpp).
+  CachedTRS m_cachedTRS{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/GltfParser.cpp b/src/plugins/score-plugin-threedim/Threedim/GltfParser.cpp
new file mode 100644
index 0000000000..2cb1775d45
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/GltfParser.cpp
@@ -0,0 +1,1041 @@
+#include "GltfParser.hpp"
+
+#include "TangentUtils.hpp"
+
+#include <ossia/detail/hash.hpp>
+
+#include <fastgltf/core.hpp>
+#include <fastgltf/math.hpp>
+#include <fastgltf/tools.hpp>
+#include <fastgltf/types.hpp>
+
+#include <QMatrix3x3>
+#include <QQuaternion>
+#include <QString>
+#include <QVector3D>
+
+#include <cmath>
+#include <cstring>
+#include <variant>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Convert a glTF node transform into an ossia::scene_transform. fastgltf
+// hands us a std::variant<TRS, fmat4x4>: TRS is copied straight across, a
+// raw 4x4 is decomposed into translation / rotation / scale here.
+static ossia::scene_transform to_transform(const fastgltf::Node& n)
+{
+  ossia::scene_transform t{};
+
+  if(const auto* trs = std::get_if<fastgltf::TRS>(&n.transform))
+  {
+    t.translation[0] = float(trs->translation[0]);
+    t.translation[1] = float(trs->translation[1]);
+    t.translation[2] = float(trs->translation[2]);
+    t.rotation[0]    = float(trs->rotation[0]);
+    t.rotation[1]    = float(trs->rotation[1]);
+    t.rotation[2]    = float(trs->rotation[2]);
+    t.rotation[3]    = float(trs->rotation[3]);
+    t.scale[0]       = float(trs->scale[0]);
+    t.scale[1]       = float(trs->scale[1]);
+    t.scale[2]       = float(trs->scale[2]);
+  }
+  else if(const auto* m = std::get_if<fastgltf::math::fmat4x4>(&n.transform))
+  {
+    // Matrix form — full TRS decomposition. We pass
+    // Options::DecomposeNodeMatrices so fastgltf SHOULD have already
+    // converted to TRS upfront, but this branch still fires for
+    // matrices that fastgltf flags as non-decomposable (negative
+    // scale, near-degenerate, library version differences). The
+    // previous translation-only fallback silently dropped rotation
+    // and scale, which broke any glTF authored matrix-only — like
+    // VirtualCity (193/234 nodes use matrix form encoding rotation
+    // and uniform scale).
+    //
+    // Algorithm: T = column 3; per-column lengths give scale; reflect
+    // one axis when det < 0; normalised 3×3 → quaternion via
+    // QQuaternion::fromRotationMatrix.
+    const auto& M = *m;
+    t.translation[0] = M[3][0];
+    t.translation[1] = M[3][1];
+    t.translation[2] = M[3][2];
+
+    QVector3D c0(M[0][0], M[0][1], M[0][2]);
+    QVector3D c1(M[1][0], M[1][1], M[1][2]);
+    QVector3D c2(M[2][0], M[2][1], M[2][2]);
+
+    float sx = c0.length();
+    float sy = c1.length();
+    float sz = c2.length();
+
+    // Flip one axis when determinant is negative (reflection encoded
+    // as negative scale on one axis). Without this, the quaternion
+    // extraction below trips on a left-handed basis and yields garbage.
+    const float det
+        = c0.x() * (c1.y() * c2.z() - c1.z() * c2.y())
+        - c0.y() * (c1.x() * c2.z() - c1.z() * c2.x())
+        + c0.z() * (c1.x() * c2.y() - c1.y() * c2.x());
+    if(det < 0.f)
+    {
+      sx = -sx;
+      c0 = -c0;
+    }
+
+    t.scale[0] = sx;
+    t.scale[1] = sy;
+    t.scale[2] = sz;
+
+    if(std::fabs(sx) > 1e-6f) c0 /= std::fabs(sx); // |sx|: sx is negative after the flip, c0 already negated
+    if(sy > 1e-6f) c1 /= sy;
+    if(sz > 1e-6f) c2 /= sz;
+
+    QMatrix3x3 R;
+    R(0, 0) = c0.x(); R(1, 0) = c0.y(); R(2, 0) = c0.z();
+    R(0, 1) = c1.x(); R(1, 1) = c1.y(); R(2, 1) = c1.z();
+    R(0, 2) = c2.x(); R(1, 2) = c2.y(); R(2, 2) = c2.z();
+    QQuaternion q = QQuaternion::fromRotationMatrix(R);
+    t.rotation[0] = q.x();
+    t.rotation[1] = q.y();
+    t.rotation[2] = q.z();
+    t.rotation[3] = q.scalar();
+  }
+  return t;
+}
+
+// Translate a glTF Material into material_component (factors + base color
+// texture path). `dir` is the glTF file's parent directory — external
+// image URIs are relative to it.
+static std::shared_ptr<ossia::material_component> to_material(
+    const fastgltf::Asset& asset, const fastgltf::Material& m,
+    const std::filesystem::path& dir)
+{
+  auto mc = std::make_shared<ossia::material_component>();
+  mc->tag = std::string(m.name);
+
+  // Base color (pbrMetallicRoughness factor + texture)
+  mc->base_color_factor[0] = float(m.pbrData.baseColorFactor[0]);
+  mc->base_color_factor[1] = float(m.pbrData.baseColorFactor[1]);
+  mc->base_color_factor[2] = float(m.pbrData.baseColorFactor[2]);
+  mc->base_color_factor[3] = float(m.pbrData.baseColorFactor[3]);
+  mc->metallic_factor   = float(m.pbrData.metallicFactor);
+  mc->roughness_factor  = float(m.pbrData.roughnessFactor);
+
+  mc->emissive_factor[0] = float(m.emissiveFactor[0]);
+  mc->emissive_factor[1] = float(m.emissiveFactor[1]);
+  mc->emissive_factor[2] = float(m.emissiveFactor[2]);
+  mc->emissive_strength  = float(m.emissiveStrength);
+
+  switch(m.alphaMode)
+  {
+    case fastgltf::AlphaMode::Opaque: mc->alpha = ossia::alpha_mode::opaque_; break;
+    case fastgltf::AlphaMode::Mask:   mc->alpha = ossia::alpha_mode::mask;    break;
+    case fastgltf::AlphaMode::Blend:  mc->alpha = ossia::alpha_mode::blend;   break;
+  }
+  mc->alpha_cutoff = float(m.alphaCutoff);
+  mc->double_sided = m.doubleSided;
+  mc->unlit = m.unlit;
+
+  // Resolve a glTF texture slot to an ossia texture_ref with source populated
+  // (filesystem path or embedded blob), plus UV transform and sampler state.
+  // Every index taken from the file is range-checked before it is used.
+  auto fill_tex = [&](ossia::texture_ref& tr, const fastgltf::TextureInfo& ti) {
+    if(ti.textureIndex >= asset.textures.size())
+      return;
+    const auto& tex = asset.textures[ti.textureIndex];
+    if(!tex.imageIndex.has_value() || *tex.imageIndex >= asset.images.size())
+      return;
+    const auto& img = asset.images[*tex.imageIndex];
+    auto src = std::make_shared<ossia::texture_source>();
+    std::visit(
+        [&](const auto& data) {
+          using T = std::decay_t<decltype(data)>;
+          if constexpr(std::is_same_v<T, fastgltf::sources::URI>)
+          {
+            // Relative URI → join with the glTF file's parent dir.
+            auto p = dir / std::filesystem::path(std::string_view(
+                data.uri.path()));
+            src->file_path = p.lexically_normal().string();
+          }
+          else if constexpr(std::is_same_v<T, fastgltf::sources::Array>)
+          {
+            auto blob = std::make_shared<std::vector<uint8_t>>(
+                (const uint8_t*)data.bytes.data(),
+                (const uint8_t*)data.bytes.data() + data.bytes.size());
+            src->embedded_data = blob;
+            src->mime_type = std::string(fastgltf::getMimeTypeString(data.mimeType));
+          }
+          else if constexpr(std::is_same_v<T, fastgltf::sources::BufferView>)
+          {
+            if(data.bufferViewIndex >= asset.bufferViews.size())
+              return;
+            const auto& bv = asset.bufferViews[data.bufferViewIndex];
+            if(bv.bufferIndex >= asset.buffers.size())
+              return;
+            const auto& buf = asset.buffers[bv.bufferIndex];
+            const auto* arr = std::get_if<fastgltf::sources::Array>(&buf.data);
+            // Bail out when the bytes are not in memory or the view runs past the buffer.
+            if(!arr || bv.byteOffset > arr->bytes.size()
+               || bv.byteLength > arr->bytes.size() - bv.byteOffset)
+              return;
+            const auto* first = (const uint8_t*)arr->bytes.data() + bv.byteOffset;
+            src->embedded_data = std::make_shared<std::vector<uint8_t>>(first, first + bv.byteLength);
+            src->mime_type = std::string(fastgltf::getMimeTypeString(data.mimeType));
+          }
+          // sources::Vector / sources::Fallback / sources::CustomBuffer not
+          // handled in v1 — most files use one of the three above.
+        },
+        img.data);
+
+    // Plan 09 S1: content-hash for cross-output / cross-reload decode
+    // dedup. Prefer hashing the embedded bytes — it's the decoded
+    // payload contents that matter, not the file path (two different
+    // files can embed the same JPEG). Fall back to hashing the path
+    // string when no embedded data (URI → we'll read the file on
+    // demand inside the preprocessor, hashing the path is a stable
+    // proxy for session-scope dedup).
+    if(src->embedded_data && !src->embedded_data->empty())
+    {
+      src->content_hash = ossia::hash_bytes(
+          src->embedded_data->data(), src->embedded_data->size());
+    }
+    else if(!src->file_path.empty())
+    {
+      src->content_hash = ossia::hash_bytes(
+          src->file_path.data(), src->file_path.size());
+    }
+
+    tr.source = std::move(src);
+    tr.texcoord_set = uint32_t(ti.texCoordIndex);
+
+    // KHR_texture_transform: per-texture-info UV transform. The
+    // extension overrides the texture-info texCoordIndex when set
+    // (spec) — honour that. Defaults are identity (offset=0, scale=1,
+    // rot=0), so leaving uv_transform at default for textures without
+    // the extension is correct.
+    if(ti.transform)
+    {
+      tr.uv_transform.offset[0] = float(ti.transform->uvOffset.x());
+      tr.uv_transform.offset[1] = float(ti.transform->uvOffset.y());
+      tr.uv_transform.scale[0]  = float(ti.transform->uvScale.x());
+      tr.uv_transform.scale[1]  = float(ti.transform->uvScale.y());
+      tr.uv_transform.rotation  = float(ti.transform->rotation);
+      if(ti.transform->texCoordIndex.has_value())
+        tr.texcoord_set = uint32_t(*ti.transform->texCoordIndex);
+    }
+
+    // glTF per-texture sampler. Each texture optionally references a
+    // sampler index in `asset.samplers`. Default (when absent or
+    // unreferenced) is REPEAT/REPEAT/LINEAR/LINEAR/LINEAR_MIPMAP per
+    // glTF spec — which matches the texture_sampler_config defaults.
+    auto wrap_to_ossia = [](fastgltf::Wrap w) {
+      switch(w)
+      {
+        case fastgltf::Wrap::ClampToEdge:    return ossia::CLAMP_TO_EDGE;
+        case fastgltf::Wrap::MirroredRepeat: return ossia::MIRROR;
+        case fastgltf::Wrap::Repeat:         return ossia::REPEAT;
+      }
+      return ossia::REPEAT;
+    };
+    auto filter_to_ossia = [](fastgltf::Filter f, ossia::texture_filter& base,
+                              ossia::texture_filter& mip) {
+      // glTF combined min-filter encodes both the base filter and the
+      // mipmap mode (e.g. LinearMipMapNearest = LINEAR base + NEAREST
+      // mipmap). Decode both axes.
+      switch(f)
+      {
+        case fastgltf::Filter::Nearest:
+          base = ossia::NEAREST; mip = ossia::NONE; break;
+        case fastgltf::Filter::Linear:
+          base = ossia::LINEAR;  mip = ossia::NONE; break;
+        case fastgltf::Filter::NearestMipMapNearest:
+          base = ossia::NEAREST; mip = ossia::NEAREST; break;
+        case fastgltf::Filter::LinearMipMapNearest:
+          base = ossia::LINEAR;  mip = ossia::NEAREST; break;
+        case fastgltf::Filter::NearestMipMapLinear:
+          base = ossia::NEAREST; mip = ossia::LINEAR; break;
+        case fastgltf::Filter::LinearMipMapLinear:
+          base = ossia::LINEAR;  mip = ossia::LINEAR; break;
+      }
+    };
+    if(tex.samplerIndex.has_value()
+       && *tex.samplerIndex < asset.samplers.size())
+    {
+      const auto& s = asset.samplers[*tex.samplerIndex];
+      tr.sampler.wrap_s = wrap_to_ossia(s.wrapS);
+      tr.sampler.wrap_t = wrap_to_ossia(s.wrapT);
+      ossia::texture_filter mag_base = ossia::LINEAR, mag_mip = ossia::NONE;
+      ossia::texture_filter min_base = ossia::LINEAR, min_mip = ossia::LINEAR;
+      if(s.magFilter.has_value())
+        filter_to_ossia(*s.magFilter, mag_base, mag_mip);
+      if(s.minFilter.has_value())
+        filter_to_ossia(*s.minFilter, min_base, min_mip);
+      tr.sampler.mag_filter = mag_base;
+      tr.sampler.min_filter = min_base;
+      tr.sampler.mipmap_mode = min_mip;  // mip mode comes from minFilter
+    }
+  };
+
+  if(m.pbrData.baseColorTexture)
+    fill_tex(mc->base_color_texture, *m.pbrData.baseColorTexture);
+  if(m.pbrData.metallicRoughnessTexture)
+    fill_tex(mc->metallic_roughness_texture, *m.pbrData.metallicRoughnessTexture);
+  if(m.normalTexture)
+    fill_tex(mc->normal_texture, *m.normalTexture);
+  if(m.occlusionTexture)
+    fill_tex(mc->occlusion_texture, *m.occlusionTexture);
+  if(m.emissiveTexture)
+    fill_tex(mc->emissive_texture, *m.emissiveTexture);
+
+  // --- KHR material extensions ------------------------------------------
+  // fastgltf parses every extension we've enabled in the Extensions mask
+  // at parse time (see loadFromFile() below). What was missing here is NOT
+  // the parse — fastgltf already gave us the fields — but the copy into
+  // ossia::material_component. Each KHR_* that sets material_component
+  // fields gets a matching block below.
+  //
+  // Each material.<extension> is a unique_ptr; nullptr means the file
+  // didn't declare that extension on this material. We leave the
+  // material_component sub-struct at its spec defaults (factor=0 /
+  // factor=1 depending on the field) in that case.
+
+  // KHR_materials_ior — scalar IOR override; default 1.5 matches spec.
+  mc->ior = float(m.ior);
+
+  // KHR_materials_clearcoat — second thin dielectric specular lobe.
+  if(m.clearcoat)
+  {
+    mc->clearcoat.factor = float(m.clearcoat->clearcoatFactor);
+    mc->clearcoat.roughness_factor = float(m.clearcoat->clearcoatRoughnessFactor);
+    if(m.clearcoat->clearcoatTexture)
+      fill_tex(mc->clearcoat.texture, *m.clearcoat->clearcoatTexture);
+    if(m.clearcoat->clearcoatRoughnessTexture)
+      fill_tex(
+          mc->clearcoat.roughness_texture,
+          *m.clearcoat->clearcoatRoughnessTexture);
+    if(m.clearcoat->clearcoatNormalTexture)
+      fill_tex(
+          mc->clearcoat.normal_texture, *m.clearcoat->clearcoatNormalTexture);
+  }
+
+  // KHR_materials_sheen — fabric / velvet / brushed surfaces.
+  if(m.sheen)
+  {
+    mc->sheen.color_factor[0] = float(m.sheen->sheenColorFactor.x());
+    mc->sheen.color_factor[1] = float(m.sheen->sheenColorFactor.y());
+    mc->sheen.color_factor[2] = float(m.sheen->sheenColorFactor.z());
+    mc->sheen.roughness_factor = float(m.sheen->sheenRoughnessFactor);
+    if(m.sheen->sheenColorTexture)
+      fill_tex(mc->sheen.color_texture, *m.sheen->sheenColorTexture);
+    if(m.sheen->sheenRoughnessTexture)
+      fill_tex(mc->sheen.roughness_texture, *m.sheen->sheenRoughnessTexture);
+  }
+
+  // KHR_materials_transmission — thin-walled refraction weight.
+  if(m.transmission)
+  {
+    mc->transmission.factor = float(m.transmission->transmissionFactor);
+    if(m.transmission->transmissionTexture)
+      fill_tex(mc->transmission.texture, *m.transmission->transmissionTexture);
+  }
+
+  // KHR_materials_volume — thick-walled absorption + attenuation.
+  if(m.volume)
+  {
+    mc->volume.thickness_factor = float(m.volume->thicknessFactor);
+    mc->volume.attenuation_distance = float(m.volume->attenuationDistance);
+    mc->volume.attenuation_color[0] = float(m.volume->attenuationColor.x());
+    mc->volume.attenuation_color[1] = float(m.volume->attenuationColor.y());
+    mc->volume.attenuation_color[2] = float(m.volume->attenuationColor.z());
+    if(m.volume->thicknessTexture)
+      fill_tex(mc->volume.thickness_texture, *m.volume->thicknessTexture);
+  }
+
+  // KHR_materials_specular — dielectric F0 override + tint.
+  if(m.specular)
+  {
+    mc->specular.factor = float(m.specular->specularFactor);
+    mc->specular.color_factor[0] = float(m.specular->specularColorFactor.x());
+    mc->specular.color_factor[1] = float(m.specular->specularColorFactor.y());
+    mc->specular.color_factor[2] = float(m.specular->specularColorFactor.z());
+    if(m.specular->specularTexture)
+      fill_tex(mc->specular.texture, *m.specular->specularTexture);
+    if(m.specular->specularColorTexture)
+      fill_tex(mc->specular.color_texture, *m.specular->specularColorTexture);
+  }
+
+  // KHR_materials_iridescence — thin-film interference.
+  if(m.iridescence)
+  {
+    mc->iridescence.factor = float(m.iridescence->iridescenceFactor);
+    mc->iridescence.ior = float(m.iridescence->iridescenceIor);
+    mc->iridescence.thickness_min
+        = float(m.iridescence->iridescenceThicknessMinimum);
+    mc->iridescence.thickness_max
+        = float(m.iridescence->iridescenceThicknessMaximum);
+    if(m.iridescence->iridescenceTexture)
+      fill_tex(mc->iridescence.texture, *m.iridescence->iridescenceTexture);
+    if(m.iridescence->iridescenceThicknessTexture)
+      fill_tex(
+          mc->iridescence.thickness_texture,
+          *m.iridescence->iridescenceThicknessTexture);
+  }
+
+  // KHR_materials_anisotropy — directional specular stretch.
+  if(m.anisotropy)
+  {
+    mc->anisotropy.strength = float(m.anisotropy->anisotropyStrength);
+    mc->anisotropy.rotation = float(m.anisotropy->anisotropyRotation);
+    if(m.anisotropy->anisotropyTexture)
+      fill_tex(mc->anisotropy.texture, *m.anisotropy->anisotropyTexture);
+  }
+
+  // KHR_materials_diffuse_transmission — translucent surfaces (paper,
+  // leaves, lampshades).
+  if(m.diffuseTransmission)
+  {
+    mc->diffuse_transmission.factor
+        = float(m.diffuseTransmission->diffuseTransmissionFactor);
+    mc->diffuse_transmission.color_factor[0]
+        = float(m.diffuseTransmission->diffuseTransmissionColorFactor.x());
+    mc->diffuse_transmission.color_factor[1]
+        = float(m.diffuseTransmission->diffuseTransmissionColorFactor.y());
+    mc->diffuse_transmission.color_factor[2]
+        = float(m.diffuseTransmission->diffuseTransmissionColorFactor.z());
+    if(m.diffuseTransmission->diffuseTransmissionTexture)
+      fill_tex(
+          mc->diffuse_transmission.texture,
+          *m.diffuseTransmission->diffuseTransmissionTexture);
+    if(m.diffuseTransmission->diffuseTransmissionColorTexture)
+      fill_tex(
+          mc->diffuse_transmission.color_texture,
+          *m.diffuseTransmission->diffuseTransmissionColorTexture);
+  }
+
+  return mc;
+}
+
+// Build an ossia::light_component from a KHR_lights_punctual glTF light.
+static std::shared_ptr<ossia::light_component> to_light(const fastgltf::Light& l)
+{
+  auto result = std::make_shared<ossia::light_component>();
+  auto& light = *result;
+
+  // Map the light kind; a value matching none of these leaves the default.
+  if(l.type == fastgltf::LightType::Directional)
+    light.type = ossia::light_type::directional;
+  else if(l.type == fastgltf::LightType::Point)
+    light.type = ossia::light_type::point;
+  else if(l.type == fastgltf::LightType::Spot)
+    light.type = ossia::light_type::spot;
+  for(int ch = 0; ch < 3; ++ch)
+    light.color[ch] = float(l.color[ch]);
+  light.intensity = float(l.intensity);
+  // Absent optionals: range 0, inner cone 0, outer cone pi/4.
+  light.range = l.range.value_or(0.f);
+  light.inner_cone_angle = float(l.innerConeAngle.value_or(0.f));
+  light.outer_cone_angle = float(l.outerConeAngle.value_or(float(M_PI) / 4.f));
+  light.decay = ossia::light_decay::quadratic;
+  return result;
+}
+
+// Build an ossia::camera_component from a glTF camera of either projection kind.
+static std::shared_ptr<ossia::camera_component> to_camera(const fastgltf::Camera& c)
+{
+  auto result = std::make_shared<ossia::camera_component>();
+  if(const auto* persp = std::get_if<fastgltf::Camera::Perspective>(&c.camera))
+  {
+    result->projection   = ossia::camera_projection::perspective;
+    result->znear        = float(persp->znear);
+    result->zfar         = float(persp->zfar.value_or(1000.f)); // no far plane in file: 1000
+    result->yfov         = float(persp->yfov);
+    result->aspect_ratio = persp->aspectRatio.value_or(1.f);    // no ratio in file: 1
+  }
+  else if(const auto* ortho = std::get_if<fastgltf::Camera::Orthographic>(&c.camera))
+  {
+    result->projection = ossia::camera_projection::orthographic;
+    result->znear = float(ortho->znear);
+    result->zfar  = float(ortho->zfar);
+    result->xmag  = float(ortho->xmag);
+    result->ymag  = float(ortho->ymag);
+  }
+  return result;
+}
+
+// Pull one accessor into a flat float vector. `Components` is the number
+// of floats per element and must be 2, 3 or 4 (enforced at compile time).
+// fastgltf's iterator widens every component type (byte/short/int/float) to float.
+template <int Components>
+static std::shared_ptr<std::vector<float>> read_float_accessor(
+    const fastgltf::Asset& asset, const fastgltf::Accessor& acc)
+{
+  // Any other arity matches no branch below and would silently hand back
+  // a zero-filled buffer, so reject it when the template is instantiated.
+  static_assert(
+      Components >= 2 && Components <= 4, "read_float_accessor: vec2/vec3/vec4 only");
+  // Sized up front; the callbacks below fill it element by element.
+  auto out = std::make_shared<std::vector<float>>(acc.count * Components);
+  float* dst = out->data();
+  // One branch per arity: each callback writes a full element and advances dst.
+  if constexpr(Components == 2)
+    fastgltf::iterateAccessor<fastgltf::math::fvec2>(
+        asset, acc, [&](fastgltf::math::fvec2 v) {
+          dst[0] = v.x(); dst[1] = v.y(); dst += 2;
+        });
+  else if constexpr(Components == 3)
+    fastgltf::iterateAccessor<fastgltf::math::fvec3>(
+        asset, acc, [&](fastgltf::math::fvec3 v) {
+          dst[0] = v.x(); dst[1] = v.y(); dst[2] = v.z(); dst += 3;
+        });
+  else if constexpr(Components == 4)
+    fastgltf::iterateAccessor<fastgltf::math::fvec4>(
+        asset, acc, [&](fastgltf::math::fvec4 v) {
+          dst[0] = v.x(); dst[1] = v.y(); dst[2] = v.z(); dst[3] = v.w(); dst += 4;
+        });
+  return out;
+}
+
+// Read an index accessor of any glTF component type into a flat uint32 vector.
+static std::shared_ptr<std::vector<uint32_t>> read_indices(
+    const fastgltf::Asset& asset, const fastgltf::Accessor& acc)
+{
+  auto indices = std::make_shared<std::vector<uint32_t>>(acc.count);
+  std::size_t cursor = 0;
+  fastgltf::iterateAccessor<std::uint32_t>(
+      asset, acc, [&](std::uint32_t idx) { (*indices)[cursor++] = idx; });
+  return indices;
+}
+
+// Pull POSITION, NORMAL, TEXCOORD_0/1, COLOR_0, TANGENT, JOINTS_0, WEIGHTS_0 and the
+// index buffer of a primitive into a ScenePart. Missing attributes leave the matching shared_ptr empty.
+static GltfParser::ScenePart extract_primitive(
+    const fastgltf::Asset& asset, const fastgltf::Primitive& prim,
+    const std::vector<int>& material_index_remap)
+{
+  GltfParser::ScenePart sp;
+
+  auto get_accessor
+      = [&](std::string_view name) -> const fastgltf::Accessor* {
+    for(const auto& a : prim.attributes)
+      if(a.name == name && a.accessorIndex < asset.accessors.size()) // out-of-range index: treat as absent
+        return &asset.accessors[a.accessorIndex];
+    return nullptr;
+  };
+
+  if(auto* a = get_accessor("POSITION"))
+  {
+    sp.vertex_count = uint32_t(a->count);
+    sp.positions = read_float_accessor<3>(asset, *a);
+    // Local-space AABB. glTF requires min/max on the POSITION accessor,
+    // but rather than chase fastgltf's accessor-specific variant API we
+    // just walk the decoded float stream — same cost as one extra pass
+    // on load (negligible compared to asset I/O), and trivially uniform
+    // with the FBX / procedural code paths.
+    if(sp.positions && !sp.positions->empty())
+      sp.bounds = ossia::compute_aabb_from_positions(
+          sp.positions->data(), sp.vertex_count);
+  }
+  if(auto* a = get_accessor("NORMAL"))
+    sp.normals = read_float_accessor<3>(asset, *a);
+  if(auto* a = get_accessor("TEXCOORD_0"))
+    sp.texcoords = read_float_accessor<2>(asset, *a);
+  if(auto* a = get_accessor("TEXCOORD_1"))
+    sp.texcoords1 = read_float_accessor<2>(asset, *a);
+  if(auto* a = get_accessor("COLOR_0"))
+  {
+    // COLOR_0 may be vec3 or vec4 — peek at component count.
+    if(a->type == fastgltf::AccessorType::Vec4)
+      sp.colors = read_float_accessor<4>(asset, *a);
+    else if(a->type == fastgltf::AccessorType::Vec3)
+    {
+      // Pad to RGBA.
+      auto rgb = read_float_accessor<3>(asset, *a);
+      auto rgba = std::make_shared<std::vector<float>>(a->count * 4);
+      for(std::size_t i = 0; i < a->count; ++i)
+      {
+        (*rgba)[i * 4 + 0] = (*rgb)[i * 3 + 0];
+        (*rgba)[i * 4 + 1] = (*rgb)[i * 3 + 1];
+        (*rgba)[i * 4 + 2] = (*rgb)[i * 3 + 2];
+        (*rgba)[i * 4 + 3] = 1.f;
+      }
+      sp.colors = std::move(rgba);
+    }
+  }
+  if(auto* a = get_accessor("TANGENT"))
+    sp.tangents = read_float_accessor<4>(asset, *a);
+
+  // Skinning attributes. glTF spec stores JOINTS_0 as UNSIGNED_BYTE or
+  // UNSIGNED_SHORT vec4 — widen to uint32 here so the vertex shader can
+  // bind a uniform uvec4 format regardless of source file. WEIGHTS_0 may be
+  // float or normalized unsigned byte/short; it is read out as float vec4.
+  if(auto* a = get_accessor("JOINTS_0"))
+  {
+    auto joints = std::make_shared<std::vector<uint32_t>>(a->count * 4);
+    uint32_t* dst = joints->data();
+    fastgltf::iterateAccessor<fastgltf::math::u16vec4>(
+        asset, *a, [&](fastgltf::math::u16vec4 v) {
+          *dst++ = uint32_t(v[0]);
+          *dst++ = uint32_t(v[1]);
+          *dst++ = uint32_t(v[2]);
+          *dst++ = uint32_t(v[3]);
+        });
+    sp.joints0 = std::move(joints);
+  }
+  if(auto* a = get_accessor("WEIGHTS_0"))
+    sp.weights0 = read_float_accessor<4>(asset, *a);
+
+  if(prim.indicesAccessor.has_value() && *prim.indicesAccessor < asset.accessors.size())
+  {
+    const auto& ia = asset.accessors[*prim.indicesAccessor];
+    sp.indices = read_indices(asset, ia);
+    sp.index_count = uint32_t(ia.count);
+  }
+
+  if(prim.materialIndex.has_value())
+  {
+    const std::size_t gltf_idx = *prim.materialIndex;
+    if(gltf_idx < material_index_remap.size())
+      sp.material_index = material_index_remap[gltf_idx];
+  }
+
+  // KHR_materials_variants mapping. fastgltf stores it pre-indexed by
+  // variant index → Optional<material_index>. Translate to our
+  // remapped material indices with -1 for "no override".
+  if(!prim.mappings.empty())
+  {
+    sp.variant_material_indices.resize(prim.mappings.size(), -1);
+    for(std::size_t v = 0; v < prim.mappings.size(); ++v)
+    {
+      if(prim.mappings[v].has_value())
+      {
+        const std::size_t mi = *prim.mappings[v];
+        if(mi < material_index_remap.size())
+          sp.variant_material_indices[v] = material_index_remap[mi];
+      }
+    }
+  }
+
+  // Generate tangents via mikktspace when the glTF mesh didn't ship
+  // them. Required for normal-mapped PBR: the fragment shader rebuilds
+  // the TBN basis from (normal, tangent.xyz, cross(normal, tangent.xyz) *
+  // tangent.w) before unpacking the sampled normal. Skipped when any
+  // prerequisite stream is missing (no UVs → no normal mapping anyway).
+  if(!sp.tangents && sp.positions && sp.normals && sp.texcoords)
+  {
+    sp.tangents = Threedim::generate_tangents_mikktspace(
+        sp.positions, sp.normals, sp.texcoords, sp.indices,
+        sp.vertex_count);
+  }
+  return sp;
+}
+
+// Buffer helper for part_to_primitive (ScenePart → mesh_primitive; mirrors FbxParser::part_to_primitive
+// but with index-buffer support): wraps a shared float vector as a vertex buffer_resource.
+static ossia::buffer_resource_ptr make_buffer_resource_f(
+    std::shared_ptr<std::vector<float>> floats)
+{
+  if(!(floats && !floats->empty()))
+    return {};
+  ossia::buffer_data payload;
+  payload.usage_hint = ossia::buffer_data::usage::vertex_buffer;
+  payload.byte_size = int64_t(floats->size() * sizeof(float));
+  payload.data = std::shared_ptr<const void>(floats, floats->data()); // shares ownership of the vector
+  auto wrapped = std::make_shared<ossia::buffer_resource>();
+  wrapped->dirty_index = 1;
+  wrapped->resource = std::move(payload);
+  return wrapped;
+}
+static ossia::buffer_resource_ptr make_buffer_resource_u32(
+    std::shared_ptr<std::vector<uint32_t>> ints)
+{
+  if(!(ints && !ints->empty()))
+    return {};
+  ossia::buffer_data payload;
+  payload.usage_hint = ossia::buffer_data::usage::index_buffer;
+  payload.byte_size = int64_t(ints->size() * sizeof(uint32_t));
+  payload.data = std::shared_ptr<const void>(ints, ints->data()); // shares ownership of the vector
+  auto wrapped = std::make_shared<ossia::buffer_resource>();
+  wrapped->dirty_index = 1;
+  wrapped->resource = std::move(payload);
+  return wrapped;
+}
+
+static ossia::mesh_primitive part_to_primitive(
+    const GltfParser::ScenePart& p,
+    const std::vector<std::shared_ptr<ossia::material_component>>& mats)
+{
+  ossia::mesh_primitive mp;
+  // Per-primitive id — not deterministic across reloads (part_to_primitive
+  // is called from the scene walk where the source asset path isn't
+  // threaded in), so mint a fresh id. Sessions with the same model file
+  // reloaded will see different ids, which is acceptable: the preprocessor
+  // rebuilds on material/mesh fingerprint changes anyway, and stable-id
+  // stability is only critical for the material / transform fingerprints
+  // which ARE deterministic via the file-path hash.
+  mp.stable_id = ossia::mint_stable_id();
+  mp.topology    = ossia::primitive_topology::triangles;
+  mp.index_type  = p.indices ? ossia::index_format::uint32 : ossia::index_format::none;
+  mp.vertex_count = p.vertex_count;
+  mp.index_count  = p.index_count;
+  mp.first_vertex = 0;
+  mp.first_index  = 0;
+  mp.vertex_offset = 0;
+  mp.bounds = p.bounds;
+  if(p.material_index >= 0
+     && std::size_t(p.material_index) < mats.size())
+    mp.material = mats[p.material_index];
+
+  // KHR_materials_variants: per-variant material override. Index V
+  // → null = "use default", else the variant's material_component_ptr.
+  if(!p.variant_material_indices.empty())
+  {
+    mp.material_variants.resize(p.variant_material_indices.size());
+    for(std::size_t v = 0; v < p.variant_material_indices.size(); ++v)
+    {
+      const int mi = p.variant_material_indices[v];
+      if(mi >= 0 && std::size_t(mi) < mats.size())
+        mp.material_variants[v]
+            = ossia::material_component_ptr(mats[mi]);
+    }
+  }
+
+  uint32_t bi = 0;
+  auto add = [&](auto buf, int floats_per_vertex,
+                 ossia::attribute_semantic sem, ossia::vertex_format fmt) {
+    if(!buf || buf->empty())
+      return;
+    mp.vertex_buffers.push_back(make_buffer_resource_f(buf));
+    ossia::vertex_attribute attr;
+    attr.semantic = sem;
+    attr.format = fmt;
+    attr.buffer_index = bi++;
+    attr.byte_offset = 0;
+    attr.byte_stride = uint32_t(floats_per_vertex) * sizeof(float);
+    attr.rate = ossia::vertex_attribute::input_rate::per_vertex;
+    mp.attributes.push_back(attr);
+  };
+
+  add(p.positions, 3, ossia::attribute_semantic::position,  ossia::vertex_format::float3);
+  add(p.normals,   3, ossia::attribute_semantic::normal,    ossia::vertex_format::float3);
+  add(p.texcoords,  2, ossia::attribute_semantic::texcoord0, ossia::vertex_format::float2);
+  add(p.texcoords1, 2, ossia::attribute_semantic::texcoord1, ossia::vertex_format::float2);
+  add(p.colors,     4, ossia::attribute_semantic::color0,    ossia::vertex_format::float4);
+  add(p.tangents,   4, ossia::attribute_semantic::tangent,   ossia::vertex_format::float4);
+
+  // Skinning attributes — uvec4 joints + vec4 weights, one set per vertex.
+  if(p.joints0)
+  {
+    auto br = std::make_shared<ossia::buffer_resource>();
+    ossia::buffer_data bd;
+    bd.data = std::shared_ptr<const void>(p.joints0, p.joints0->data());
+    bd.byte_size = int64_t(p.joints0->size() * sizeof(uint32_t));
+    bd.usage_hint = ossia::buffer_data::usage::vertex_buffer;
+    br->resource = std::move(bd);
+    br->dirty_index = 1;
+    mp.vertex_buffers.push_back(std::move(br));
+    ossia::vertex_attribute attr;
+    attr.semantic = ossia::attribute_semantic::joints0;
+    attr.format = ossia::vertex_format::uint32x4;
+    attr.buffer_index = bi++;
+    attr.byte_offset = 0;
+    attr.byte_stride = 4 * sizeof(uint32_t);
+    attr.rate = ossia::vertex_attribute::input_rate::per_vertex;
+    mp.attributes.push_back(attr);
+  }
+  add(p.weights0, 4, ossia::attribute_semantic::weights0, ossia::vertex_format::float4);
+
+  if(p.indices)
+    mp.index_buffer = make_buffer_resource_u32(p.indices);
+
+  return mp;
+}
+
+// Walk the glTF node hierarchy depth-first into an FbxParser::SceneNode-like
+// flat array. Appends nodeIdx and its descendants to `out` and returns the
+// index in `out` of the node emitted for nodeIdx, or -1 if nodeIdx is invalid.
+static int emit_node(
+    const fastgltf::Asset& asset, std::size_t nodeIdx, int parent_index,
+    std::vector<GltfParser::SceneNode>& out,
+    const std::vector<int>& material_index_remap)
+{
+  if(nodeIdx >= asset.nodes.size())
+    return -1; // dangling root/child index in a malformed file: emit nothing
+  const auto& n = asset.nodes[nodeIdx];
+  GltfParser::SceneNode sn;
+  sn.name = std::string(n.name);
+  sn.parent_index = parent_index;
+  sn.local_transform = to_transform(n);
+  // Stable ID = glTF node index + 1 (0 is the "unset" sentinel). Lets
+  // AnimationPlayer and skeleton_component::joint_node_ids resolve
+  // scene_node_id back to the emitted node.
+  sn.stable_id = std::uint64_t(nodeIdx) + 1;
+
+  // Stamp the glTF skin index so the downstream mesh_component inherits it.
+  if(n.skinIndex.has_value())
+    sn.skin_index = int32_t(*n.skinIndex);
+
+  if(n.meshIndex.has_value() && *n.meshIndex < asset.meshes.size())
+  {
+    const auto& mesh = asset.meshes[*n.meshIndex];
+    sn.parts.reserve(mesh.primitives.size());
+    for(const auto& prim : mesh.primitives)
+    {
+      auto sp = extract_primitive(asset, prim, material_index_remap);
+      if(sp.vertex_count > 0)
+        sn.parts.push_back(std::move(sp));
+    }
+  }
+  if(n.lightIndex.has_value() && *n.lightIndex < asset.lights.size())
+    sn.light = to_light(asset.lights[*n.lightIndex]);
+  if(n.cameraIndex.has_value() && *n.cameraIndex < asset.cameras.size())
+    sn.camera = to_camera(asset.cameras[*n.cameraIndex]);
+
+  const int self = (int)out.size();
+  out.push_back(std::move(sn));
+  for(std::size_t ci : asset.nodes[nodeIdx].children)
+    emit_node(asset, ci, self, out, material_index_remap);
+  return self;
+}
+
+}  // namespace
+
+// =============================================================================
+// rebuild_scene — same pattern as FbxParser::rebuild_scene.
+// =============================================================================
+void GltfParser::rebuild_scene()
+{
+  if(m_scene_nodes.empty())
+    return;
+
+  const std::size_t N = m_scene_nodes.size();
+  std::vector<std::shared_ptr<ossia::scene_node>> nodes(N);
+  std::vector<std::shared_ptr<std::vector<ossia::scene_payload>>> children(N);
+  for(std::size_t i = 0; i < N; ++i)
+  {
+    nodes[i] = std::make_shared<ossia::scene_node>();
+    nodes[i]->name = m_scene_nodes[i].name;
+    nodes[i]->visible = true;
+    nodes[i]->id.value = m_scene_nodes[i].stable_id;
+    children[i] = std::make_shared<std::vector<ossia::scene_payload>>();
+  }
+  for(std::size_t i = 0; i < N; ++i)
+  {
+    auto& src = m_scene_nodes[i];
+    auto& lst = *children[i];
+    lst.push_back(src.local_transform);
+    if(!src.parts.empty())
+    {
+      auto mc = std::make_shared<ossia::mesh_component>();
+      mc->primitives.reserve(src.parts.size());
+      for(const auto& p : src.parts)
+        mc->primitives.push_back(part_to_primitive(p, m_materials));
+      // Direct skeleton pointer (glTF node.skin index → m_skeletons).
+      if(src.skin_index >= 0
+         && std::size_t(src.skin_index) < m_skeletons.size())
+        mc->skin = ossia::skeleton_component_ptr(m_skeletons[src.skin_index]);
+      mc->dirty_index = 1;
+      lst.push_back(ossia::mesh_component_ptr(std::move(mc)));
+    }
+    if(src.light)
+      lst.push_back(ossia::light_component_ptr(src.light));
+    if(src.camera)
+      lst.push_back(ossia::camera_component_ptr(src.camera));
+  }
+  for(std::size_t i = 0; i < N; ++i)
+  {
+    int p = m_scene_nodes[i].parent_index;
+    if(p >= 0 && p < (int)N)
+      children[p]->push_back(ossia::scene_node_ptr(nodes[i]));
+  }
+  for(std::size_t i = 0; i < N; ++i)
+    nodes[i]->children = children[i];
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  for(std::size_t i = 0; i < N; ++i)
+    if(m_scene_nodes[i].parent_index < 0)
+      roots->push_back(ossia::scene_node_ptr(nodes[i]));
+
+  auto mat_list = std::make_shared<std::vector<ossia::material_component_ptr>>();
+  mat_list->reserve(m_materials.size());
+  for(auto& m : m_materials)
+    mat_list->push_back(ossia::material_component_ptr(m));
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(roots);
+  state->materials = std::move(mat_list);
+  if(!m_skeletons.empty())
+  {
+    auto skel_list
+        = std::make_shared<std::vector<ossia::skeleton_component_ptr>>();
+    skel_list->reserve(m_skeletons.size());
+    for(auto& s : m_skeletons)
+      skel_list->push_back(ossia::skeleton_component_ptr(s));
+    state->skeletons = std::move(skel_list);
+  }
+  state->version = 1;
+  state->dirty_index = 1;
+
+  // Expose asset-scope variant names for UI / controls. active_variant
+  // starts at -1 (use each primitive's default material).
+  if(!m_variant_names.empty())
+  {
+    state->variant_names.assign(
+        m_variant_names.begin(), m_variant_names.end());
+    state->active_variant_index = -1;
+  }
+
+  // AssetLoader wraps m_raw_state in a TRS payload externally; we
+  // publish only the raw scene here.
+  m_raw_state = std::move(state);
+}
+
+// Parse the .gltf / .glb at tv.filename with fastgltf and stage its materials,
+// default-scene nodes, skins and variant names. Returns an apply-lambda that
+// installs them into a GltfParser and rebuilds its scene; empty on failure.
+std::function<void(GltfParser&)> GltfParser::ins::gltf_t::process(file_type tv)
+{
+  const std::filesystem::path path(tv.filename);
+  if(tv.filename.empty() || !std::filesystem::exists(path))
+    return {};
+
+  // Enable every extension we can usefully translate. Unknown required
+  // extensions make fastgltf refuse the file; we intentionally enable more
+  // than we consume to avoid that (data we don't translate is ignored).
+  constexpr auto extensions =
+      fastgltf::Extensions::KHR_mesh_quantization
+      | fastgltf::Extensions::KHR_texture_transform
+      | fastgltf::Extensions::KHR_lights_punctual
+      | fastgltf::Extensions::KHR_materials_emissive_strength
+      | fastgltf::Extensions::KHR_materials_unlit
+      | fastgltf::Extensions::KHR_materials_ior
+      | fastgltf::Extensions::KHR_materials_specular
+      | fastgltf::Extensions::KHR_materials_transmission
+      | fastgltf::Extensions::KHR_materials_volume
+      | fastgltf::Extensions::KHR_materials_clearcoat
+      | fastgltf::Extensions::KHR_materials_sheen
+      | fastgltf::Extensions::KHR_materials_iridescence
+      | fastgltf::Extensions::KHR_materials_anisotropy
+      | fastgltf::Extensions::KHR_materials_diffuse_transmission
+      | fastgltf::Extensions::KHR_materials_variants;
+
+  fastgltf::Parser parser(extensions);
+
+  constexpr auto gltfOptions
+      = fastgltf::Options::DontRequireValidAssetMember
+        | fastgltf::Options::AllowDouble
+        | fastgltf::Options::LoadExternalBuffers
+        | fastgltf::Options::LoadExternalImages
+        | fastgltf::Options::GenerateMeshIndices
+        | fastgltf::Options::DecomposeNodeMatrices;
+
+  auto gltfFile = fastgltf::GltfDataBuffer::FromPath(path);
+  if(!bool(gltfFile))
+    return {};
+
+  auto assetE = parser.loadGltf(
+      gltfFile.get(), path.parent_path(), gltfOptions);
+  if(assetE.error() != fastgltf::Error::None)
+    return {};
+  fastgltf::Asset asset = std::move(assetE.get());
+
+  // Materials first so primitives can remap their material indices.
+  std::vector<std::shared_ptr<ossia::material_component>> materials;
+  std::vector<int> material_index_remap(asset.materials.size(), -1);
+  for(std::size_t i = 0; i < asset.materials.size(); ++i)
+  {
+    material_index_remap[i] = (int)materials.size();
+    auto mat = to_material(asset, asset.materials[i], path.parent_path());
+    // Deterministic id keyed on (asset path, "mat", index) — re-reads of the
+    // same asset file give the same material their same stable_id, so
+    // downstream caches survive asset reloads.
+    mat->stable_id = ossia::scene_node_id::from_parent(
+        ossia::scene_node_id::from_path(path.string()),
+        std::string("mat/") + std::to_string(i)).value;
+    materials.push_back(std::move(mat));
+  }
+
+  // Scene — walk the default scene's roots. glTF allows multiple scenes but
+  // only one is "active"; pick asset.defaultScene or the first.
+  std::vector<GltfParser::SceneNode> scene_nodes;
+  const std::size_t sceneIdx = asset.defaultScene.value_or(0);
+  if(sceneIdx < asset.scenes.size())
+  {
+    for(std::size_t rootIdx : asset.scenes[sceneIdx].nodeIndices)
+      emit_node(asset, rootIdx, -1, scene_nodes, material_index_remap);
+  }
+
+  if(scene_nodes.empty())
+    return {};
+
+  // Skins — parse joint node list + inverse-bind matrices per skin.
+  // Joint transforms themselves live on the scene_node's local_transform
+  // (set during emit_node). AnimationPlayer consumes this skeleton data
+  // to produce per-frame world-space joint matrices.
+  std::vector<std::shared_ptr<ossia::skeleton_component>> skeletons;
+  skeletons.reserve(asset.skins.size());
+  for(const auto& sk : asset.skins)
+  {
+    auto skel = std::make_shared<ossia::skeleton_component>();
+    // Inverse-bind matrices are optional in glTF; default is identity.
+    std::vector<float> ibms;
+    if(sk.inverseBindMatrices.has_value()
+       && *sk.inverseBindMatrices < asset.accessors.size())
+    {
+      const auto& ibmAcc = asset.accessors[*sk.inverseBindMatrices];
+      ibms.resize(ibmAcc.count * 16);
+      std::size_t i = 0;
+      fastgltf::iterateAccessor<fastgltf::math::fmat4x4>(
+          asset, ibmAcc, [&](fastgltf::math::fmat4x4 m) {
+            for(int c = 0; c < 4; ++c)
+              for(int r = 0; r < 4; ++r)
+                ibms[i++] = m[c][r];
+          });
+    }
+    skel->joints.reserve(sk.joints.size());
+    skel->joint_node_ids.reserve(sk.joints.size());
+    for(std::size_t j = 0; j < sk.joints.size(); ++j)
+    {
+      ossia::skeleton_joint sj;
+      const auto nodeIdx = sk.joints[j];
+      if(nodeIdx < asset.nodes.size())
+        sj.name = std::string(asset.nodes[nodeIdx].name);
+      sj.parent_index = -1; // resolved from node hierarchy at use-time
+      if(j * 16 + 15 < ibms.size())
+        std::memcpy(
+            sj.inverse_bind_matrix, ibms.data() + j * 16,
+            sizeof(float) * 16);
+      skel->joints.push_back(std::move(sj));
+      // Stable node_id derived from the glTF node index (+1 because 0
+      // means "unset" per scene_node_id convention). Matches the IDs
+      // emit_node stamps on SceneNode::stable_id for rebuild_scene.
+      ossia::scene_node_id nid;
+      nid.value = std::uint64_t(nodeIdx) + 1;
+      skel->joint_node_ids.push_back(nid);
+    }
+    skel->dirty_index = 1;
+    skeletons.push_back(std::move(skel));
+  }
+
+  // KHR_materials_variants: asset-scope variant name list. Carried
+  // alongside m_materials/skeletons into the parser so rebuild_scene
+  // can copy it into scene_state. Capture the asset's materialVariants
+  // by value so the lambda doesn't depend on the asset's lifetime.
+  std::vector<std::string> variant_names(
+      asset.materialVariants.begin(), asset.materialVariants.end());
+
+  return [scene_nodes = std::move(scene_nodes),
+          materials = std::move(materials),
+          skeletons = std::move(skeletons),
+          variant_names = std::move(variant_names)](GltfParser& o) mutable {
+    std::swap(o.m_scene_nodes, scene_nodes);
+    std::swap(o.m_materials, materials);
+    std::swap(o.m_skeletons, skeletons);
+    std::swap(o.m_variant_names, variant_names);
+    o.rebuild_scene();
+  };
+}
+
+}  // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/GltfParser.hpp b/src/plugins/score-plugin-threedim/Threedim/GltfParser.hpp
new file mode 100644
index 0000000000..580e7e92fb
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/GltfParser.hpp
@@ -0,0 +1,96 @@
+#pragma once
+#include <halp/file_port.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+namespace Threedim
+{
+
+// Internal glTF 2.0 parsing class — uses fastgltf + simdjson to parse
+// .gltf / .glb. Not a halp node itself; AssetLoader is the user-facing
+// entry point. AssetLoader calls the static `ins::gltf_t::process` to
+// obtain an apply-lambda, applies it against a throwaway GltfParser
+// instance, then copies out `m_raw_state`.
+class GltfParser
+{
+public:
+  struct ins
+  {
+    struct gltf_t : halp::file_port<"glTF file">
+    {
+      static std::function<void(GltfParser&)> process(file_type data);
+    } gltf;
+  } inputs;
+
+  void rebuild_scene();
+
+  // Rich scene staging. Same schema as FbxParser (kept in sync so a future
+  // shared helper can consume both).
+  struct ScenePart
+  {
+    std::shared_ptr<std::vector<float>> positions;
+    std::shared_ptr<std::vector<float>> normals;
+    std::shared_ptr<std::vector<float>> texcoords;
+    std::shared_ptr<std::vector<float>> texcoords1;  // glTF TEXCOORD_1
+    std::shared_ptr<std::vector<float>> colors;
+    std::shared_ptr<std::vector<float>> tangents;
+    // Skinning attributes (present when the primitive references a skin).
+    // joints: uvec4 bone indices packed as uint32 x 4 per vertex.
+    // weights: vec4 bone weights per vertex.
+    std::shared_ptr<std::vector<uint32_t>> joints0;
+    std::shared_ptr<std::vector<float>> weights0;
+    std::shared_ptr<std::vector<uint32_t>> indices; // optional
+    uint32_t vertex_count{0};
+    uint32_t index_count{0};
+    int material_index{-1};
+    // Local-space AABB over the POSITION stream. Populated by
+    // extract_primitive from the glTF POSITION accessor's min/max when
+    // present (spec-required but optionally trusted); otherwise derived
+    // by walking positions. Empty aabb = "not yet computed"; downstream
+    // GPU culling treats empty as infinite (never cull).
+    ossia::aabb bounds{};
+    // KHR_materials_variants: per-variant material override index.
+    // Indexed by variant (parallel to scene_state::variant_names).
+    // -1 at a position = "no override for this variant, use default".
+    std::vector<int> variant_material_indices;
+  };
+
+  struct SceneNode
+  {
+    std::string name;
+    ossia::scene_transform local_transform;
+    int parent_index{-1};
+    std::vector<ScenePart> parts;
+    std::shared_ptr<ossia::light_component> light;
+    std::shared_ptr<ossia::camera_component> camera;
+    // glTF skin index. -1 = not skinned. When ≥ 0 and in range, rebuild_scene
+    // sets mesh_component::skin on this node's mesh to m_skeletons[skin_index]
+    // so ScenePreprocessor can bind that skeleton's joint_matrices
+    // auxiliary buffer for the skinning vertex shader to read.
+    int32_t skin_index{-1};
+    // Stable node_id, derived from the glTF node index + 1. Used by
+    // AnimationPlayer to find the node via channel.target_node_id, and
+    // by skeleton_component::joint_node_ids to resolve each joint to
+    // its node's world transform.
+    std::uint64_t stable_id{0};
+  };
+
+  std::vector<SceneNode> m_scene_nodes;
+  std::vector<std::shared_ptr<ossia::material_component>> m_materials;
+  std::vector<std::shared_ptr<ossia::skeleton_component>> m_skeletons;
+  // KHR_materials_variants: names (UI-facing) declared at asset scope.
+  // Parallel to mesh_primitive::material_variants and
+  // scene_state::active_variant_index.
+  std::vector<std::string> m_variant_names;
+
+  // Rich scene state emitted by rebuild_scene — full hierarchy with
+  // materials, lights, cameras, skeletons. AssetLoader consumes this
+  // via the apply-lambda returned by ins::gltf_t::process.
+  std::shared_ptr<const ossia::scene_state> m_raw_state;
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidPose.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidPose.hpp
new file mode 100644
index 0000000000..9c1570c822
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidPose.hpp
@@ -0,0 +1,183 @@
+#pragma once
+
+// humanoid_pose — canonical intermediate for live mocap → rigged-character
+//     retargeting. One fixed set of 21 bones that every source adapter
+// (PoseKeypointsToHumanoid, TrackedBonesToHumanoid, future Kinect / Xsens
+// adapters) populates, and that HumanoidRetarget consumes.
+//
+// Design notes:
+//   - Flows through halp's structured-value port machinery, matching the
+//     existing DetectedPose pattern from score-addon-onnx. No new libossia
+//     port type.
+//   - Rotations are local parent-relative quaternions (x, y, z, w). Adapters
+//     responsible for converting their source's native representation
+//     (bone-direction vectors, world-space trackers, etc.) into this shape.
+//   - `validity` is 0..1 — a per-bone confidence that downstream can use
+//     to skip updates on unreliable landmarks (BlazePose visibility,
+//     tracker occlusion, etc.). 1.0 = fully trusted; 0.0 = drop / freeze
+//     at previous rotation.
+//   - `hip_position` is the only world-space translation that flows
+//     through; every other bone's position is implied by target rig
+//     proportions. Used only when HumanoidRetarget's root-motion toggle
+//     is on.
+
+#include <halp/controls.hpp>
+
+#include <array>
+#include <cstdint>
+#include <vector>
+
+namespace Threedim
+{
+
+// Canonical bone set. Indexed access via the enum; iterate with
+// humanoid_bone_index::Count. Order is stable — adapters and retargeter
+// presets both depend on it.
+enum class humanoid_bone_index : uint8_t
+{
+  Hips = 0,
+  Spine,
+  Chest,
+  Neck,
+  Head,
+
+  LeftShoulder,
+  LeftUpperArm,
+  LeftLowerArm,
+  LeftHand,
+
+  RightShoulder,
+  RightUpperArm,
+  RightLowerArm,
+  RightHand,
+
+  LeftUpperLeg,
+  LeftLowerLeg,
+  LeftFoot,
+  LeftToes,
+
+  RightUpperLeg,
+  RightLowerLeg,
+  RightFoot,
+  RightToes,
+
+  Count
+};
+
+// Per-bone pose. 20-byte halp-structured record (5 floats).
+struct humanoid_bone
+{
+  // Parent-relative rotation quaternion, (x, y, z, w). Identity = {0,0,0,1}.
+  float qx{0.f};
+  float qy{0.f};
+  float qz{0.f};
+  float qw{1.f};
+
+  // 0..1 confidence. 0 means "no reliable data for this bone, retargeter
+  // should ignore this frame for this bone". 1 = fully trusted.
+  float validity{1.f};
+
+  halp_field_names(qx, qy, qz, qw, validity);
+};
+
+// Fixed-size bone array — std::array plays nicely with halp serialization
+// (same way DetectedPose uses std::vector, except the size is known and
+// we can index by enum without a lookup).
+struct humanoid_pose
+{
+  std::array<humanoid_bone, std::size_t(humanoid_bone_index::Count)> bones{};
+
+  // World-space translation of the hip (Hips) root. Only consumed when
+  // root-motion is enabled on HumanoidRetarget; otherwise ignored.
+  float hip_x{0.f};
+  float hip_y{0.f};
+  float hip_z{0.f};
+
+  // Wall-clock frame counter. Increments on every adapter emit. Used by
+  // consumers for dirty tracking (skip work when version hasn't advanced).
+  int64_t version{0};
+
+  // Convenience: access a bone by enum.
+  humanoid_bone& operator[](humanoid_bone_index b) noexcept
+  {
+    return bones[std::size_t(b)];
+  }
+  const humanoid_bone& operator[](humanoid_bone_index b) const noexcept
+  {
+    return bones[std::size_t(b)];
+  }
+
+  halp_field_names(bones, hip_x, hip_y, hip_z, version);
+};
+
+// =============================================================================
+// Keypoint ingestion type — structurally compatible with the DetectedPose
+// struct from score-addon-onnx (same field names, same layout) so halp's
+// field-name-based port marshalling can carry a DetectedPose through a
+// port typed as keypoint_stream without cross-addon header dependency.
+//
+// Kept in Threedim deliberately: HumanoidRetarget consumes it, but we
+// don't want score-plugin-threedim to link against score-addon-onnx.
+// =============================================================================
+struct keypoint_3d
+{
+  float x{0.f};
+  float y{0.f};
+  float z{0.f};
+  float confidence{0.f};
+
+  halp_field_names(x, y, z, confidence);
+};
+
+struct keypoint_stream
+{
+  std::vector<keypoint_3d> keypoints;
+  float mean_confidence{0.f};
+
+  halp_field_names(keypoints, mean_confidence);
+};
+
+// =============================================================================
+// Tracker bundle — 6 slots matching a common VR / optical-mocap full-body
+// layout (head + hips + 2 hands + 2 feet). Each slot carries a world-space
+// position, a world-space quaternion, and a per-tracker validity so lost
+// tracking (tracker occluded / battery dead) can gracefully skip instead
+// of slamming the character to the origin.
+//
+// Additional tracker layouts (10-point Vive Full-Body, Xsens 17-IMU,
+// OptiTrack marker sets) can be added as additional bundle_N struct types
+// in future passes. v1 covers the most common consumer setup; users with
+// richer rigs can still drive the 6 slots from the subset they trust.
+// =============================================================================
+struct tracker_pose
+{
+  // World-space translation.
+  float x{0.f};
+  float y{0.f};
+  float z{0.f};
+
+  // World-space quaternion (x, y, z, w). Identity = {0, 0, 0, 1}.
+  float qx{0.f};
+  float qy{0.f};
+  float qz{0.f};
+  float qw{1.f};
+
+  // 0..1 tracking confidence. 0 = "tracker offline, ignore this frame".
+  float validity{0.f};
+
+  halp_field_names(x, y, z, qx, qy, qz, qw, validity);
+};
+
+struct tracker_bundle_6
+{
+  tracker_pose head;
+  tracker_pose hips;
+  tracker_pose left_hand;
+  tracker_pose right_hand;
+  tracker_pose left_foot;
+  tracker_pose right_foot;
+
+  halp_field_names(head, hips, left_hand, right_hand, left_foot, right_foot);
+};
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidPresets.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidPresets.hpp
new file mode 100644
index 0000000000..f22eeaf09a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidPresets.hpp
@@ -0,0 +1,157 @@
+#pragma once
+
+// Target rig presets for HumanoidRetarget: compile-time tables mapping
+// canonical humanoid_bone_index → the target skeleton's joint name for
+// three common conventions:
+//
+//   - Mixamo (mixamorig:*) — ubiquitous for indie / live / education
+//   - VRM — VTubing standard; Ready Player Me derivatives all use this
+//     spec's bone names (the VRM humanoid bone list)
+//   - Unreal Mannequin — game-dev convention; also matches the
+//     output of many BVH-to-FBX converters and most "clean" glTF rigs
+//
+// Tables are std::array<std::string_view>, compile-time, zero-overhead.
+// If an entry is empty the target rig doesn't have a corresponding bone
+// and HumanoidRetarget will silently skip it (no preset below currently
+// leaves an entry empty: Mixamo's LeftToes / RightToes map to *ToeBase).
+//
+// Not user-editable by design (see project_decisions.md): if a rig doesn't
+// fit these three presets, add a fourth preset in code rather than
+// exposing a JSON text-input escape hatch that tends to silently half-work.
+
+#include <Threedim/HumanoidPose.hpp>
+
+#include <array>
+#include <string_view>
+
+namespace Threedim
+{
+
+using HumanoidBoneMap = std::array<
+    std::string_view,
+    std::size_t(humanoid_bone_index::Count)>;
+
+enum class HumanoidRigPreset : uint8_t
+{
+  Mixamo = 0,
+  VRM,
+  UnrealMannequin,
+  Count
+};
+
+// Mixamo — "mixamorig:" prefix, title-cased component names.
+// Spine / Spine1 / Spine2 are three bones; we map the canonical
+// Spine→Spine, Chest→Spine1, (no UpperChest) and Neck/Head directly.
+// Mixamo has no explicit Toes bones; we map to *ToeBase which is the
+// closest equivalent (foot → toe-base is enough for live retargeting).
+inline constexpr HumanoidBoneMap kMixamoBoneMap = {
+    "mixamorig:Hips",               // Hips
+    "mixamorig:Spine",              // Spine
+    "mixamorig:Spine1",             // Chest
+    "mixamorig:Neck",               // Neck
+    "mixamorig:Head",               // Head
+
+    "mixamorig:LeftShoulder",       // LeftShoulder
+    "mixamorig:LeftArm",            // LeftUpperArm
+    "mixamorig:LeftForeArm",        // LeftLowerArm
+    "mixamorig:LeftHand",           // LeftHand
+
+    "mixamorig:RightShoulder",      // RightShoulder
+    "mixamorig:RightArm",           // RightUpperArm
+    "mixamorig:RightForeArm",       // RightLowerArm
+    "mixamorig:RightHand",          // RightHand
+
+    "mixamorig:LeftUpLeg",          // LeftUpperLeg
+    "mixamorig:LeftLeg",            // LeftLowerLeg
+    "mixamorig:LeftFoot",           // LeftFoot
+    "mixamorig:LeftToeBase",        // LeftToes
+
+    "mixamorig:RightUpLeg",         // RightUpperLeg
+    "mixamorig:RightLeg",           // RightLowerLeg
+    "mixamorig:RightFoot",          // RightFoot
+    "mixamorig:RightToeBase",       // RightToes
+};
+
+// VRM — per the VRM humanoid spec bone names. Ready Player Me avatars
+// also use this naming. Toes are not part of the mandatory VRM bone
+// list but commonly present; we map to the optional "LeftToes"/"RightToes"
+// which RPM and most VRM exports populate.
+inline constexpr HumanoidBoneMap kVRMBoneMap = {
+    "Hips",                  // Hips
+    "Spine",                 // Spine
+    "Chest",                 // Chest
+    "Neck",                  // Neck
+    "Head",                  // Head
+
+    "LeftShoulder",          // LeftShoulder
+    "LeftUpperArm",          // LeftUpperArm
+    "LeftLowerArm",          // LeftLowerArm
+    "LeftHand",              // LeftHand
+
+    "RightShoulder",         // RightShoulder
+    "RightUpperArm",         // RightUpperArm
+    "RightLowerArm",         // RightLowerArm
+    "RightHand",             // RightHand
+
+    "LeftUpperLeg",          // LeftUpperLeg
+    "LeftLowerLeg",          // LeftLowerLeg
+    "LeftFoot",              // LeftFoot
+    "LeftToes",              // LeftToes
+
+    "RightUpperLeg",         // RightUpperLeg
+    "RightLowerLeg",         // RightLowerLeg
+    "RightFoot",             // RightFoot
+    "RightToes",             // RightToes
+};
+
+// Unreal Mannequin — snake_case with "_l"/"_r" suffix. Spine is
+// spine_01/02/03; we map Spine→spine_01, Chest→spine_02 (the visible
+// chest bone). UE mannequin has no UpperChest; Spine→spine_03 would
+// be closer if the rig has one authored. ball_l/r is the UE name for
+// toes-equivalent.
+inline constexpr HumanoidBoneMap kUnrealMannequinBoneMap = {
+    "pelvis",                // Hips
+    "spine_01",              // Spine
+    "spine_02",              // Chest
+    "neck_01",               // Neck
+    "head",                  // Head
+
+    "clavicle_l",            // LeftShoulder
+    "upperarm_l",            // LeftUpperArm
+    "lowerarm_l",            // LeftLowerArm
+    "hand_l",                // LeftHand
+
+    "clavicle_r",            // RightShoulder
+    "upperarm_r",            // RightUpperArm
+    "lowerarm_r",            // RightLowerArm
+    "hand_r",                // RightHand
+
+    "thigh_l",               // LeftUpperLeg
+    "calf_l",                // LeftLowerLeg
+    "foot_l",                // LeftFoot
+    "ball_l",                // LeftToes
+
+    "thigh_r",               // RightUpperLeg
+    "calf_r",                // RightLowerLeg
+    "foot_r",                // RightFoot
+    "ball_r",                // RightToes
+};
+
+inline constexpr const HumanoidBoneMap&
+humanoidBoneMap(HumanoidRigPreset preset) noexcept
+{
+  switch(preset)
+  {
+    case HumanoidRigPreset::Mixamo:
+      return kMixamoBoneMap;
+    case HumanoidRigPreset::VRM:
+      return kVRMBoneMap;
+    case HumanoidRigPreset::UnrealMannequin:
+      return kUnrealMannequinBoneMap;
+    case HumanoidRigPreset::Count:
+      break;
+  }
+  return kMixamoBoneMap;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidRetarget.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidRetarget.hpp
new file mode 100644
index 0000000000..e5aa9ea680
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidRetarget.hpp
@@ -0,0 +1,440 @@
+#pragma once
+
+// Threedim::HumanoidRetarget — live scene filter that drives a rigged
+// model's skeleton from a humanoid_pose stream.
+//
+// Inputs:
+//   - scene_in: an ossia::scene_spec carrying a rigged asset (at least one
+//     skeleton_component). Typically comes from Threedim::AssetLoader.
+//   - keypoints_in / trackers_in: raw source data (keypoint_stream from an
+//     ONNX PoseDetector, tracker_bundle_6 from PSN/RTTrP trackers). The
+//     "Source" control selects which one is converted to a humanoid_pose.
+//
+// Controls:
+//   - Target rig preset: Mixamo / VRM / Unreal Mannequin bone-name
+//     convention. Selects which joint names we look up against the
+//     scene's skeleton_component.
+//   - Capture rest pose (impulse): snapshot both sides' current state as
+//     the retarget reference. Also taken automatically while uncalibrated.
+//   - Root motion (toggle) + Root scale: optional Hips translation
+//     driven by the source's hip_position delta from rest.
+//
+// Output:
+//   - scene_out: the incoming scene_spec with ONLY the mapped joints'
+//     rotations (and optionally Hips translation) replaced. Every other
+//     joint, every mesh, every material, the scene hierarchy, version
+//     counters on other state — all passed through unchanged.
+//
+// Math (Offset / delta-from-rest mode, the default and correct choice
+// when source and target rigs have different axis conventions):
+//
+//     q_tgt_cur = q_tgt_rest * ( inverse(q_src_rest) * q_src_cur )
+//
+// Calibration (both sides at once) captures q_src_rest per canonical
+// bone and q_tgt_rest per resolved target joint. The delta is then a
+// parent-relative quaternion that transfers cleanly even if the source
+// is, say, a BlazePose landmark graph and the target is a Mixamo FBX —
+// as long as the adapter produces parent-relative rotations, the math
+// works. Per-bone axis correction matrices are a follow-up (needed for
+// some exotic rigs; not a v1 concern).
+//
+// No smoothing here — smoothing belongs in the adapter, pre-pose_spec.
+// No IK here — chain `InverseKinematics` after this process for
+// hand/foot-prop reach; the two compose cleanly on scene_spec.
+
+#include <Threedim/HumanoidPose.hpp>
+#include <Threedim/HumanoidPresets.hpp>
+#include <Threedim/HumanoidSourceAdapters.hpp>
+#include <Threedim/HumanoidSourceMaps.hpp>
+
+#include <halp/controls.hpp>
+#include <halp/controls.buttons.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <optional>
+
+namespace Threedim
+{
+
+// Selects which input port the retargeter converts into a humanoid_pose
+// each frame. Both ports always exist (halp doesn't hide ports
+// conditionally); the value only steers composeSourcePose(). Order mirrors
+// the "Source" combobox labels (presumably index-matched — confirm).
+enum class HumanoidSourceType : uint8_t
+{
+  Off = 0,         // Passthrough (no motion applied)
+  BlazePose,       // keypoints_in, BlazePose 33-landmark ordering
+  Coco17,          // keypoints_in, COCO-17 (YOLO-pose / ViTPose / RTMPose_COCO)
+  RTMPoseWhole,    // keypoints_in, RTMPose_Whole (body subset of 133)
+  Trackers6,       // trackers_in, six trackers: head / hips / 2 hands / 2 feet
+  Count            // Sentinel (no combobox label); dispatch treats it like Off
+};
+
+class HumanoidRetarget
+{
+public:
+  halp_meta(name, "Humanoid Retarget")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "humanoid_retarget")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/humanoid-retarget.html")
+  halp_meta(uuid, "7e1f4d8a-2c6b-4e7f-9a35-6c4b8d2e0f1a")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Keypoint input — populated when Source is a PoseDetector workflow.
+    // Structurally compatible with score-addon-onnx's DetectedPose
+    // (matching halp_field_names), so a DetectedPose port wires directly.
+    struct
+    {
+      halp_meta(name, "Keypoints");
+      std::optional<keypoint_stream> value;
+    } keypoints_in;
+
+    // Tracker input — populated when Source is Trackers6. The user wires
+    // OSC-emitted xyz+quat streams from a PSN/RTTrP/VRPN device into the
+    // matching tracker_pose slots of the bundle.
+    struct
+    {
+      halp_meta(name, "Trackers");
+      std::optional<tracker_bundle_6> value;
+    } trackers_in;
+
+    struct : halp::combobox_t<"Source", HumanoidSourceType>
+    {
+      struct range
+      {
+        std::string_view values[5]{
+            "Off", "BlazePose", "COCO-17", "RTMPose Whole", "6DOF Trackers"};
+        int init{0};
+      };
+      void update(HumanoidRetarget& self)
+      {
+        // Source-shape change invalidates the captured source rest pose;
+        // the map of landmark→bone (and bone→tracker) differs, so previous
+        // "rest" values aren't meaningful under the new source.
+        self.m_calibrated = false;
+      }
+    } source;
+
+    struct : halp::hslider_f32<"Confidence", halp::range{0.f, 1.f, 0.5f}>
+    {
+      halp_meta(description, "Per-keypoint confidence threshold");
+    } confidence_threshold;
+
+    struct : halp::combobox_t<"Target rig", HumanoidRigPreset>
+    {
+      struct range
+      {
+        std::string_view values[3]{"Mixamo", "VRM", "Unreal Mannequin"};
+        int init{0};
+      };
+      void update(HumanoidRetarget& self)
+      {
+        // Bone-name table change invalidates the cached joint index
+        // lookups and the captured target rest pose; force a fresh
+        // calibration on the next frame that has both inputs.
+        self.m_calibrated = false;
+      }
+    } preset;
+
+    halp::toggle<"Root motion"> root_motion;
+
+    struct : halp::hslider_f32<"Root scale", halp::range{0.01f, 10.f, 1.f}>
+    {
+    } root_scale;
+
+    struct : halp::impulse_button<"Capture rest pose">
+    {
+      void update(HumanoidRetarget& self) { self.m_need_calibrate = true; }
+    } calibrate;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // Per-frame entry point: forwards scene_in to scene_out, overriding the
+  // first skeleton's mapped joint rotations (and optionally the Hips
+  // translation) from the selected source pose when one is available.
+  void operator()()
+  {
+    const auto& in = inputs.scene_in.scene;
+    if(!in.state || !in.state->roots)
+    {
+      outputs.scene_out.scene.state.reset();
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+
+    // Translate the selected source into a humanoid_pose. Off mode and
+    // "source has no fresh data" both fall through to a clean passthrough
+    // so downstream nodes see the input unchanged until motion starts.
+    std::optional<humanoid_pose> maybe_pose
+        = composeSourcePose(inputs.confidence_threshold.value);
+    if(!maybe_pose)
+    {
+      outputs.scene_out.scene = in;
+      outputs.scene_out.dirty = 0; // NOTE(review): input dirty not forwarded
+      return;
+    }
+
+    const auto& pose = *maybe_pose;
+
+    // Only the first skeleton is retargeted (v1); scenes without a usable
+    // skeleton are passed through untouched.
+    if(!in.state->skeletons || in.state->skeletons->empty())
+    {
+      outputs.scene_out.scene = in;
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+    const auto& srcSkel = *(*in.state->skeletons)[0];
+    if(srcSkel.joints.empty())
+    {
+      outputs.scene_out.scene = in;
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+
+    // (Re)calibrate when the user pressed "Capture rest pose" or when no
+    // valid calibration exists: first frame with a pose, or after the
+    // Source / Target rig combobox reset m_calibrated.
+    if(m_need_calibrate || !m_calibrated)
+    {
+      calibrate(srcSkel, pose);
+      m_need_calibrate = false;
+    }
+
+    // Clone the skeleton so other consumers of the input scene don't see
+    // our mutations. This is the same pattern InverseKinematics uses.
+    auto newSkel = std::make_shared<ossia::skeleton_component>(srcSkel);
+
+    // Per-bone offset-mode retarget:
+    //   q_tgt_new = q_tgt_rest * ( inverse(q_src_rest) * q_src_cur )
+    for(std::size_t b = 0; b < std::size_t(humanoid_bone_index::Count); ++b)
+    {
+      const int32_t tgt = m_target_joint_indices[b];
+      if(tgt < 0 || tgt >= int32_t(newSkel->joints.size()))
+        continue; // unmapped bone, or index out of range for this skeleton
+
+      const auto& src_cur = pose.bones[b];
+      if(src_cur.validity < kValidityThreshold)
+        continue; // trust the target's current rotation (kept from clone)
+
+      const float src_cur_q[4] = {
+          src_cur.qx, src_cur.qy, src_cur.qz, src_cur.qw};
+      float inv_src_rest[4];
+      quat_inv(m_source_rest[b], inv_src_rest);
+
+      float delta[4];
+      quat_mul(inv_src_rest, src_cur_q, delta);
+
+      float out[4];
+      quat_mul(m_target_rest[b], delta, out);
+
+      auto& tgtJoint = newSkel->joints[tgt];
+      tgtJoint.rotation[0] = out[0];
+      tgtJoint.rotation[1] = out[1];
+      tgtJoint.rotation[2] = out[2];
+      tgtJoint.rotation[3] = out[3];
+    }
+
+    // Root motion (opt-in): offset the target Hips rest translation by the
+    // source hip displacement since calibration, scaled by "Root scale".
+    if(inputs.root_motion.value)
+    {
+      const int32_t hipsIdx
+          = m_target_joint_indices[std::size_t(humanoid_bone_index::Hips)];
+      if(hipsIdx >= 0 && hipsIdx < int32_t(newSkel->joints.size()))
+      {
+        const float s = inputs.root_scale.value;
+        auto& hip = newSkel->joints[hipsIdx];
+        hip.translation[0]
+            = m_target_rest_hip_tr[0] + (pose.hip_x - m_source_rest_hip[0]) * s;
+        hip.translation[1]
+            = m_target_rest_hip_tr[1] + (pose.hip_y - m_source_rest_hip[1]) * s;
+        hip.translation[2]
+            = m_target_rest_hip_tr[2] + (pose.hip_z - m_source_rest_hip[2]) * s;
+      }
+    }
+
+    newSkel->dirty_index++;
+
+    // Emit a fresh scene_state that shares everything with the input
+    // except the skeletons vector.
+    auto state = std::make_shared<ossia::scene_state>(*in.state);
+    auto skels
+        = std::make_shared<std::vector<ossia::skeleton_component_ptr>>();
+    skels->reserve(in.state->skeletons->size());
+    for(std::size_t i = 0; i < in.state->skeletons->size(); ++i)
+      skels->push_back(
+          i == 0 ? ossia::skeleton_component_ptr(newSkel)
+                 : (*in.state->skeletons)[i]);
+    state->skeletons = std::move(skels);
+    state->version = ++m_version_counter;
+    state->dirty_index = in.state->dirty_index + 1;
+
+    m_state = std::move(state);
+    outputs.scene_out.scene.state = m_state;
+    outputs.scene_out.dirty = ossia::scene_port::dirty_transform;
+  }
+
+private:
+  // Rotation confidence below which we don't override the target bone.
+  // Adapters default bone validity to 1.0; BlazePose maps landmark
+  // visibility into [0, 1]. 0.5 is a reasonable "believe this" line.
+  static constexpr float kValidityThreshold = 0.5f;
+
+  // Hamilton quaternion multiply, (x, y, z, w) ordering: out = a * b.
+  // Forwards to Threedim::quat_mul_xyzw (HumanoidSourceAdapters.hpp) so the
+  // retargeter and the source adapters share one implementation instead
+  // of two hand-copied ones. `out` may alias `a` or `b`: the helper
+  // computes all four components before storing any of them.
+  //
+  //   a, b — operands, read as 4-float arrays
+  //   out  — receives the product
+  static void quat_mul(const float a[4], const float b[4], float out[4]) noexcept
+  {
+    quat_mul_xyzw(a, b, out);
+  }
+
+  // Quaternion inverse via conjugation — exact only for unit quaternions;
+  // for non-unit input the result is off by a factor of |q|^2. Forwards to
+  // Threedim::quat_inv_xyzw (HumanoidSourceAdapters.hpp) so both headers
+  // share a single implementation.
+  //   q   — input, (x, y, z, w)
+  //   out — receives (-x, -y, -z, w)
+  static void quat_inv(const float q[4], float out[4]) noexcept
+  {
+    quat_inv_xyzw(q, out);
+  }
+
+  // Captures the retarget reference: the source pose of this frame as the
+  // source rest, and for each canonical bone the matching joint of `skel`
+  // (looked up by preset name) with its current rotation as target rest.
+  void calibrate(
+      const ossia::skeleton_component& skel,
+      const humanoid_pose& pose) noexcept
+  {
+    constexpr std::size_t kBones = std::size_t(humanoid_bone_index::Count);
+    constexpr std::size_t kHips = std::size_t(humanoid_bone_index::Hips);
+    const auto& names = humanoidBoneMap(inputs.preset.value);
+
+    // Source hip position that later serves as the root-motion origin.
+    m_source_rest_hip[0] = pose.hip_x;
+    m_source_rest_hip[1] = pose.hip_y;
+    m_source_rest_hip[2] = pose.hip_z;
+
+    for(std::size_t bone = 0; bone < kBones; ++bone)
+    {
+      // Source rest is recorded for every bone, mapped or not.
+      const auto& srcBone = pose.bones[bone];
+      float* srcRest = m_source_rest[bone];
+      srcRest[0] = srcBone.qx;
+      srcRest[1] = srcBone.qy;
+      srcRest[2] = srcBone.qz;
+      srcRest[3] = srcBone.qw;
+
+      // -1 marks "no target joint": the preset has no name for this bone
+      // or the skeleton has no joint with that name.
+      m_target_joint_indices[bone] = -1;
+      if(names[bone].empty())
+        continue;
+      const int32_t joint = skel.find_joint(names[bone]);
+      if(joint < 0)
+        continue;
+      m_target_joint_indices[bone] = joint;
+
+      const auto& tgt = skel.joints[std::size_t(joint)];
+      for(int c = 0; c < 4; ++c)
+        m_target_rest[bone][c] = tgt.rotation[c];
+      if(bone == kHips)
+        for(int c = 0; c < 3; ++c)
+          m_target_rest_hip_tr[c] = tgt.translation[c];
+    }
+
+    m_calibrated = true;
+  }
+
+  // Builds this frame's humanoid_pose from the port chosen by "Source".
+  // Yields nullopt — which operator() treats as "pass the scene through" —
+  // when Source is Off/Count or the chosen port carries no usable data.
+  std::optional<humanoid_pose>
+  composeSourcePose(float confidence_threshold) noexcept
+  {
+    // Shared tail of the three keypoint workflows; only the landmark map
+    // differs between them.
+    const auto fromKeypoints
+        = [&](const HumanoidKeypointMap& map) -> std::optional<humanoid_pose> {
+      const auto& stream = inputs.keypoints_in.value;
+      if(!stream || stream->keypoints.empty())
+        return std::nullopt;
+      return keypoints_to_humanoid_pose(*stream, map, confidence_threshold);
+    };
+
+    switch(inputs.source.value)
+    {
+      case HumanoidSourceType::BlazePose:
+        return fromKeypoints(kBlazePoseMap);
+      case HumanoidSourceType::Coco17:
+        return fromKeypoints(kCoco17Map);
+      case HumanoidSourceType::RTMPoseWhole:
+        return fromKeypoints(kRTMPoseWholeMap);
+
+      case HumanoidSourceType::Trackers6:
+      {
+        const auto& bundle = inputs.trackers_in.value;
+        if(!bundle)
+          return std::nullopt;
+        return trackers_to_humanoid_pose(*bundle);
+      }
+
+      case HumanoidSourceType::Off:
+      case HumanoidSourceType::Count:
+        break;
+    }
+
+    // Reached for Off and Count, and for any value outside the declared
+    // enumerators.
+    return std::nullopt;
+  }
+
+public:
+  // Calibration snapshot from calibrate(); said to be persisted — TODO confirm.
+  bool m_calibrated{false}; // cleared by the Source / Target rig update()s
+  std::array<float[4], std::size_t(humanoid_bone_index::Count)> m_source_rest{};
+  std::array<float[4], std::size_t(humanoid_bone_index::Count)> m_target_rest{};
+  std::array<int32_t, std::size_t(humanoid_bone_index::Count)>
+      m_target_joint_indices{}; // -1 = unmapped; all 0 until calibrate() runs
+  float m_target_rest_hip_tr[3]{0.f, 0.f, 0.f};
+  float m_source_rest_hip[3]{0.f, 0.f, 0.f};
+
+  // Runtime-only: pending capture request, last emitted state, version.
+  bool m_need_calibrate{false};
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version_counter{0};
+};
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceAdapters.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceAdapters.hpp
new file mode 100644
index 0000000000..0301a422a5
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceAdapters.hpp
@@ -0,0 +1,326 @@
+#pragma once
+
+// Free functions that convert from the two source-data shapes
+// HumanoidRetarget accepts (keypoint_stream from an ONNX PoseDetector,
+// tracker_bundle_6 from a mocap / tracking-protocol device) into the
+// canonical humanoid_pose. Separate header to keep HumanoidRetarget.hpp
+// focused on orchestration + Offset-mode retargeting math.
+//
+// Both paths produce PARENT-LOCAL quaternions — that's the invariant
+// HumanoidRetarget depends on. See the file comment at the top of
+// HumanoidRetarget.hpp for why.
+
+#include <Threedim/HumanoidPose.hpp>
+#include <Threedim/HumanoidSourceMaps.hpp>
+
+#include <array>
+#include <cmath>
+
+namespace Threedim
+{
+
+// ---------------------------------------------------------------------------
+// Small quaternion helpers. Inline and header-only for zero TU overhead.
+// (x, y, z, w) layout, matching ossia::skeleton_joint::rotation and
+// humanoid_bone::q*.
+// ---------------------------------------------------------------------------
+inline void quat_mul_xyzw(
+    const float a[4], const float b[4], float out[4]) noexcept
+{
+  const float r[4] = {a[3] * b[0] + a[0] * b[3] + a[1] * b[2] - a[2] * b[1],
+                      a[3] * b[1] - a[0] * b[2] + a[1] * b[3] + a[2] * b[0],
+                      a[3] * b[2] + a[0] * b[1] - a[1] * b[0] + a[2] * b[3],
+                      a[3] * b[3] - a[0] * b[0] - a[1] * b[1] - a[2] * b[2]};
+  for(int i = 0; i < 4; ++i) out[i] = r[i]; // staged: out may alias a or b
+}
+
+inline void quat_inv_xyzw(const float q[4], float out[4]) noexcept
+{
+  for(int i = 0; i < 3; ++i) out[i] = -q[i]; // negate the vector part
+  out[3] = q[3]; // keep w: conjugate == inverse for unit quaternions
+}
+
+// Shortest-arc rotation from unit vector `from` to unit vector `to`.
+// Produces the quaternion q such that q·from = to. Used to turn a
+// canonical T-pose bone axis into the observed bone direction; this is
+// inherently a 2-DoF answer (the twist around the bone's own length is
+// undefined by just two direction endpoints). That's a hard limit of
+// single-camera keypoint mocap; professional suits add IMU twist.
+inline void shortest_arc(
+    const float from[3], const float to[3], float out[4]) noexcept
+{
+  // Both inputs are expected to be unit length, so their dot product is
+  // the cosine of the angle between them.
+  const float eps = 1e-6f;
+  const float cosAngle = from[0] * to[0] + from[1] * to[1] + from[2] * to[2];
+
+  if(cosAngle >= 1.f - eps)
+  {
+    // Same direction: identity rotation.
+    out[0] = 0.f; out[1] = 0.f; out[2] = 0.f; out[3] = 1.f;
+    return;
+  }
+
+  if(!(cosAngle <= -1.f + eps))
+  {
+    // General case (also reached when the dot product is NaN): half-angle
+    // form with w = cos(θ/2) = sqrt(2·(1 + cos θ)) / 2 and xyz = cross / (2w).
+    const float normal[3] = {
+        from[1] * to[2] - from[2] * to[1],
+        from[2] * to[0] - from[0] * to[2],
+        from[0] * to[1] - from[1] * to[0]};
+    const float twoCosHalf = std::sqrt((1.f + cosAngle) * 2.f);
+    const float scale = 1.f / twoCosHalf;
+    out[0] = normal[0] * scale;
+    out[1] = normal[1] * scale;
+    out[2] = normal[2] * scale;
+    out[3] = twoCosHalf * 0.5f;
+    return;
+  }
+
+  // Opposite directions: a half-turn (w = 0) about an axis orthogonal to
+  // `from`. The axis is X with its component along `from` removed, or Y
+  // treated the same way when `from` is within ~26° of ±X (|x| >= 0.9).
+  float axis[3];
+  if(std::fabs(from[0]) < 0.9f)
+  {
+    axis[0] = 1.f - from[0] * from[0];
+    axis[1] = -from[0] * from[1];
+    axis[2] = -from[0] * from[2];
+  }
+  else
+  {
+    axis[0] = -from[1] * from[0];
+    axis[1] = 1.f - from[1] * from[1];
+    axis[2] = -from[1] * from[2];
+  }
+
+  const float axisLen
+      = std::sqrt(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
+  const bool usable = axisLen > eps;
+  const float k = usable ? 1.f / axisLen : 0.f;
+  out[0] = usable ? axis[0] * k : 1.f;
+  out[1] = usable ? axis[1] * k : 0.f;
+  out[2] = usable ? axis[2] * k : 0.f;
+  out[3] = 0.f;
+}
+
+// ---------------------------------------------------------------------------
+// keypoints → humanoid_pose.
+//
+// Algorithm:
+//   1. For each bone with a valid (parent_idx, child_idx) edge in the map
+//      AND both keypoints' confidence ≥ threshold:
+//        d_world[b] = normalize(kp[child] - kp[parent])
+//        q_world[b] = shortestArc(kRestAxis[b], d_world[b])
+//   2. Walk bones in topological order (enum order is already topological
+//      because each bone's parent has a lower index). For each bone b:
+//        - if no world rotation was computed, validity = 0
+//        - if parent has no world rotation, emit q_world[b] as local
+//          (root-relative behaviour — good fallback when upper chain data
+//          is missing)
+//        - else q_local[b] = inv(q_world[parent(b)]) * q_world[b]
+//   3. Copy Hips world position from whatever landmark best represents it
+//      (for BlazePose / COCO the hip midpoint; approximated as left_hip).
+//      Used only by the root-motion toggle downstream.
+// ---------------------------------------------------------------------------
+inline humanoid_pose keypoints_to_humanoid_pose(
+    const keypoint_stream& stream,
+    const HumanoidKeypointMap& map,
+    float confidence_threshold = 0.5f) noexcept
+{
+  constexpr std::size_t kBoneCount = std::size_t(humanoid_bone_index::Count);
+  using Quat = std::array<float, 4>;
+
+  humanoid_pose result{};
+  const auto& points = stream.keypoints;
+  const int pointCount = int(points.size());
+
+  // Pass 1 — world-space rotation of every bone whose landmark edge is
+  // usable. `known[b]` records whether worldRot[b] holds a measurement.
+  std::array<Quat, kBoneCount> worldRot{};
+  std::array<bool, kBoneCount> known{};
+  for(std::size_t b = 0; b < kBoneCount; ++b)
+  {
+    worldRot[b] = {0.f, 0.f, 0.f, 1.f};
+    known[b] = false;
+
+    // The edge must name two distinct landmarks present in the stream.
+    const auto& edge = map[b];
+    const bool usable = edge.valid() && edge.parent_idx != edge.child_idx
+                        && edge.parent_idx < pointCount
+                        && edge.child_idx < pointCount;
+    if(!usable)
+      continue;
+
+    // Both endpoints must meet the confidence threshold.
+    const auto& tail = points[std::size_t(edge.parent_idx)];
+    const auto& tip = points[std::size_t(edge.child_idx)];
+    if(tail.confidence < confidence_threshold
+       || tip.confidence < confidence_threshold)
+      continue;
+
+    // Observed bone direction; near zero-length edges are skipped.
+    float dir[3] = {tip.x - tail.x, tip.y - tail.y, tip.z - tail.z};
+    const float norm
+        = std::sqrt(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);
+    if(norm < 1e-6f)
+      continue;
+    const float invNorm = 1.f / norm;
+    for(float& c : dir)
+      c *= invNorm;
+
+    shortest_arc(kHumanoidRestAxis[b].data(), dir, worldRot[b].data());
+    known[b] = true;
+  }
+
+  // Pass 2 — re-express each known rotation relative to its parent bone.
+  // A single forward sweep suffices because kHumanoidParent lists parents
+  // before their children.
+  for(std::size_t b = 0; b < kBoneCount; ++b)
+  {
+    auto& bone = result.bones[b];
+    Quat local = {0.f, 0.f, 0.f, 1.f};
+    bone.validity = known[b] ? 1.f : 0.f;
+
+    if(known[b])
+    {
+      const auto parent = kHumanoidParent[b];
+      const bool parentKnown = parent != humanoid_bone_index::Count
+                               && known[std::size_t(parent)];
+      if(parentKnown)
+      {
+        // local = inverse(parent world) * own world
+        float invParent[4];
+        quat_inv_xyzw(worldRot[std::size_t(parent)].data(), invParent);
+        quat_mul_xyzw(invParent, worldRot[b].data(), local.data());
+      }
+      else
+      {
+        // Root bone, or parent not measured: the world rotation is emitted
+        // unchanged as the local one.
+        local = worldRot[b];
+      }
+    }
+
+    bone.qx = local[0];
+    bone.qy = local[1];
+    bone.qz = local[2];
+    bone.qw = local[3];
+  }
+
+  // Hip position for root motion: the landmark at the parent end of the
+  // Spine edge, copied only if it is in range and confident enough;
+  // otherwise hip_x/y/z keep their value-initialised state.
+  const auto hipIdx = map[std::size_t(humanoid_bone_index::Spine)].parent_idx;
+  if(hipIdx >= 0 && hipIdx < pointCount
+     && points[std::size_t(hipIdx)].confidence >= confidence_threshold)
+  {
+    const auto& hip = points[std::size_t(hipIdx)];
+    result.hip_x = hip.x;
+    result.hip_y = hip.y;
+    result.hip_z = hip.z;
+  }
+
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+// trackers → humanoid_pose.
+//
+// With only 6 trackers (head, hips, 2 hands, 2 feet) we directly drive
+// those 6 bones and leave the intermediate bones (spine, shoulders,
+// elbows, knees) at their retarget rest. Getting those bones to follow
+// realistically needs either more trackers (10-point Vive Full-Body) or
+// a downstream 2-bone IK chain (InverseKinematics process) keyed on
+// shoulder + wrist tracker positions as (root, target). v1 keeps the
+// retargeter unopinionated — we fill what we're given.
+//
+// Tracker quaternions are world-space by convention (PSN, OSC, VRPN all
+// report world transforms). Parent-local is produced by inverting the
+// parent bone's tracker rotation if that parent also has a tracker;
+// otherwise the bone inherits the world rotation directly.
+// ---------------------------------------------------------------------------
+// Converts a 6-tracker bundle into a humanoid_pose (parent-local rotations).
+inline humanoid_pose trackers_to_humanoid_pose(
+    const tracker_bundle_6& t) noexcept
+{
+  humanoid_pose out{};
+  constexpr float kMinValidity = 0.5f; // below this a tracker counts as absent
+
+  // Slot 1:1 mapping — which canonical bone gets which tracker.
+  struct Slot
+  {
+    humanoid_bone_index bone;
+    const tracker_pose* tr;
+  };
+  const Slot slots[] = {
+      {humanoid_bone_index::Hips, &t.hips},
+      {humanoid_bone_index::Head, &t.head},
+      {humanoid_bone_index::LeftHand, &t.left_hand},
+      {humanoid_bone_index::RightHand, &t.right_hand},
+      {humanoid_bone_index::LeftFoot, &t.left_foot},
+      {humanoid_bone_index::RightFoot, &t.right_foot},
+  };
+
+  // Gather world rotations; q_world[b] is only read where has_world[b] is set.
+  constexpr std::size_t N = std::size_t(humanoid_bone_index::Count);
+  std::array<std::array<float, 4>, N> q_world{};
+  std::array<bool, N> has_world{};
+  for(const auto& slot : slots)
+  {
+    if(slot.tr->validity < kMinValidity)
+      continue;
+    const std::size_t idx = std::size_t(slot.bone);
+    q_world[idx] = {slot.tr->qx, slot.tr->qy, slot.tr->qz, slot.tr->qw};
+    has_world[idx] = true;
+  }
+
+  // World → parent-local, same pattern as the keypoint path. Bones whose
+  // parent has no tracker fall through to "emit world as local", which
+  // makes them pose relative to the world origin — correct for Head /
+  // Hands when their parent chain (Neck, LowerArm) isn't tracker-driven.
+  for(std::size_t b = 0; b < N; ++b)
+  {
+    auto& bone = out.bones[b];
+    if(!has_world[b])
+    {
+      // Untracked: emit an explicit identity, like the keypoint path does.
+      // HumanoidRetarget::calibrate() snapshots q* for every bone whatever
+      // its validity, so the rest pose must not depend on `out{}` defaults.
+      bone.validity = 0.f;
+      bone.qx = 0.f; bone.qy = 0.f; bone.qz = 0.f; bone.qw = 1.f;
+      continue;
+    }
+
+    const auto parent_idx = kHumanoidParent[b];
+    if(parent_idx == humanoid_bone_index::Count
+       || !has_world[std::size_t(parent_idx)])
+    {
+      bone.qx = q_world[b][0]; bone.qy = q_world[b][1];
+      bone.qz = q_world[b][2]; bone.qw = q_world[b][3];
+    }
+    else
+    {
+      float inv_parent[4];
+      quat_inv_xyzw(q_world[std::size_t(parent_idx)].data(), inv_parent);
+      float local[4];
+      quat_mul_xyzw(inv_parent, q_world[b].data(), local);
+      bone.qx = local[0]; bone.qy = local[1];
+      bone.qz = local[2]; bone.qw = local[3];
+    }
+    bone.validity = 1.f;
+  }
+
+  // Hip position = hips tracker position (if tracking).
+  if(t.hips.validity >= kMinValidity)
+  {
+    out.hip_x = t.hips.x;
+    out.hip_y = t.hips.y;
+    out.hip_z = t.hips.z;
+  }
+
+  return out;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceMaps.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceMaps.hpp
new file mode 100644
index 0000000000..1ca66a67d8
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceMaps.hpp
@@ -0,0 +1,233 @@
+#pragma once
+
+// Source-side tables used by HumanoidRetarget's conversion step:
+//
+//   - per-workflow keypoint→bone mapping (BlazePose 33, COCO-17, RTMPose
+//     Whole 133) — each entry says "bone B's direction is landmark parent_idx
+//     to child_idx in this workflow"
+//   - canonical T-pose bone axes — the world-space direction each bone
+//     points in the canonical T-pose (e.g. LeftUpperArm is -X). Used as
+//     the "rest direction" of each bone for the shortest-arc computation.
+//   - bone hierarchy (parent-of-bone) — needed to convert world rotations
+//     to parent-relative quaternions after the shortest-arc pass.
+//
+// Kept separate from HumanoidRetarget.hpp to keep the retargeter file
+// focused on orchestration + math.
+
+#include <Threedim/HumanoidPose.hpp>
+
+#include <array>
+#include <cstdint>
+
+namespace Threedim
+{
+
+// ---------------------------------------------------------------------------
+// Bone tree: kHumanoidParent[std::size_t(b)] is the parent of canonical bone b;
+// the root stores Count. Entries follow the humanoid_bone_index enum order.
+// ---------------------------------------------------------------------------
+inline constexpr std::array<
+    humanoid_bone_index,
+    std::size_t(humanoid_bone_index::Count)>
+    kHumanoidParent = {{
+        humanoid_bone_index::Count,            // Hips (root)
+        humanoid_bone_index::Hips,             // Spine
+        humanoid_bone_index::Spine,            // Chest
+        humanoid_bone_index::Chest,            // Neck
+        humanoid_bone_index::Neck,             // Head
+
+        humanoid_bone_index::Chest,            // LeftShoulder
+        humanoid_bone_index::LeftShoulder,     // LeftUpperArm
+        humanoid_bone_index::LeftUpperArm,     // LeftLowerArm
+        humanoid_bone_index::LeftLowerArm,     // LeftHand
+
+        humanoid_bone_index::Chest,            // RightShoulder
+        humanoid_bone_index::RightShoulder,    // RightUpperArm
+        humanoid_bone_index::RightUpperArm,    // RightLowerArm
+        humanoid_bone_index::RightLowerArm,    // RightHand
+
+        humanoid_bone_index::Hips,             // LeftUpperLeg
+        humanoid_bone_index::LeftUpperLeg,     // LeftLowerLeg
+        humanoid_bone_index::LeftLowerLeg,     // LeftFoot
+        humanoid_bone_index::LeftFoot,         // LeftToes
+
+        humanoid_bone_index::Hips,             // RightUpperLeg
+        humanoid_bone_index::RightUpperLeg,    // RightLowerLeg
+        humanoid_bone_index::RightLowerLeg,    // RightFoot
+        humanoid_bone_index::RightFoot,        // RightToes
+    }};
+
+// ---------------------------------------------------------------------------
+// Canonical T-pose bone axes. Y-up, right-handed, model facing +Z.
+// NOTE(review): in such a frame the model's left side is +X, yet left-side
+// bones use -X below — confirm the intended facing direction / handedness.
+//
+// Each entry is the world-space unit direction of the bone's parent→child
+// segment in the canonical T-pose; the retargeter uses it as the "from"
+// vector of the shortest-arc rotation toward the landmark-derived direction.
+//
+// Conventions:
+//   - Spine / Neck / Head chain points up (+Y)
+//   - Arms point outward (-X for left, +X for right) along the horizontal
+//   - Legs (and the Foot bones) point down (-Y); Toes point forward (+Z)
+//   - Shoulders are small bones from spine to upper-arm root; treat as
+//     pointing toward the upper-arm (horizontal left/right)
+//   - Hips bone itself is the root; no direction (all-zero entry).
+// ---------------------------------------------------------------------------
+inline constexpr std::array<
+    std::array<float, 3>,
+    std::size_t(humanoid_bone_index::Count)>
+    kHumanoidRestAxis = {{
+        {0.f, 0.f, 0.f},   // Hips — root, no direction
+        {0.f, 1.f, 0.f},   // Spine +Y
+        {0.f, 1.f, 0.f},   // Chest +Y
+        {0.f, 1.f, 0.f},   // Neck +Y
+        {0.f, 1.f, 0.f},   // Head +Y
+
+        {-1.f, 0.f, 0.f},  // LeftShoulder -X
+        {-1.f, 0.f, 0.f},  // LeftUpperArm -X
+        {-1.f, 0.f, 0.f},  // LeftLowerArm -X
+        {-1.f, 0.f, 0.f},  // LeftHand -X
+
+        {1.f, 0.f, 0.f},   // RightShoulder +X
+        {1.f, 0.f, 0.f},   // RightUpperArm +X
+        {1.f, 0.f, 0.f},   // RightLowerArm +X
+        {1.f, 0.f, 0.f},   // RightHand +X
+
+        {0.f, -1.f, 0.f},  // LeftUpperLeg -Y
+        {0.f, -1.f, 0.f},  // LeftLowerLeg -Y
+        {0.f, -1.f, 0.f},  // LeftFoot -Y
+        {0.f, 0.f, 1.f},   // LeftToes +Z
+
+        {0.f, -1.f, 0.f},  // RightUpperLeg -Y
+        {0.f, -1.f, 0.f},  // RightLowerLeg -Y
+        {0.f, -1.f, 0.f},  // RightFoot -Y
+        {0.f, 0.f, 1.f},   // RightToes +Z
+    }};
+
+// ---------------------------------------------------------------------------
+// Keypoint mapping: for each canonical bone, (parent_keypoint_idx,
+// child_keypoint_idx) into the workflow's keypoint array. -1 means this bone
+// isn't derivable from this workflow (the adapter will skip it, keeping the
+// target bone at its rest rotation). parent == child is rejected as well.
+// ---------------------------------------------------------------------------
+struct HumanoidKeypointEdge
+{
+  int16_t parent_idx{-1};
+  int16_t child_idx{-1};
+  bool valid() const noexcept { return parent_idx >= 0 && child_idx >= 0 && parent_idx != child_idx; }
+};
+
+using HumanoidKeypointMap = std::array<
+    HumanoidKeypointEdge,
+    std::size_t(humanoid_bone_index::Count)>; // one edge per canonical bone
+
+// ---------------------------------------------------------------------------
+// BlazePose (33 landmarks).
+// Index reference:
+//   0: nose, 1: left_eye_inner, 2: left_eye, 3: left_eye_outer,
+//   4: right_eye_inner, 5: right_eye, 6: right_eye_outer,
+//   7: left_ear, 8: right_ear,
+//   9: mouth_left, 10: mouth_right,
+//   11: left_shoulder, 12: right_shoulder,
+//   13: left_elbow, 14: right_elbow,
+//   15: left_wrist, 16: right_wrist,
+//   17..22: left/right pinky/index/thumb (hand subdetail)
+//   23: left_hip, 24: right_hip,
+//   25: left_knee, 26: right_knee,
+//   27: left_ankle, 28: right_ankle,
+//   29: left_heel, 30: right_heel,
+//   31: left_foot_index, 32: right_foot_index
+//
+// Bone directions are parent_kp → child_kp:
+//   - Spine: midpoint(hips) → midpoint(shoulders). Approximated as
+//     left_hip → left_shoulder (an acceptable approximation for a
+//     single-segment spine; precise midpoint handling would need
+//     a helper with synthesized virtual landmarks).
+//   - Chest / Neck / Head approximated similarly.
+//   - Shoulders (the bone from spine to upper-arm root) would need
+//     midpoint(shoulders) → shoulder; marked unmappable ({-1, -1}) for now.
+//   - Feet: ankle → heel. Toes: ankle → foot_index
+// ---------------------------------------------------------------------------
+inline constexpr HumanoidKeypointMap kBlazePoseMap = {{
+    {-1, -1},              // Hips (root)
+    {23, 11},              // Spine: left_hip → left_shoulder
+    {11, 12},              // Chest: shoulders pair  (approximation)
+    {11, 0},               // Neck:  left_shoulder → nose (approx)
+    {0, 2},                // Head:  nose → left_eye (approx)
+
+    {-1, -1},              // LeftShoulder (collar): no landmark pair — unmappable
+    {11, 13},              // LeftUpperArm: left_shoulder → left_elbow
+    {13, 15},              // LeftLowerArm: left_elbow → left_wrist
+    {15, 19},              // LeftHand:     left_wrist → left_index
+
+    {-1, -1},              // RightShoulder (collar): no landmark pair — unmappable
+    {12, 14},              // RightUpperArm
+    {14, 16},              // RightLowerArm
+    {16, 20},              // RightHand
+
+    {23, 25},              // LeftUpperLeg
+    {25, 27},              // LeftLowerLeg
+    {27, 29},              // LeftFoot
+    {27, 31},              // LeftToes:     ankle → foot_index
+
+    {24, 26},              // RightUpperLeg
+    {26, 28},              // RightLowerLeg
+    {28, 30},              // RightFoot
+    {28, 32},              // RightToes
+}};
+
+// ---------------------------------------------------------------------------
+// COCO-17 layout (YOLO-pose, ViTPose, RTMPose_COCO).
+// Index reference:
+//   0: nose, 1: left_eye, 2: right_eye, 3: left_ear, 4: right_ear,
+//   5: left_shoulder, 6: right_shoulder,
+//   7: left_elbow, 8: right_elbow,
+//   9: left_wrist, 10: right_wrist,
+//   11: left_hip, 12: right_hip,
+//   13: left_knee, 14: right_knee,
+//   15: left_ankle, 16: right_ankle
+//
+// No collar, hand, foot or toe landmarks — those bones are flagged as
+// unmappable ({-1, -1}) and will keep their target rest rotation.
+// ---------------------------------------------------------------------------
+inline constexpr HumanoidKeypointMap kCoco17Map = {{
+    {-1, -1},              // Hips
+    {11, 5},               // Spine:  left_hip → left_shoulder (approx)
+    {5, 6},                // Chest:  shoulders (approx)
+    {5, 0},                // Neck:   shoulder → nose (approx)
+    {0, 1},                // Head:   nose → left_eye
+
+    {-1, -1},              // LeftShoulder — no dedicated landmark
+    {5, 7},                // LeftUpperArm
+    {7, 9},                // LeftLowerArm
+    {-1, -1},              // LeftHand — no wrist-to-hand direction in COCO
+
+    {-1, -1},              // RightShoulder
+    {6, 8},                // RightUpperArm
+    {8, 10},               // RightLowerArm
+    {-1, -1},              // RightHand
+
+    {11, 13},              // LeftUpperLeg
+    {13, 15},              // LeftLowerLeg
+    {-1, -1},              // LeftFoot — ankle only
+    {-1, -1},              // LeftToes
+
+    {12, 14},              // RightUpperLeg
+    {14, 16},              // RightLowerLeg
+    {-1, -1},              // RightFoot
+    {-1, -1},              // RightToes
+}};
+
+// ---------------------------------------------------------------------------
+// RTMPose Whole-body 133 keypoints (COCO-WholeBody layout) — 0..16 match
+// COCO-17, 17..22 feet, 23..90 face, 91..132 hands. For body retargeting we
+// reuse the first 17 (same as COCO), and optionally pull finger landmarks
+// for a richer hand (Hand bone direction = wrist → middle_finger_mcp).
+//
+// v1: use only the COCO subset. Hands would require a 21-landmark map
+// (follow-up).
+// ---------------------------------------------------------------------------
+inline constexpr HumanoidKeypointMap kRTMPoseWholeMap = kCoco17Map;
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/ImageLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.cpp
new file mode 100644
index 0000000000..bd9c99b0d5
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.cpp
@@ -0,0 +1,85 @@
+#include "ImageLoader.hpp"
+
+namespace Threedim
+{
+
+void ImageLoader::init(
+    score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res)
+{
+  // RenderList rebuild (e.g. viewport resize) calls release() which
+  // drops m_tex, then init() against the new RenderList. Re-stage the
+  // kept CPU image into m_pendingImage so the next update() pass uploads
+  // it to a freshly-allocated QRhiTexture without the user re-triggering
+  // the file-port. An image that is already pending (loaded but not yet
+  // uploaded) is newer than the kept one and must not be overwritten.
+  if(!m_keptImage.isNull() && m_pendingImage.isNull())
+  {
+    m_pendingImage = m_keptImage;
+    m_changed = true;
+  }
+}
+
+void ImageLoader::update(
+    score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
+    score::gfx::Edge* e)
+{
+  if(!m_changed || m_pendingImage.isNull())
+    return;
+
+  auto& rhi = *renderer.state.rhi;
+  const QSize sz = m_pendingImage.size();
+
+  // (Re)create the RGBA8 texture when there is none or its size differs
+  // (the HDR variant lives in a sibling OpenImageIO plug-in: RGBA16F/RGBA32F).
+  if(!m_tex || m_tex->pixelSize() != sz)
+  {
+    if(m_tex)
+      m_tex->deleteLater();
+    m_tex = rhi.newTexture(QRhiTexture::RGBA8, sz, 1, QRhiTexture::Flag{});
+    if(!m_tex || !m_tex->create())
+    {
+      // Creation failed: the previous texture is already released, so
+      // stop publishing its handle. m_changed stays set for a retry.
+      if(m_tex)
+        m_tex->deleteLater();
+      m_tex = nullptr;
+      outputs.texture.texture.handle = nullptr;
+      outputs.texture.texture.width = 0;
+      outputs.texture.texture.height = 0;
+      return;
+    }
+  }
+
+  res.uploadTexture(m_tex, m_pendingImage);
+
+  outputs.texture.texture.handle = m_tex;
+  outputs.texture.texture.width = sz.width();
+  outputs.texture.texture.height = sz.height();
+  // Format defaults to RGBA8 on construction; explicit for clarity.
+  outputs.texture.texture.format = halp::gpu_texture::RGBA8;
+
+  // Keep the CPU copy (moved, not copied) so init() can re-stage it after a rebuild.
+  m_keptImage = std::move(m_pendingImage);
+  m_pendingImage = QImage{};
+  m_changed = false;
+}
+
+void ImageLoader::release(score::gfx::RenderList& r)
+{
+  // Unpublish the texture, then release it; m_keptImage is left untouched.
+  auto& published = outputs.texture.texture;
+  published.handle = nullptr;
+  published.width = 0;
+  published.height = 0;
+  if(m_tex)
+    m_tex->deleteLater();
+  m_tex = nullptr;
+}
+
+void ImageLoader::runInitialPasses(
+    score::gfx::RenderList&, QRhiCommandBuffer&,
+    QRhiResourceUpdateBatch*&, score::gfx::Edge&)
+{ // Intentionally empty: this node records no initial passes.
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/ImageLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.hpp
new file mode 100644
index 0000000000..049fe00b6d
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.hpp
@@ -0,0 +1,101 @@
+#pragma once
+#include <Gfx/Graph/Node.hpp>
+#include <Gfx/Graph/NodeRenderer.hpp>
+#include <Gfx/Graph/RenderList.hpp>
+
+#include <halp/controls.hpp>
+#include <halp/file_port.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <QImage>
+
+namespace Threedim
+{
+
+// Lightweight LDR image-to-GPU-texture loader. Sibling to BufferLoader
+// but for 2D textures. Sits alongside the main OpenImageIO-backed
+// ImageLoader in a sibling plug-in, usable when OIIO isn't linked in
+// and the image is a plain QImage-supported format (PNG / JPG / BMP /
+// …). HDR formats (.hdr / .exr) require the OIIO path.
+//
+// Primary use: feeds the pure-shader cubemap pipeline
+//   ImageLoader(path) → cubemap_from_source → SceneResourceRoute(Skybox)
+// superseding the bespoke equirect/cross/strip code in CubemapLoader.
+class ImageLoader
+{
+public:
+  halp_meta(name, "Image loader (LDR)")
+  halp_meta(category, "Visuals")
+  halp_meta(c_name, "image_loader_ldr")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/image-loader.html")
+  halp_meta(description,
+      "Loads a 2D image file (PNG / JPG / BMP / …) to a GPU RGBA8 texture")
+  halp_meta(uuid, "e6b2c1d8-3f45-4a92-8b17-9c4e0d5a6f3b")
+
+  struct ins
+  {
+    // File-port boilerplate — same pattern as SplatLoader's obj_t.
+    // process() runs on the file-load thread: it decodes the mapped bytes
+    // (falling back to the file name), converts to RGBA8888 and returns a
+    // lambda that stages the result onto the node from the execution thread.
+    struct image_t : halp::file_port<"Image", halp::mmap_file_view>
+    {
+      halp_meta(extensions,
+          "Images (*.png *.jpg *.jpeg *.bmp *.tga *.webp *.tif *.tiff)");
+      static std::function<void(ImageLoader&)> process(file_type data)
+      {
+        QImage img;
+        if(!data.bytes.empty())
+        {
+          img.loadFromData(
+              reinterpret_cast<const uchar*>(data.bytes.data()),
+              (int)data.bytes.size());
+        }
+        if(img.isNull() && !data.filename.empty())
+        {
+          img = QImage(data.filename.data());
+        }
+        if(!img.isNull() && img.format() != QImage::Format_RGBA8888)
+          img = img.convertToFormat(QImage::Format_RGBA8888);
+        return [img = std::move(img)](ImageLoader& self) mutable {
+          self.m_pendingImage = std::move(img);
+          self.m_changed = true;
+        };
+      }
+    } image;
+  } inputs;
+
+  struct
+  {
+    halp::gpu_texture_output<"Texture"> texture;
+  } outputs;
+
+  void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+  void runInitialPasses(
+      score::gfx::RenderList& renderer, QRhiCommandBuffer& commands,
+      QRhiResourceUpdateBatch*& res, score::gfx::Edge& edge);
+
+  void operator()() { }
+
+  QImage m_pendingImage; // decoded image waiting to be uploaded by update()
+  // Persistent CPU copy of the last successfully uploaded image. Kept
+  // alive across RenderList rebuilds (resize) so that init() can
+  // re-stage it for upload to the freshly allocated QRhiTexture without
+  // the user re-triggering the file-port. Without it nothing would be
+  // left to upload after a rebuild: update() empties m_pendingImage once
+  // uploaded and release() drops m_tex, so the texture port would stay
+  // bound to the empty placeholder for the rest of the session.
+  QImage m_keptImage;
+  QRhiTexture* m_tex{}; // created in update(), dropped in release()
+  bool m_changed{};     // set whenever m_pendingImage is (re)staged
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.cpp b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.cpp
new file mode 100644
index 0000000000..2768eb3d9a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.cpp
@@ -0,0 +1,74 @@
+#include "InjectBuffer.hpp"
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+void InjectBuffer::rebuild()
+{
+  // Snapshot the inputs that operator()() compares against.
+  const auto& scene = inputs.scene_in.scene;
+  const ossia::scene_state* upstream = scene.state.get();
+  void* handle = inputs.buffer.buffer.handle;
+  const int64_t byte_size = inputs.buffer.buffer.byte_size;
+  const auto& name = inputs.aux_name.value;
+
+  m_cached_in_state = upstream;
+  m_cached_in_version = upstream ? upstream->version : -1;
+  m_cached_handle = handle;
+  m_cached_byte_size = byte_size;
+  m_cached_name = name;
+
+  // Injection needs a scene, a buffer and a name; with any of them
+  // missing the upstream scene is forwarded untouched, so the node can
+  // sit in a pipeline before the Buffer is connected.
+  const bool can_inject = upstream && handle && !name.empty();
+  if(!can_inject)
+  {
+    m_cached_out = scene.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  // Clone the upstream scene_state (cheap — shallow pointers to shared
+  // sub-vectors), drop entries already using this name, then append ours
+  // so the last InjectBuffer in a chain wins.
+  auto injected = std::make_shared<ossia::scene_state>(*upstream);
+  auto& pending = injected->inject_buffers;
+  const auto same_name
+      = [&](const ossia::aux_inject_buffer& ab) { return ab.name == name; };
+  pending.erase(
+      std::remove_if(pending.begin(), pending.end(), same_name),
+      pending.end());
+  pending.push_back(
+      {.name = name, .native_handle = handle, .byte_size = byte_size});
+  injected->version = ++m_version_counter;
+  injected->dirty_index = m_version_counter;
+
+  m_cached_out = injected;
+  m_pending_dirty = 0xFF;
+}
+
+void InjectBuffer::operator()()
+{
+  // Upstream scene_state + live buffer handle can change without a
+  // port-update event; detect and trigger rebuild (aux_name changes come
+  // via the control update() callback). A null cached output only forces
+  // a rebuild when a scene is present: with no scene in, rebuilding would
+  // yield null again and re-flag the output dirty on every tick.
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  void* cur_handle = inputs.buffer.buffer.handle;
+  const int64_t cur_bytes = inputs.buffer.buffer.byte_size;
+  const bool upstream_changed
+      = m_cached_in_state != in_state || m_cached_in_version != in_version
+        || m_cached_handle != cur_handle || m_cached_byte_size != cur_bytes;
+  if(upstream_changed || (!m_cached_out && in_state))
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.hpp b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.hpp
new file mode 100644
index 0000000000..971de57b32
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.hpp
@@ -0,0 +1,90 @@
+#pragma once
+#include <halp/buffer.hpp>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+namespace Threedim
+{
+
+// Mid-pipeline aux-buffer injection. Takes a scene_spec passthrough cable
+// plus a live GPU buffer from an upstream producer (CSF output, another
+// aux node, etc.) and attaches it to the scene as a pending injection
+// under a caller-supplied name. ScenePreprocessor consumes
+// `scene_state::inject_buffers` at flatten-time and writes matching
+// `auxiliary_buffer` entries onto every output geometry — so the live
+// handle ends up bound to any downstream consumer shader that declares
+// an AUXILIARY entry with the same name (SSBO or UBO kind).
+//
+// Wiring:
+//   CSFProducer → InjectBuffer(name="scene_params")
+//                → ScenePreprocessor → classic_pbr_full
+//
+// Name collisions with existing auxes published by the scene producers
+// (e.g., ScenePreprocessor's own scene_lights / scene_materials) follow
+// last-wins — the injection appended after flatten overrides the
+// flatten-time entry. Use this to selectively replace standard auxes
+// with custom data without forking the preprocessor.
+class InjectBuffer
+{
+public:
+  halp_meta(name, "Inject Buffer")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "inject_buffer")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/inject-buffer.html")
+  halp_meta(uuid, "4f9a6e2d-7c83-4b5d-9e1f-8a3c5d6b2f4e")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Live GPU buffer from an upstream producer. Null handle → the
+    // injection is skipped (passthrough), so unwiring is safe. Handle and
+    // scene_in changes are picked up by polling in operator()() because
+    // they can change without a port-update event; only aux_name (below)
+    // triggers rebuild() through its update() callback.
+    halp::gpu_buffer_input<"Buffer"> buffer;
+
+    struct : halp::lineedit<"Aux name", "">
+    { void update(InjectBuffer& n) { n.rebuild(); } } aux_name;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  // Stable shared_ptr cached while inputs are unchanged — keeps
+  // ScenePreprocessor's fingerprint fast-path warm.
+  std::shared_ptr<const ossia::scene_state> m_cached_out;
+  uint8_t m_pending_dirty{0xFF}; // set by rebuild(); published then zeroed by operator()()
+  const ossia::scene_state* m_cached_in_state{}; // input snapshot taken by rebuild() ...
+  int64_t m_cached_in_version{-1};               // ... compared against in operator()()
+  std::string m_cached_name; // stored by rebuild(); not compared in operator()()
+  void* m_cached_handle{};
+  int64_t m_cached_byte_size{};
+  int64_t m_version_counter{0}; // incremented for every injected state built by rebuild()
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectTexture.cpp b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.cpp
new file mode 100644
index 0000000000..9459fb4b7a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.cpp
@@ -0,0 +1,61 @@
+#include "InjectTexture.hpp"
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+void InjectTexture::rebuild()
+{
+  const auto& scene = inputs.scene_in.scene;
+  const ossia::scene_state* upstream = scene.state.get();
+  void* handle = inputs.texture.texture.handle;
+  const auto& name = inputs.aux_name.value;
+
+  m_cached_in_state = upstream;
+  m_cached_in_version = upstream ? upstream->version : -1;
+  m_cached_handle = handle;
+  m_cached_name = name;
+
+  // Without a scene, a texture or a name: forward the upstream scene as is.
+  const bool can_inject = upstream && handle && !name.empty();
+  if(!can_inject)
+  {
+    m_cached_out = scene.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  // Copy the scene_state, drop entries already using this name, append ours.
+  auto injected = std::make_shared<ossia::scene_state>(*upstream);
+  auto& pending = injected->inject_textures;
+  const auto same_name
+      = [&](const ossia::aux_inject_texture& at) { return at.name == name; };
+  pending.erase(
+      std::remove_if(pending.begin(), pending.end(), same_name), pending.end());
+  pending.push_back({.name = name, .native_handle = handle});
+  injected->version = ++m_version_counter;
+  injected->dirty_index = m_version_counter;
+  m_cached_out = injected;
+  m_pending_dirty = 0xFF;
+}
+
+void InjectTexture::operator()()
+{
+  // Upstream scene_state + live texture handle can change mid-stream;
+  // detect and rebuild. A null cached output only forces a rebuild when a
+  // scene is present, else every tick would re-flag the output dirty.
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  void* cur_handle = inputs.texture.texture.handle;
+  const bool upstream_changed
+      = m_cached_in_state != in_state || m_cached_in_version != in_version
+        || m_cached_handle != cur_handle;
+  if(upstream_changed || (!m_cached_out && in_state))
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectTexture.hpp b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.hpp
new file mode 100644
index 0000000000..038591611b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.hpp
@@ -0,0 +1,86 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+namespace Threedim
+{
+
+// Mid-pipeline aux-texture injection. Takes a scene_spec passthrough
+// cable plus a live GPU texture from an upstream producer (video node,
+// ISF output, CSF image, etc.) and attaches it under a caller-supplied
+// name. ScenePreprocessor consumes `scene_state::inject_textures` and
+// writes matching `auxiliary_texture` entries onto its output
+// geometry — so the live handle flows to any downstream consumer
+// shader that declares an AUXILIARY texture entry with the same name.
+//
+// Texture handles are routed via halp::gpu_texture_input, which goes
+// through the Graph's TextureInlet / updateInputTexture() path — a
+// fundamentally different mechanism from InjectBuffer's
+// halp::gpu_buffer_input (which goes through bufferForInput / Output).
+// Hence the split into two distinct node types.
+//
+// Wiring:
+//   VideoProducer → InjectTexture(name="base_color_dyn0")
+//                 → ScenePreprocessor → classic_pbr_full
+class InjectTexture
+{
+public:
+  halp_meta(name, "Inject Texture")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "inject_texture")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/inject-texture.html")
+  halp_meta(uuid, "3b8d2f7c-9a5e-4f1d-a4c6-6e2d9c4f8a1b")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Live GPU texture from an upstream producer. Null handle → the
+    // injection is skipped (passthrough). Handle and scene_in changes are
+    // picked up by polling in operator()() (no port-update event fires
+    // when a native handle is swapped); only aux_name (below) triggers
+    // rebuild() through its update() callback.
+    halp::gpu_texture_input<"Texture"> texture;
+
+    struct : halp::lineedit<"Aux name", "">
+    { void update(InjectTexture& n) { n.rebuild(); } } aux_name;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  std::shared_ptr<const ossia::scene_state> m_cached_out; // state published by operator()()
+  uint8_t m_pending_dirty{0xFF}; // set by rebuild(); published then zeroed by operator()()
+  const ossia::scene_state* m_cached_in_state{}; // input snapshot taken by rebuild() ...
+  int64_t m_cached_in_version{-1};               // ... compared against in operator()()
+  std::string m_cached_name; // stored by rebuild(); not compared in operator()()
+  void* m_cached_handle{};
+  int64_t m_version_counter{0}; // incremented for every injected state built by rebuild()
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Instancer.cpp b/src/plugins/score-plugin-threedim/Threedim/Instancer.cpp
new file mode 100644
index 0000000000..7d7385b5bf
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Instancer.cpp
@@ -0,0 +1,518 @@
+#include "Instancer.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <QMatrix3x3>
+#include <QMatrix4x4>
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <cstring>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Result of findFirstMesh(): the first mesh_component found in a scene
+// tree (depth-first), alongside the accumulated `scene_transform`
+// composition encountered along the path from the start node to that
+// mesh. The composition is what upstream producers use to position
+// their meshes (a glTF root node's scale, a Primitive's TRS, etc.);
+// without it, instancing a Duck.gltf would draw at the model's
+// intrinsic origin / scale even when the upstream node was scaled.
+//
+// Two behaviours intentionally preserved by the search:
+//   - First-mesh-only: subtree may contain many meshes; only the first
+//     in depth-first order is instanced. (The "instance all meshes"
+//     combobox mode is a future feature.)
+//   - Sibling scene_transforms BEFORE the mesh ARE composed (matches
+//     the FlattenVisitor's "transform applies to subsequent siblings"
+//     contract). Sibling transforms AFTER the mesh would only affect
+//     later siblings and are correctly ignored here.
+struct PrototypeWithTransform
+{
+  ossia::mesh_component_ptr mesh; // null when the search found no mesh
+  QMatrix4x4                world;  // accumulated TRS from `node` down to `mesh`
+};
+
+namespace
+{
+QMatrix4x4 transformToMatrix(const ossia::scene_transform& t) noexcept
+{
+  // Compose T * R * S (QQuaternion takes the scalar first: rotation[3] is w).
+  QMatrix4x4 trs; // a default-constructed QMatrix4x4 is already the identity
+  trs.translate(QVector3D(t.translation[0], t.translation[1], t.translation[2]));
+  trs.rotate(QQuaternion(t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2]));
+  trs.scale(QVector3D(t.scale[0], t.scale[1], t.scale[2]));
+  return trs;
+}
+}
+
+PrototypeWithTransform
+findFirstMesh(const ossia::scene_node& node, QMatrix4x4 parent = QMatrix4x4{}) noexcept
+{
+  // Returns the first non-null mesh under `node` with the transform
+  // composed down to it, or a null mesh when the subtree holds none.
+  // On a miss, `world` is simply the transform that was handed in.
+  const PrototypeWithTransform miss{nullptr, parent};
+  if(!node.has_children())
+    return miss;
+
+  // `running` starts at `parent` and absorbs each sibling
+  // scene_transform as it is met, so a transform only affects the
+  // payloads that come after it — the same contract as the
+  // FlattenVisitor (SceneGPUState.cpp:visitPayload, scene_transform
+  // branch).
+  QMatrix4x4 running = parent;
+  for(const auto& child : *node.children)
+  {
+    if(const auto* localXform = ossia::get_if<ossia::scene_transform>(&child))
+    {
+      running = running * transformToMatrix(*localXform);
+    }
+    else if(const auto* meshPtr = ossia::get_if<ossia::mesh_component_ptr>(&child))
+    {
+      // Null mesh pointers are skipped; the first non-null one wins.
+      if(*meshPtr)
+        return PrototypeWithTransform{*meshPtr, running};
+    }
+    else if(const auto* subtree = ossia::get_if<ossia::scene_node_ptr>(&child))
+    {
+      if(!*subtree)
+        continue;
+      // Recurse with the composition so far; propagate the first hit.
+      const auto nested = findFirstMesh(**subtree, running);
+      if(nested.mesh)
+        return nested;
+    }
+  }
+
+  return miss;
+}
+
+// Adapts a halp::gpu_buffer ({handle, byte_size, byte_offset}) into a
+// freshly allocated ossia::buffer_resource whose `resource` holds the
+// matching gpu_buffer_handle. A null input handle (e.g. no edge wired
+// into that port) yields a null pointer so that callers can skip the
+// slot.
+ossia::buffer_resource_ptr
+wrapGpuBuffer(const halp::gpu_buffer& buf) noexcept
+{
+  if(!buf.handle)
+    return nullptr;
+  ossia::gpu_buffer_handle native;
+  native.native_handle = buf.handle;
+  native.byte_size = buf.byte_size;
+  native.byte_offset = buf.byte_offset;
+  auto wrapped = std::make_shared<ossia::buffer_resource>();
+  wrapped->dirty_index = 1;
+  wrapped->resource = native;
+  return wrapped;
+}
+
+// What extractPointCloud() found in a halp::dynamic_gpu_geometry for the
+// attributes Instancer knows how to consume. Any slot without a matching
+// attribute stays null and falls back to the raw buffer inputs.
+struct PointCloudRouting
+{
+  ossia::buffer_resource_ptr transforms; // translation, position or transform_matrix
+  ossia::buffer_resource_ptr colors;     // color0
+  bool has_matrix{false};                // true if transform_matrix found
+  int instance_count{-1};                // mesh.vertices, or -1 if nothing was extracted
+};
+
+// Follows attribute → input[attr.binding] → buffers[input.buffer] and
+// wraps the buffer found at the end of that chain. The attribute's and
+// the input's byte offsets are summed and stored on the wrapped
+// buffer_resource; an out-of-range index or a null handle yields null.
+ossia::buffer_resource_ptr
+wrapAttributeAsBuffer(const halp::dynamic_gpu_geometry& mesh,
+                      const halp::geometry_attribute& attr) noexcept
+{
+  if(attr.binding < 0 || attr.binding >= static_cast<int>(mesh.input.size()))
+    return nullptr;
+  const auto& binding = mesh.input[attr.binding];
+  if(binding.buffer < 0 || binding.buffer >= static_cast<int>(mesh.buffers.size()))
+    return nullptr;
+  const auto& source = mesh.buffers[binding.buffer];
+  if(!source.handle)
+    return nullptr;
+  ossia::gpu_buffer_handle native;
+  native.native_handle = source.handle;
+  native.byte_size = source.byte_size;
+  native.byte_offset = binding.byte_offset + attr.byte_offset;
+  auto wrapped = std::make_shared<ossia::buffer_resource>();
+  wrapped->dirty_index = 1;
+  wrapped->resource = native;
+  return wrapped;
+}
+
+PointCloudRouting extractPointCloud(
+    const halp::dynamic_gpu_geometry& mesh) noexcept
+{
+  PointCloudRouting out; // starts empty: null slots, instance_count == -1
+  if(mesh.buffers.empty() || mesh.attributes.empty())
+    return out;
+  for(const auto& attr : mesh.attributes)
+  {
+    using S = halp::attribute_semantic;
+    switch(attr.semantic)
+    {
+      case S::transform_matrix: // full TRS: outranks translation/position
+        if(const auto matrices = wrapAttributeAsBuffer(mesh, attr))
+        {
+          out.transforms = matrices;
+          out.has_matrix = true; // only once a buffer really resolved
+        }
+        break;
+      case S::translation:
+      case S::position:
+        if(!out.has_matrix && !out.transforms)
+          out.transforms = wrapAttributeAsBuffer(mesh, attr);
+        break;
+      case S::color0:
+        if(!out.colors)
+          out.colors = wrapAttributeAsBuffer(mesh, attr);
+        break;
+      default: break; // other semantics are not consumed by Instancer
+    }
+  }
+  out.instance_count = mesh.vertices; // overrides the Count inlet when > 0
+  return out;
+}
+
+} // namespace
+
+void Instancer::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  const ossia::scene_state* in_state = in.state.get();
+
+  // Find the prototype mesh in the incoming scene, alongside the
+  // composed scene_transform from each ancestor walked along the way.
+  // The composed transform feeds into the wrapped scene_node below
+  // so the instance cloud honours the upstream's authored TRS (e.g.
+  // a Primitive node's scale, a glTF root's positioning) rather than
+  // dropping it on extraction.
+  ossia::mesh_component_ptr proto;
+  QMatrix4x4 protoWorld;
+  protoWorld.setToIdentity();
+  if(in.state && in.state->roots)
+  {
+    for(const auto& r : *in.state->roots)
+    {
+      if(!r)
+        continue;
+      auto found = findFirstMesh(*r);
+      if(found.mesh)
+      {
+        proto = found.mesh;
+        protoWorld = found.world;
+        break; // the first root that yields a mesh wins
+      }
+    }
+  }
+
+  // Point-cloud input takes precedence over the raw buffer inlets
+  // when it's wired. We detect "wired" as "at least one buffer with
+  // a non-null handle in the points mesh". The routing struct
+  // populates transforms / colors from the matching attribute
+  // semantics; empty routing falls back to the raw buffer ports.
+  const bool has_points_input
+      = !inputs.points.mesh.buffers.empty()
+        && std::any_of(
+               inputs.points.mesh.buffers.begin(),
+               inputs.points.mesh.buffers.end(),
+               [](const halp::geometry_gpu_buffer& b) { return b.handle; });
+  PointCloudRouting routing;
+  if(has_points_input)
+    routing = extractPointCloud(inputs.points.mesh);
+  void* points_primary
+      = has_points_input && !inputs.points.mesh.buffers.empty()
+          ? inputs.points.mesh.buffers[0].handle
+          : nullptr;
+  const int effective_count
+      = routing.instance_count > 0 ? routing.instance_count
+                                   : inputs.count.value;
+
+  // TRS recomputed; we reuse computeTRSMatrix from TransformHelper
+  // even though we're not targeting a halp::mesh — the cache keeps the
+  // update hooks simple.
+  float scratch[16];
+  CachedTRS xformCache = m_cachedTRS;
+  computeTRSMatrix(inputs, scratch, xformCache); // `scratch` is not read afterwards
+  m_cachedTRS = xformCache;
+  m_cached_in_state = in_state; // identity snapshot compared in operator()()
+  m_cached_transforms = inputs.transforms.buffer.handle;
+  m_cached_colors = inputs.colors.buffer.handle;
+  m_cached_custom = inputs.custom.buffer.handle;
+  m_cached_count = effective_count;
+  m_cached_format = inputs.format.value;
+  m_cached_points_buf = points_primary;
+  m_cached_points_vertices = inputs.points.mesh.vertices;
+
+  if(!proto)
+  {
+    // No prototype mesh → empty output (but leave the inputs wired,
+    // so when a mesh appears later we pick it up on the next call).
+    if(!m_wrapped_state)
+      m_wrapped_state = std::make_shared<ossia::scene_state>();
+    m_wrapped_state->roots.reset();
+    m_wrapped_state->materials.reset(); // NOTE(review): animations / cameras / skeletons / environment keep their previous values on this path — confirm intended
+    m_wrapped_state->version = ++m_version_counter;
+    m_wrapped_state->dirty_index = m_version_counter;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  // Build the instance_component.
+  // Transforms + colors: if a Points input is wired, prefer its
+  // attributes (transform_matrix / translation / color0). Otherwise
+  // fall back to the raw buffer inlets.
+  auto inst = std::make_shared<ossia::instance_component>();
+  inst->prototype = proto;
+  inst->instance_count
+      = effective_count > 0 ? uint32_t(effective_count) : 0u;
+  inst->instance_transforms
+      = routing.transforms
+            ? routing.transforms
+            : wrapGpuBuffer(inputs.transforms.buffer);
+  inst->instance_colors
+      = routing.colors
+            ? routing.colors
+            : wrapGpuBuffer(inputs.colors.buffer);
+  inst->instance_custom = wrapGpuBuffer(inputs.custom.buffer);
+
+  // Transform format: if the Points input provided a transform_matrix
+  // attribute, force Mat4. Else if it provided translation/position,
+  // force Translation. Else obey the user's combobox.
+  if(routing.has_matrix)
+  {
+    inst->transform_type
+        = ossia::instance_component::transform_format::mat4;
+  }
+  else if(routing.transforms)
+  {
+    inst->transform_type
+        = ossia::instance_component::transform_format::translation;
+  }
+  else
+  {
+    switch(inputs.format.value)
+    {
+      case TRS:
+        inst->transform_type = ossia::instance_component::transform_format::trs;
+        break;
+      case Translation:
+        inst->transform_type
+            = ossia::instance_component::transform_format::translation;
+        break;
+      default:
+        inst->transform_type
+            = ossia::instance_component::transform_format::mat4;
+        break;
+    }
+  }
+  inst->dirty_index = ++m_version_counter;
+
+  // Wrap into a scene_node:
+  //   child 0: local-controls scene_transform (Instancer's position /
+  //            rotation / scale knobs). Updates parentWorld for every
+  //            sibling that follows.
+  //   child 1: prototype-ancestor scene_transform (the composed TRS
+  //            that findFirstMesh accumulated walking down to the
+  //            mesh upstream — e.g. the glTF root's scale, or a
+  //            Primitive's TRS if it stamped one). Decomposed back
+  //            into translation/quaternion/scale so the FlattenVisitor
+  //            sees a normal scene_transform; the decomposition is the
+  //            manual column-based one below, so shear in the upstream
+  //            TRS (rare) cannot be carried over. When the matrix is identity
+  //            (no upstream transform), this is effectively a no-op
+  //            but is always emitted to keep the child layout stable
+  //            across rebuilds.
+  //   child 2: the instance_component payload.
+  ossia::scene_transform xform;
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+  auto q = QQuaternion::fromEulerAngles(
+      inputs.rotation.value.x, inputs.rotation.value.y,
+      inputs.rotation.value.z);
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar();
+  xform.scale[0] = inputs.scale.value.x;
+  xform.scale[1] = inputs.scale.value.y;
+  xform.scale[2] = inputs.scale.value.z;
+  xform.raw_slot = m_xform_ref;
+
+  // Decompose the prototype-ancestor TRS. QMatrix4x4 doesn't expose a
+  // single TRS-decomposition call so we pull the columns: column 3 is
+  // the translation; the upper-left 3×3's column lengths give scale;
+  // the rotation matrix is the upper-left 3×3 with each column
+  // normalised. Skips reconstruction (leaves identity defaults) when
+  // protoWorld is the identity.
+  ossia::scene_transform protoXform;
+  protoXform.translation[0] = 0.f;
+  protoXform.translation[1] = 0.f;
+  protoXform.translation[2] = 0.f;
+  protoXform.rotation[0] = 0.f;
+  protoXform.rotation[1] = 0.f;
+  protoXform.rotation[2] = 0.f;
+  protoXform.rotation[3] = 1.f;
+  protoXform.scale[0] = 1.f;
+  protoXform.scale[1] = 1.f;
+  protoXform.scale[2] = 1.f;
+  if(!protoWorld.isIdentity())
+  {
+    const float* d = protoWorld.constData(); // column-major: d[12..14] is column 3
+    protoXform.translation[0] = d[12];
+    protoXform.translation[1] = d[13];
+    protoXform.translation[2] = d[14];
+    QVector3D c0(d[0], d[1], d[2]);
+    QVector3D c1(d[4], d[5], d[6]);
+    QVector3D c2(d[8], d[9], d[10]);
+    protoXform.scale[0] = c0.length();
+    protoXform.scale[1] = c1.length();
+    protoXform.scale[2] = c2.length();
+    if(protoXform.scale[0] > 1e-6f) c0 /= protoXform.scale[0]; // near-zero columns are left un-normalised
+    if(protoXform.scale[1] > 1e-6f) c1 /= protoXform.scale[1];
+    if(protoXform.scale[2] > 1e-6f) c2 /= protoXform.scale[2];
+    QMatrix3x3 rotmat;
+    rotmat(0,0)=c0.x(); rotmat(1,0)=c0.y(); rotmat(2,0)=c0.z();
+    rotmat(0,1)=c1.x(); rotmat(1,1)=c1.y(); rotmat(2,1)=c1.z();
+    rotmat(0,2)=c2.x(); rotmat(1,2)=c2.y(); rotmat(2,2)=c2.z();
+    QQuaternion pq = QQuaternion::fromRotationMatrix(rotmat); // NOTE(review): scales are always >= 0 here, so a mirrored (negative-determinant) protoWorld is not a pure rotation at this point — confirm upstream never produces one
+    protoXform.rotation[0] = pq.x();
+    protoXform.rotation[1] = pq.y();
+    protoXform.rotation[2] = pq.z();
+    protoXform.rotation[3] = pq.scalar();
+  }
+  // raw_slot stays default (invalid) — this is a synthesized child and
+  // doesn't need a registry slot. The FlattenVisitor's scene_transform
+  // branch composes regardless of slot validity.
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->push_back(xform);
+  children->push_back(protoXform);
+  children->push_back(ossia::instance_component_ptr(std::move(inst)));
+
+  auto node = std::make_shared<ossia::scene_node>();
+  node->children = std::move(children);
+  node->dirty_index = m_version_counter;
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->push_back(std::move(node));
+
+  if(!m_wrapped_state)
+    m_wrapped_state = std::make_shared<ossia::scene_state>();
+  m_wrapped_state->roots = std::move(roots);
+  // Pass through materials / animations / cameras / env from the
+  // input so PBR shaders still have their material table.
+  if(in.state)
+  {
+    m_wrapped_state->materials = in.state->materials;
+    m_wrapped_state->animations = in.state->animations;
+    m_wrapped_state->cameras = in.state->cameras;
+    m_wrapped_state->skeletons = in.state->skeletons;
+    m_wrapped_state->environment = in.state->environment;
+    m_wrapped_state->active_camera_id = in.state->active_camera_id;
+  }
+  m_wrapped_state->version = m_version_counter;
+  m_wrapped_state->dirty_index = m_version_counter;
+  m_pending_dirty = 0xFF; // consumed and cleared by the next operator()()
+}
+
+void Instancer::operator()()
+{
+  // Most upstream changes reach this node without any port-update event
+  // (scene_state swaps, raw buffer-handle swaps, point-cloud dirty
+  // flags), so they are detected here by comparing against the values
+  // rebuild() cached. Controls call rebuild() via their update() hooks.
+  //
+  // For the Points input, both the primary buffer handle and the vertex
+  // count are compared (m_cached_points_buf / m_cached_points_vertices).
+  // Rationale: when an upstream CSF compute regenerates its point cloud
+  // with a different count (3500 → 4000) but reuses the same persistent
+  // QRhiBuffer, dirty_mesh is NOT set because the handle didn't change.
+  // Without the vertex-count comparison Instancer kept publishing the
+  // stale instance_count; downstream, ScenePreprocessor's update() took
+  // its meshesUnchanged early-return and the persistent
+  // m_pendingGpuCopies queue kept firing the OLD count for the GPU
+  // translation/color copy — "instances frozen at the previous count,
+  // then snapping back" whenever some unrelated rebuild kicked in.
+  const ossia::scene_state* live_state = inputs.scene_in.scene.state.get();
+  const auto& cloud = inputs.points.mesh;
+  void* live_points_buf = cloud.buffers.empty() ? nullptr : cloud.buffers[0].handle;
+
+  const bool scene_same = m_cached_in_state == live_state;
+  const bool raw_buffers_same
+      = m_cached_transforms == inputs.transforms.buffer.handle
+        && m_cached_colors == inputs.colors.buffer.handle
+        && m_cached_custom == inputs.custom.buffer.handle;
+  const bool cloud_same
+      = m_cached_points_buf == live_points_buf
+        && m_cached_points_vertices == cloud.vertices
+        && !inputs.points.dirty_mesh;
+
+  if(!m_wrapped_state || !(scene_same && raw_buffers_same && cloud_same))
+    rebuild();
+  outputs.scene_out.scene.state = m_wrapped_state;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+void Instancer::init(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  auto&& reg = r.registry();
+  if(!raw_transform_slot.valid()) // allocate once; a valid slot is reused
+  {
+    using Arena = score::gfx::GpuResourceRegistry::Arena;
+    raw_transform_slot
+        = reg.allocate(Arena::RawTransform, sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = reg.toOssiaRef(raw_transform_slot);
+  }
+  if(!raw_transform_slot.valid()) // still invalid: nothing to seed
+    return;
+  score::gfx::RawLocalTransform seed{}; // value-initialised placeholder
+  reg.updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+}
+
+void Instancer::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!raw_transform_slot.valid()) // no slot to write into
+    return;
+  const auto& pos = inputs.position.value;
+  const auto& euler = inputs.rotation.value;
+  const auto& scl = inputs.scale.value;
+  const QQuaternion orientation = QQuaternion::fromEulerAngles(euler.x, euler.y, euler.z);
+  score::gfx::RawLocalTransform local{}; // filled from the position / rotation / scale controls
+  local.translation[0] = pos.x;
+  local.translation[1] = pos.y;
+  local.translation[2] = pos.z;
+  local.rotation[0] = orientation.x(); // stored as (x, y, z, w)
+  local.rotation[1] = orientation.y();
+  local.rotation[2] = orientation.z();
+  local.rotation[3] = orientation.scalar();
+  local.scale[0] = scl.x;
+  local.scale[1] = scl.y;
+  local.scale[2] = scl.z;
+  r.registry().updateSlot(res, raw_transform_slot, &local, sizeof(local));
+}
+
+void Instancer::release(score::gfx::RenderList& r)
+{
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot); // NOTE(review): the member is not reassigned here — confirm free() invalidates the slot, since init() only re-allocates when valid() is false
+  m_xform_ref = {}; // drop the reference obtained from toOssiaRef() in init()
+  // Producer-state-drift Option A — see Light::release.
+  m_wrapped_state.reset(); // a null state makes the next operator()() call rebuild()
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/Instancer.hpp b/src/plugins/score-plugin-threedim/Threedim/Instancer.hpp
new file mode 100644
index 0000000000..ce5f147366
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Instancer.hpp
@@ -0,0 +1,169 @@
+#pragma once
+#include "TransformHelper.hpp"
+
+#include <Threedim/TinyObj.hpp>
+#include <halp/buffer.hpp>
+#include <halp/controls.hpp>
+#include <halp/geometry.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <cstdint>
+#include <memory>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// GPU-instancing authoring node. Takes a scene containing a mesh and a
+// GPU buffer of per-instance transforms (+ optional colors / custom),
+// emits a scene_spec wrapping an `instance_component` that
+// ScenePreprocessor forwards to downstream shaders as the standard
+// `instance_transforms` / `instance_colors` / `instance_custom`
+// auxiliary buffers.
+//
+// Consumer shaders (classic_pbr_mdi and friends) read the per-instance
+// attributes via the existing VERTEX_INPUTS location 3..5 convention
+// already in `GeometryToBufferStrategies.hpp`:
+//   location 3 = per-instance translation / rotation / transform_matrix
+//   location 4 = per-instance color0
+//   location 5 = per-instance (scale / custom)
+// No shader edits needed — the aux-buffer naming convention is the
+// same one MeshInstancer uses.
+//
+// Transform formats (packed floats per instance):
+//   mat4        : 16 floats (full 4×4 matrix, column-major)
+//   trs         : 10 floats (3 translation + 4 quaternion + 3 scale)
+//   translation : 3 floats  (position-only, rotation / scale = identity)
+class Instancer
+{
+public:
+  halp_meta(name, "Instancer")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "instancer")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/instancer.html")
+  halp_meta(uuid, "5e8a2c7f-9b4d-4e3a-a1c6-2d7f0b3e8c4a")
+
+  enum TransformFormat // enumerator order matches the "Format" combobox values below
+  {
+    Mat4,
+    TRS,
+    Translation
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    halp::gpu_buffer_input<"Transforms"> transforms;
+    halp::gpu_buffer_input<"Colors"> colors;
+    halp::gpu_buffer_input<"Custom"> custom;
+
+    // Optional point-cloud geometry input. When wired, its semantic
+    // attributes override the raw buffer inputs above:
+    //   translation / position → Transforms buffer (Translation mode)
+    //   transform_matrix       → Transforms buffer (Mat4 mode)
+    //   color0                 → Colors buffer
+    // The `count` inlet is overridden by the geometry's `vertices`
+    // whenever that is > 0 (so downstream doesn't need to track the
+    // point-cloud size manually). Lets shaderlib presets
+    // (RandomScatter, EmitFromMesh, CurlNoiseForce, NoiseField etc.)
+    // feed Instancer directly without a glue repack.
+    struct
+    {
+      halp_meta(name, "Points");
+      halp::dynamic_gpu_geometry mesh;
+      float transform[16]{}; // not read by Instancer.cpp
+      bool dirty_mesh = false; // checked in operator()() to force a rebuild
+      bool dirty_transform = false; // not read by Instancer.cpp
+    } points;
+
+    // Port-driven rebuild: scalar controls trigger Instancer::rebuild().
+    // Upstream scene_in / buffer handles are detected in operator()()
+    // because they can change without a port-update event.
+    struct : halp::combobox_t<"Format", TransformFormat>
+    {
+      struct range
+      {
+        std::string_view values[3]{"mat4", "trs", "translation"};
+        int init{0};
+      };
+      void update(Instancer& n) { n.rebuild(); }
+    } format;
+
+    struct : halp::spinbox_i32<"Count", halp::irange{1, 1000000, 1}>
+    { void update(Instancer& n) { n.rebuild(); } } count;
+
+    // Optional TRS applied to the prototype before instancing — lets
+    // the node place the instanced cloud without a separate
+    // Transform3D upstream.
+    struct : PositionControl
+    { void update(Instancer& n) { n.rebuild(); } } position;
+    struct : RotationControl
+    { void update(Instancer& n) { n.rebuild(); } } rotation;
+    struct : ScaleControl
+    { void update(Instancer& n) { n.rebuild(); } } scale;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();    // regenerates m_wrapped_state from the current inputs
+  void operator()(); // rebuilds if upstream identity changed, then publishes
+
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  // Cache so we republish a stable shared_ptr when inputs haven't
+  // changed — ScenePreprocessor's identity caches stay warm.
+  std::shared_ptr<ossia::scene_state> m_wrapped_state;
+  uint8_t m_pending_dirty{0xFF}; // copied to scene_out.dirty, then zeroed, by operator()()
+  CachedTRS m_cachedTRS{};
+  // Track input identity to detect when a rebuild is needed without
+  // relying on buffer-contents equality.
+  const ossia::scene_state* m_cached_in_state{};
+  void* m_cached_transforms{};
+  void* m_cached_colors{};
+  void* m_cached_custom{};
+  int32_t m_cached_count{-1}; // written by rebuild(); not compared in operator()()
+  int m_cached_format{-1}; // written by rebuild(); not compared in operator()()
+  // For the point-cloud input: cache the primary-buffer identity so we
+  // detect upstream handle replacements without poking every buffer
+  // every frame.
+  void* m_cached_points_buf{};
+  int64_t m_cached_points_vertices{-1};
+  int64_t m_version_counter{0}; // bumped in rebuild(); stamps version / dirty_index
+
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot; // allocated in init(), freed in release()
+  ossia::gpu_slot_ref m_xform_ref{}; // ossia-side view of the slot, attached to the scene_transform in rebuild()
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/InverseKinematics.hpp b/src/plugins/score-plugin-threedim/Threedim/InverseKinematics.hpp
new file mode 100644
index 0000000000..c76823ff4b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/InverseKinematics.hpp
@@ -0,0 +1,313 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+
+namespace Threedim
+{
+
+// Two-bone analytical IK solver operating on a scene_spec's skeleton.
+//
+// Given a 3-joint chain (root → mid → end), a target world-space position,
+// and a pole vector (to disambiguate the elbow plane), produces the joint
+// rotations that make the end effector reach — or as close as possible to —
+// the target. Law-of-cosines closed form, runs in ~50 floating-point ops,
+// no iteration.
+//
+// The solver reads the input skeleton's TRS, finds the named end joint,
+// walks two parents up to identify the chain, and emits a scene_spec with
+// ONLY the three joints' local rotations modified. The rest of the
+// skeleton and the mesh / material data pass through unchanged.
+//
+// This is the "reach for that door handle" IK — for full articulated
+// rigs with >2 bones, spine chains, or pole-axis constraints, chain a
+// sequence of these per limb, or write a FABRIK/CCD successor that
+// operates on N-joint chains. The interface is intentionally narrow so
+// swapping in more sophisticated solvers later doesn't break patches.
+//
+// Limitations:
+//   - no joint-limit / rotation-constraint support yet
+//   - no twist decomposition
+//   - chain must be a direct parent line in the skeleton; siblings / branches
+//     aren't supported
+//   - target-unreachable case: extends the chain fully toward the target
+//     (the natural "straight-arm stretch" behaviour).
+class InverseKinematics
+{
+public:
+  halp_meta(name, "Inverse Kinematics (2-bone)")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "inverse_kinematics")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/inverse-kinematics.html")
+  halp_meta(uuid, "6e9f2a4c-1b85-4d3e-a7f6-8c2b4d5e9a0f")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    halp::lineedit<"End joint name", "hand_r"> end_joint;
+
+    halp::xyz_spinboxes_f32<
+        "Target",
+        halp::range{-10000., 10000., 0.}>
+        target;
+    halp::xyz_spinboxes_f32<
+        "Pole vector",
+        halp::range{-10000., 10000., 0.}>
+        pole;
+
+    halp::hslider_f32<"Weight", halp::range{0., 1., 1.}> weight;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version{0};
+
+  static QVector3D toVec(const float v[3]) { return QVector3D(v[0], v[1], v[2]); }
+  static QQuaternion toQuat(const float v[4])
+  {
+    return QQuaternion(v[3], v[0], v[1], v[2]);
+  }
+  static void fromQuat(float v[4], const QQuaternion& q)
+  {
+    v[0] = q.x(); v[1] = q.y(); v[2] = q.z(); v[3] = q.scalar();
+  }
+
+  // Compute world-space position of joint `idx` by walking up the parent
+  // chain and composing TRS transforms.
+  static QVector3D worldJointPos(
+      const ossia::skeleton_component& skel, int32_t idx)
+  {
+    if(idx < 0 || idx >= (int32_t)skel.joints.size())
+      return QVector3D();
+
+    // Build a chain from root to idx, then compose forward.
+    ossia::small_vector<int32_t, 16> chain;
+    for(int32_t i = idx; i >= 0; i = skel.joints[i].parent_index)
+      chain.push_back(i);
+    std::reverse(chain.begin(), chain.end());
+
+    QMatrix4x4 M;
+    for(int32_t i : chain)
+    {
+      const auto& j = skel.joints[i];
+      QMatrix4x4 T;
+      T.translate(j.translation[0], j.translation[1], j.translation[2]);
+      T.rotate(QQuaternion(
+          j.rotation[3], j.rotation[0], j.rotation[1], j.rotation[2]));
+      T.scale(j.scale[0], j.scale[1], j.scale[2]);
+      M = M * T;
+    }
+    return M.map(QVector3D());
+  }
+
+  // 2-bone IK core: given three world positions + target + pole, compute
+  // the rotations (world-space) to apply at the root and mid joints so that
+  // end reaches the target. Returns the delta rotations as quaternions.
+  struct Solution
+  {
+    QQuaternion rootDelta;
+    QQuaternion midDelta;
+  };
+  static Solution solve2Bone(
+      QVector3D root, QVector3D mid, QVector3D end,
+      QVector3D target, QVector3D pole)
+  {
+    const float eps = 1e-6f;
+    QVector3D r2m = mid - root;
+    QVector3D m2e = end - mid;
+    QVector3D r2e = end - root;
+    QVector3D r2t = target - root;
+
+    const float lA   = r2m.length();
+    const float lB   = m2e.length();
+    const float lTgt = std::min(r2t.length(), lA + lB - eps);
+    if(lA < eps || lB < eps || lTgt < eps)
+      return {QQuaternion(), QQuaternion()};
+
+    // New elbow interior angle via law of cosines:
+    // cos(theta) = (lA² + lB² - lTgt²) / (2 lA lB)
+    const float cosNew = std::clamp(
+        (lA * lA + lB * lB - lTgt * lTgt) / (2.0f * lA * lB), -1.0f, 1.0f);
+    const float thetaNew = std::acos(cosNew);
+
+    // Current elbow interior angle.
+    const float cosCur = std::clamp(
+        QVector3D::dotProduct(-r2m.normalized(), m2e.normalized()),
+        -1.0f, 1.0f);
+    const float thetaCur = std::acos(cosCur);
+
+    // Rotation axis for the elbow: perpendicular to the current arm plane,
+    // oriented by the pole vector so we pick the "elbow side".
+    QVector3D planeNormal = QVector3D::crossProduct(r2m, m2e);
+    if(planeNormal.lengthSquared() < eps)
+    {
+      // Arm is straight → use pole vector's projected perpendicular.
+      QVector3D poleDir = (pole - root).normalized();
+      planeNormal = QVector3D::crossProduct(r2e.normalized(), poleDir);
+      if(planeNormal.lengthSquared() < eps)
+        planeNormal = QVector3D(0, 1, 0);
+    }
+    planeNormal.normalize();
+
+    QQuaternion elbowDelta = QQuaternion::fromAxisAndAngle(
+        planeNormal, (thetaCur - thetaNew) * 180.0f / float(M_PI));
+
+    // Rotate the shoulder so the new r2m points toward target minus the
+    // elbow contribution.
+    QVector3D r2t_n = r2t.normalized();
+    QVector3D r2e_n = r2e.normalized();
+    QQuaternion rootDelta = QQuaternion::rotationTo(r2e_n, r2t_n);
+
+    return {rootDelta, elbowDelta};
+  }
+
+  // Per-tick entry point: solves the two-bone chain ending at `end_joint`
+  // toward `target` and publishes a scene whose first skeleton is adjusted.
+  // No scene → empty output; unresolvable chain or weight 0 → passthrough.
+  void operator()()
+  {
+    const auto& in = inputs.scene_in.scene;
+    if(!in.state || !in.state->roots)
+    {
+      outputs.scene_out.scene.state.reset();
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+
+    // Pick the skeleton: always the first entry of the scene's skeleton list.
+    const ossia::skeleton_component* srcSkel = nullptr;
+    if(in.state->skeletons && !in.state->skeletons->empty())
+      srcSkel = (*in.state->skeletons)[0].get();
+    if(!srcSkel || srcSkel->joints.empty())
+    {
+      outputs.scene_out.scene = in; // passthrough
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+
+    const std::string endName = inputs.end_joint.value;
+    int32_t endIdx = srcSkel->find_joint(endName);
+    if(endIdx < 0 || srcSkel->joints[endIdx].parent_index < 0)
+    {
+      outputs.scene_out.scene = in;
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+    const int32_t midIdx  = srcSkel->joints[endIdx].parent_index;
+    if(srcSkel->joints[midIdx].parent_index < 0)
+    {
+      outputs.scene_out.scene = in;
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+    const int32_t rootIdx = srcSkel->joints[midIdx].parent_index;
+
+    // Blend weight, checked before the solve: weight=0 means passthrough.
+    const float w = std::clamp(inputs.weight.value, 0.0f, 1.0f);
+    if(w <= 0.0f)
+    {
+      outputs.scene_out.scene = in;
+      outputs.scene_out.dirty = 0;
+      return;
+    }
+
+    // Current world-space joint positions.
+    QVector3D wRoot = worldJointPos(*srcSkel, rootIdx);
+    QVector3D wMid  = worldJointPos(*srcSkel, midIdx);
+    QVector3D wEnd  = worldJointPos(*srcSkel, endIdx);
+
+    QVector3D target(
+        inputs.target.value.x, inputs.target.value.y, inputs.target.value.z);
+    QVector3D pole(
+        inputs.pole.value.x, inputs.pole.value.y, inputs.pole.value.z);
+
+    Solution sol = solve2Bone(wRoot, wMid, wEnd, target, pole);
+    QQuaternion rootDelta = QQuaternion::slerp(QQuaternion(), sol.rootDelta, w);
+    QQuaternion midDelta  = QQuaternion::slerp(QQuaternion(), sol.midDelta,  w);
+
+    // Copy the skeleton and mutate the two rotations. Keep other joints
+    // untouched so downstream animation / rendering sees a minimal diff.
+    auto newSkel = std::make_shared<ossia::skeleton_component>(*srcSkel);
+
+    // These deltas are in world space. Translate to local (parent-relative)
+    // rotation by undoing the parent's accumulated rotation.
+    auto worldRotOf = [&](int32_t idx) {
+      QQuaternion q;
+      for(int32_t i = idx; i >= 0; i = srcSkel->joints[i].parent_index)
+      {
+        QQuaternion local(
+            srcSkel->joints[i].rotation[3],
+            srcSkel->joints[i].rotation[0],
+            srcSkel->joints[i].rotation[1],
+            srcSkel->joints[i].rotation[2]);
+        q = local * q;
+      }
+      return q;
+    };
+    QQuaternion parentRoot = srcSkel->joints[rootIdx].parent_index >= 0
+        ? worldRotOf(srcSkel->joints[rootIdx].parent_index)
+        : QQuaternion();
+    QQuaternion parentMid  = worldRotOf(rootIdx);
+
+    QQuaternion rootLocalNew
+        = parentRoot.inverted() * rootDelta * parentRoot
+        * toQuat(srcSkel->joints[rootIdx].rotation);
+    QQuaternion midLocalNew
+        = parentMid.inverted() * midDelta * parentMid
+        * toQuat(srcSkel->joints[midIdx].rotation);
+
+    fromQuat(newSkel->joints[rootIdx].rotation, rootLocalNew);
+    fromQuat(newSkel->joints[midIdx].rotation,  midLocalNew);
+    newSkel->dirty_index++;
+
+    // Build the output scene_state — shallow copy of input, swap the
+    // skeletons vector to contain our mutated skeleton.
+    if(!m_state || m_state->version != in.state->version - 1)
+      m_state = std::make_shared<ossia::scene_state>(*in.state);
+    else
+      *m_state = *in.state;
+
+    // srcSkel != nullptr guarantees the input list exists and is non-empty.
+    auto skels = std::make_shared<std::vector<ossia::skeleton_component_ptr>>(
+        *in.state->skeletons);
+    (*skels)[0] = newSkel;
+    m_state->skeletons = std::move(skels);
+    m_version++;
+    m_state->version = m_version;
+
+    outputs.scene_out.scene.state = m_state;
+    outputs.scene_out.dirty = ossia::scene_port::dirty_transform;
+  }
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Light.cpp b/src/plugins/score-plugin-threedim/Threedim/Light.cpp
new file mode 100644
index 0000000000..a7982fc79c
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Light.cpp
@@ -0,0 +1,278 @@
+#include "Light.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <cmath>
+#include <cstring>
+
+namespace Threedim
+{
+
+namespace
+{
+inline ossia::light_type toLightType(Light::Mode m) noexcept
+{
+  switch(m) // one-to-one mapping from the node's Mode to ossia::light_type
+  {
+    case Light::Directional: return ossia::light_type::directional;
+    case Light::Point:       return ossia::light_type::point;
+    case Light::Spot:        return ossia::light_type::spot;
+    case Light::Rect:        return ossia::light_type::rect_area;
+    case Light::Disk:        return ossia::light_type::disk_area;
+    case Light::Sphere:      return ossia::light_type::sphere_area;
+    case Light::Dome:        return ossia::light_type::dome;
+  }
+  return ossia::light_type::point; // fallback for values outside the Mode enum
+}
+
+inline ossia::light_decay toLightDecay(Light::Decay d) noexcept
+{
+  switch(d) // one-to-one mapping from the node's Decay to ossia::light_decay
+  {
+    case Light::DecayNone:      return ossia::light_decay::none;
+    case Light::DecayLinear:    return ossia::light_decay::linear;
+    case Light::DecayQuadratic: return ossia::light_decay::quadratic;
+    case Light::DecayCubic:     return ossia::light_decay::cubic;
+  }
+  return ossia::light_decay::quadratic; // fallback for values outside the Decay enum
+}
+}
+
+void Light::rebuild() // regenerates m_state (one node: transform + light) from the controls
+{
+  if(!m_state) // allocated on first use, then updated in place by later rebuilds
+    m_state = std::make_shared<ossia::scene_state>();
+  if(m_light_stable_id == 0) // ids are minted once and reused by later rebuilds
+    m_light_stable_id = ossia::mint_stable_id();
+  if(m_xform_stable_id == 0)
+    m_xform_stable_id = ossia::mint_stable_id();
+
+  auto lc = std::make_shared<ossia::light_component>();
+  lc->stable_id = m_light_stable_id;
+  lc->type = toLightType(Mode(inputs.mode.value));
+  lc->decay = toLightDecay(Decay(inputs.decay.value));
+
+  lc->color[0] = inputs.color.value.r;
+  lc->color[1] = inputs.color.value.g;
+  lc->color[2] = inputs.color.value.b;
+  lc->intensity = inputs.intensity.value;
+  lc->range = inputs.range.value;
+
+  // Degrees → radians for cone angles.
+  constexpr float deg2rad = float(M_PI) / 180.f;
+  lc->inner_cone_angle = inputs.inner_cone.value * deg2rad;
+  lc->outer_cone_angle = inputs.outer_cone.value * deg2rad;
+
+  // Area-shape dimensions: Rect uses width+height, Disk/Sphere use
+  // radius. The fields are unused for Directional/Point/Spot but
+  // setting them anyway is harmless.
+  lc->width = inputs.width.value;
+  lc->height = inputs.height.value;
+  lc->radius = inputs.radius.value;
+
+  lc->shadow.enabled = inputs.cast_shadow.value;
+  lc->shadow.bias = inputs.shadow_bias.value;
+  lc->shadow.normal_bias = inputs.shadow_normal_bias.value;
+
+  // Propagate the RawLight arena slot ref (populated in init()).
+  lc->raw_slot = m_light_ref;
+
+  lc->dirty_index = ++m_version; // the same counter is stored as m_state->version below
+
+  // Standard wrapping: a scene_node holding [scene_transform,
+  // light_component]. The transform encodes the light's world position
+  // + orientation; FlattenVisitor pushes that through parentWorld when
+  // visiting this node, so the light's direction column ends up
+  // correctly oriented in world space even when the node is placed
+  // under a parent transform chain.
+  ossia::scene_transform xform;
+  xform.stable_id = m_xform_stable_id;
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+
+  QQuaternion q = QQuaternion::fromEulerAngles(
+      inputs.rotation.value.x,
+      inputs.rotation.value.y,
+      inputs.rotation.value.z);
+
+  // Directional / spot / area-light direction is determined by the
+  // node's rotation applied to -Z (Vulkan / glTF convention). When
+  // the rotation is identity, the light points along -Z.
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar(); // stored as (x, y, z, w)
+  xform.scale[0] = xform.scale[1] = xform.scale[2] = 1.f;
+  // Propagate the RawTransform slot ref (populated in init()).
+  xform.raw_slot = m_xform_ref;
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->push_back(xform);
+  children->push_back(ossia::light_component_ptr(std::move(lc)));
+
+  auto node = std::make_shared<ossia::scene_node>();
+  node->children = std::move(children);
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->push_back(std::move(node));
+
+  m_state->roots = std::move(roots); // the state always holds exactly this one root
+  m_state->version = m_version;
+  m_pending_dirty = ossia::scene_port::dirty_lights; // reported by the next operator()()
+}
+
+void Light::operator()() // republishes the cached scene on every call
+{
+  if(!m_state) // never built yet, or cleared by release()
+    rebuild();
+  outputs.scene_out.scene.state = m_state;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0; // dirty bits are reported once, then cleared
+}
+
+namespace
+{
+// Mode → raw type encoding used by RawLightData::local_direction.w and
+// LightGPU::position_type.w. Area / dome modes collapse onto punctual
+// analogues for the raw arena (directional for dome, point for rect /
+// disk / sphere) — area-light shading is a shader-side extension
+// scheduled after the preprocessor consumes the raw slots.
+inline float toRawLightType(Light::Mode m) noexcept
+{
+  switch(m) // raw codes: 0 = directional, 1 = point, 2 = spot
+  {
+    case Light::Directional: return 0.f;
+    case Light::Point:       return 1.f;
+    case Light::Spot:        return 2.f;
+    case Light::Rect:
+    case Light::Disk:
+    case Light::Sphere:      return 1.f; // area shapes collapse onto point
+    case Light::Dome:        return 0.f; // dome collapses onto directional
+  }
+  return 1.f; // fallback for values outside the Mode enum: point
+}
+
+inline uint32_t toRawLightDecay(Light::Decay d) noexcept
+{
+  return static_cast<uint32_t>(d); // raw encoding is the enumerator's value
+}
+}
+
+// Order invariant: called by GfxRenderer::initState BEFORE the first
+// operator()() and BEFORE processControlIn fires any rebuild() callback.
+// m_light_ref / m_xform_ref populated here are therefore safe to read
+// in rebuild() without a guard. Adding prepare() to this node breaks the
+// invariant — see CpuFilterNode.hpp for details.
+void Light::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  using Arena = score::gfx::GpuResourceRegistry::Arena;
+  auto&& reg = r.registry();
+  // Each arena slot is claimed on first use, then (re)seeded with a
+  // value-initialized payload whenever a valid slot is held.
+  if(!raw_light_slot.valid())
+  {
+    raw_light_slot = reg.allocate(Arena::RawLight, sizeof(score::gfx::RawLightData));
+    m_light_ref = reg.toOssiaRef(raw_light_slot);
+  }
+  if(raw_light_slot.valid())
+  {
+    score::gfx::RawLightData blank_light{};
+    reg.updateSlot(res, raw_light_slot, &blank_light, sizeof(blank_light));
+  }
+  if(!raw_transform_slot.valid())
+  {
+    raw_transform_slot
+        = reg.allocate(Arena::RawTransform, sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = reg.toOssiaRef(raw_transform_slot);
+  }
+  if(raw_transform_slot.valid())
+  {
+    score::gfx::RawLocalTransform blank_xform{};
+    reg.updateSlot(res, raw_transform_slot, &blank_xform, sizeof(blank_xform));
+  }
+}
+
+// Uploads the current control values into the RawLight / RawTransform slots.
+void Light::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!raw_light_slot.valid())
+    return; // no RawLight slot is held: nothing to upload
+
+  score::gfx::RawLightData raw{};
+  raw.color[0] = inputs.color.value.r;
+  raw.color[1] = inputs.color.value.g;
+  raw.color[2] = inputs.color.value.b;
+  raw.color[3] = inputs.intensity.value; // intensity travels in the 4th color lane
+
+  // Light convention: local -Z is the configured direction. The preprocessor's
+  // world-matrix pass maps it through the node's parent chain + rotation to the
+  // world-space direction the consumer shader uses; keep the canonical vector here.
+  raw.local_direction[0] = 0.f;
+  raw.local_direction[1] = 0.f;
+  raw.local_direction[2] = -1.f;
+  raw.local_direction[3] = toRawLightType(Mode(inputs.mode.value)); // type code in .w
+
+  constexpr float deg2rad = float(M_PI) / 180.f;
+  const float inner_rad = inputs.inner_cone.value * deg2rad;
+  const float outer_rad = inputs.outer_cone.value * deg2rad;
+
+  raw.range_cone[0] = inputs.range.value;
+  raw.range_cone[1] = std::cos(inner_rad); // cone angles are uploaded as cosines
+  raw.range_cone[2] = std::cos(outer_rad);
+  raw.range_cone[3] = inputs.shadow_bias.value; // shadow bias shares this vector
+
+  raw.shadow_enabled = inputs.cast_shadow.value ? 1u : 0u;
+  raw.decay_mode = toRawLightDecay(Decay(inputs.decay.value));
+  raw.normal_bias = inputs.shadow_normal_bias.value;
+  // Stamp our scene_transform's arena slot index so consumer shaders
+  // can read world_transforms.data[transform_slot] to compose a world-
+  // space direction/position from the local-frame fields above.
+  raw.transform_slot = raw_transform_slot.valid()
+                           ? raw_transform_slot.slot_index
+                           : 0u;
+
+  r.registry().updateSlot(res, raw_light_slot, &raw, sizeof(raw));
+
+  if(raw_transform_slot.valid())
+  {
+    score::gfx::RawLocalTransform xform{};
+    xform.translation[0] = inputs.position.value.x;
+    xform.translation[1] = inputs.position.value.y;
+    xform.translation[2] = inputs.position.value.z;
+    QQuaternion q = QQuaternion::fromEulerAngles(
+        inputs.rotation.value.x, inputs.rotation.value.y,
+        inputs.rotation.value.z);
+    xform.rotation[0] = q.x();
+    xform.rotation[1] = q.y();
+    xform.rotation[2] = q.z();
+    xform.rotation[3] = q.scalar(); // stored as (x, y, z, w)
+    xform.scale[0] = 1.f;
+    xform.scale[1] = 1.f;
+    xform.scale[2] = 1.f;
+    r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform));
+  }
+}
+
+void Light::release(score::gfx::RenderList& r)
+{
+  if(raw_light_slot.valid())
+    r.registry().free(raw_light_slot);
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot);
+  // Drop the freed handles so init() allocates fresh slots instead of
+  // reusing freed indices (harmless if free() already invalidated them).
+  raw_light_slot = {};
+  raw_transform_slot = {};
+  m_light_ref = {};
+  m_xform_ref = {};
+  // Clear the cached scene_state so the next operator()() re-runs
+  // rebuild() against the post-release registry. Otherwise an in-place
+  // release+init path (relinkGraph / maybeRebuild) would republish a
+  // state whose lc->raw_slot embeds the OLD (now-freed) slot index, and
+  // the rasterizer would read a different slot than update() writes.
+  m_state.reset();
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/Light.hpp b/src/plugins/score-plugin-threedim/Threedim/Light.hpp
new file mode 100644
index 0000000000..c874affb67
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Light.hpp
@@ -0,0 +1,188 @@
+#pragma once
+#include <Threedim/TinyObj.hpp>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <cstdint>
+#include <memory>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Unified light producer. One node with a mode combobox covers the
+// punctual / area light types exposed by the Mode enum below —
+// directional, point, spot, rect, disk, sphere, dome (no cylinder) —
+// mirroring UsdLux's RectLight/DiskLight/SphereLight and glTF
+// KHR_lights_punctual.
+//
+// Emits an ossia::scene_spec containing one scene_node with:
+//   - child[0] = scene_transform (position + rotation, no scale)
+//   - child[1] = light_component_ptr
+// ScenePreprocessor packs it into the scene-wide `scene_lights` SSBO via
+// packLight(). Current consumer shaders (`classic_pbr_*.frag`) only
+// sample the common fields (position/direction/color/intensity/range +
+// spot cone angles) — area-light shapes pass through correctly but
+// are rendered as point-light approximations until shaders add the
+// Rect/Disk/Sphere sampling math. That's a shader-side follow-up.
+class Light
+{
+public:
+  halp_meta(name, "Light")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "light")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/light.html")
+  halp_meta(uuid, "9f3c1a5e-4b7d-4e2a-8c5f-1d6e0b9a3c7f")
+
+  enum Mode
+  {
+    Directional,
+    Point,
+    Spot,
+    Rect,
+    Disk,
+    Sphere,
+    Dome
+  };
+
+  enum Decay
+  {
+    DecayNone,
+    DecayLinear,
+    DecayQuadratic,  // physically correct
+    DecayCubic
+  };
+
+  struct ins
+  {
+    // Port-driven rebuild: each control's update() callback triggers
+    // Light::rebuild() on user change. operator()() just republishes.
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[7]{
+            "Directional", "Point", "Spot",
+            "Rect", "Disk", "Sphere", "Dome"};
+        int init{0};
+      };
+      void update(Light& n) { n.rebuild(); }
+    } mode;
+
+    // Common — always applies
+    struct : halp::color_chooser<"Color">
+    { void update(Light& n) { n.rebuild(); } } color;
+    struct : halp::hslider_f32<"Intensity", halp::range{0., 100., 1.}>
+    { void update(Light& n) { n.rebuild(); } } intensity;
+    // range=0 → infinite falloff (directional / dome ignore this field)
+    struct : halp::hslider_f32<"Range", halp::range{0., 1000., 0.}>
+    { void update(Light& n) { n.rebuild(); } } range;
+
+    struct : halp::combobox_t<"Falloff", Decay>
+    {
+      struct range
+      {
+        std::string_view values[4]{
+            "None", "Linear", "Quadratic (physical)", "Cubic"};
+        int init{2};
+      };
+      void update(Light& n) { n.rebuild(); }
+    } decay;
+
+    // Spot cone angles, entered in degrees; converted to radians in Light.cpp
+    struct : halp::hslider_f32<"Inner cone °", halp::range{0., 90., 0.}>
+    { void update(Light& n) { n.rebuild(); } } inner_cone;
+    struct : halp::hslider_f32<"Outer cone °", halp::range{0., 90., 45.}>
+    { void update(Light& n) { n.rebuild(); } } outer_cone;
+
+    // Area shapes
+    struct : halp::hslider_f32<"Width",  halp::range{0.01, 100., 1.}>
+    { void update(Light& n) { n.rebuild(); } } width;
+    struct : halp::hslider_f32<"Height", halp::range{0.01, 100., 1.}>
+    { void update(Light& n) { n.rebuild(); } } height;
+    struct : halp::hslider_f32<"Radius", halp::range{0.01, 100., 0.5}>
+    { void update(Light& n) { n.rebuild(); } } radius;
+
+    // Shadow settings
+    struct : halp::toggle<"Cast shadow">
+    { void update(Light& n) { n.rebuild(); } } cast_shadow;
+    struct : halp::hslider_f32<"Shadow bias", halp::range{0., 0.1, 0.001}>
+    { void update(Light& n) { n.rebuild(); } } shadow_bias;
+    struct : halp::hslider_f32<"Shadow normal bias", halp::range{0., 0.1, 0.01}>
+    { void update(Light& n) { n.rebuild(); } } shadow_normal_bias;
+
+    // Transform: position for positional lights, rotation encodes the
+    // direction used by Directional / Spot (local -Z mapped to the
+    // light direction, glTF / Vulkan convention).
+    struct : PositionControl
+    { void update(Light& n) { n.rebuild(); } } position;
+    struct : RotationControl
+    { void update(Light& n) { n.rebuild(); } } rotation;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // rebuild() regenerates m_state from the control values; every control's
+  // update() calls it. operator()() republishes m_state, rebuilding first if unset.
+  void rebuild();
+  void operator()();
+
+  // Render-thread hooks. init claims one RawLight and one RawTransform
+  // slot; update packs color / intensity / type / local-direction / range /
+  // cone angles / decay / shadow into a RawLightData (plus the local
+  // transform) and uploads; release frees both slots. World-direction
+  // composition happens inside the preprocessor (parent-chain world
+  // matrix), so the slots carry only node-local fields.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version{0};
+  uint8_t m_pending_dirty{ossia::scene_port::dirty_lights};
+  // Stable id for the single light_component this node emits. Minted
+  // lazily on first rebuild() and reused across all subsequent rebuilds
+  // so downstream caches (preprocessor fingerprint, SER coherence key)
+  // stay keyed on identity, not pointer.
+  uint64_t m_light_stable_id{};
+  uint64_t m_xform_stable_id{};
+
+  score::gfx::GpuResourceRegistry::Slot raw_light_slot;
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
+
+  // Ossia-facing snapshots. Written in init() and cleared in release()
+  // (the render-thread hooks); copied onto each emitted light_component /
+  // scene_transform raw_slot in rebuild().
+  ossia::gpu_slot_ref m_light_ref{};
+  ossia::gpu_slot_ref m_xform_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.cpp b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.cpp
new file mode 100644
index 0000000000..3c88f07955
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.cpp
@@ -0,0 +1,219 @@
+#include "MaterialOverride.hpp"
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Copy a gpu texture handle from halp into an ossia texture_ref.
+// We only populate the `texture` field — `source` stays null so the
+// ScenePreprocessor's channelDynamicHandle() treats this ref as DYNAMIC.
+// Sampler state is left at its default (linear/linear/repeat); can be
+// exposed as controls later if needed.
+void applyTextureOverride(
+    ossia::texture_ref& dst, const halp::gpu_texture& src) noexcept
+{
+  dst.source.reset(); // drop any existing source so only the raw handle remains
+  dst.texture.native_handle = src.handle; // forward the halp handle verbatim
+  dst.texture.bindless_index = 0;
+  // sampler stays default
+}
+
+// Decide whether a given material-index should receive overrides, given
+// the mode and index inputs.
+bool shouldOverride(int idx, int mode, int override_index) noexcept
+{
+  // All: every material qualifies. ByIndex: only the requested index.
+  if(mode == MaterialOverride::All)
+    return true;
+  if(mode == MaterialOverride::ByIndex)
+    return idx == override_index;
+  return false; // unrecognised mode: override nothing
+}
+
+} // namespace
+
+// Recomputes m_cached_out from the input scene and the override controls.
+void MaterialOverride::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  const ossia::scene_state* in_state = in.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+
+  void* cur_tex[4]{
+      inputs.base_color_tex.texture.handle,
+      inputs.metal_rough_tex.texture.handle,
+      inputs.normal_tex.texture.handle,
+      inputs.emissive_tex.texture.handle};
+
+  const float cur_base[4]{
+      inputs.base_r.value, inputs.base_g.value,
+      inputs.base_b.value, inputs.base_a.value};
+  const float cur_em[4]{
+      inputs.em_r.value, inputs.em_g.value, inputs.em_b.value,
+      inputs.em_strength.value};
+  // Snapshot the change-detection keys BEFORE any early return: a passthrough
+  // that left them stale would be re-run (and flagged 0xFF dirty) every tick.
+  m_cached_in_state = in_state;
+  m_cached_in_version = in_version;
+  m_cached_mode = inputs.mode.value;
+  m_cached_index = inputs.index.value;
+  std::copy(cur_tex, cur_tex + 4, m_cached_tex);
+  m_cached_use_base = inputs.use_base_color.value;
+  m_cached_use_metallic = inputs.use_metallic.value;
+  m_cached_use_roughness = inputs.use_roughness.value;
+  m_cached_use_emissive = inputs.use_emissive.value;
+  std::copy(cur_base, cur_base + 4, m_cached_base);
+  m_cached_metallic = inputs.metallic.value;
+  m_cached_roughness = inputs.roughness.value;
+  std::copy(cur_em, cur_em + 4, m_cached_em);
+
+  // No texture overrides and no factor overrides → passthrough. Keeps
+  // downstream identity caches warm for the common "unconfigured" case.
+  const bool any_tex = cur_tex[0] || cur_tex[1] || cur_tex[2] || cur_tex[3];
+  const bool any_factor = inputs.use_base_color.value || inputs.use_metallic.value
+                          || inputs.use_roughness.value
+                          || inputs.use_emissive.value;
+  if(!any_tex && !any_factor)
+  {
+    m_cached_out = in.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  if(!in_state || !in_state->materials || in_state->materials->empty())
+  {
+    m_cached_out = in.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  const auto& src_mats = *in_state->materials;
+  auto new_mats = std::make_shared<std::vector<ossia::material_component_ptr>>();
+  new_mats->reserve(src_mats.size());
+
+  // Track which source materials we clone this cycle so we can GC stale
+  // entries from m_clone_cache (freed when upstream shrinks or swaps).
+  ossia::hash_set<const ossia::material_component*> seen_src;
+  seen_src.reserve(src_mats.size());
+
+  for(std::size_t i = 0; i < src_mats.size(); ++i)
+  {
+    const auto& src_mat = src_mats[i];
+    if(!src_mat || !shouldOverride((int)i, inputs.mode.value, inputs.index.value))
+    {
+      new_mats->push_back(src_mat);
+      continue;
+    }
+    seen_src.insert(src_mat.get());
+
+    // Reuse the cached clone shared_ptr if we've cloned this source before —
+    // MUTATING its fields in place. The shared_ptr address stays stable
+    // across rebuilds, so the preprocessor's m_loaderMaterialSlots keeps the
+    // material arena slot allocated across frames: no per-frame GC +
+    // reallocate churn, Material arena content stays hot for SSBO-direct
+    // shader reads (task 28a). stable_id is inherited from the source via the
+    // copy — the fingerprint sees the override as the same logical material.
+    auto it = m_clone_cache.find(src_mat.get());
+    std::shared_ptr<ossia::material_component> cloned;
+    if(it != m_clone_cache.end())
+    {
+      // Reuse: start from the original upstream fields every rebuild to avoid
+      // accumulating stale override state (e.g. when the user toggles
+      // 'use_metallic' off, the factor must revert to upstream's).
+      cloned = it->second;
+      *cloned = *src_mat;
+    }
+    else
+    {
+      cloned = std::make_shared<ossia::material_component>(*src_mat);
+      m_clone_cache.emplace(src_mat.get(), cloned);
+    }
+
+    if(cur_tex[0])
+      applyTextureOverride(cloned->base_color_texture, inputs.base_color_tex.texture);
+    if(cur_tex[1])
+      applyTextureOverride(
+          cloned->metallic_roughness_texture, inputs.metal_rough_tex.texture);
+    if(cur_tex[2])
+      applyTextureOverride(cloned->normal_texture, inputs.normal_tex.texture);
+    if(cur_tex[3])
+      applyTextureOverride(cloned->emissive_texture, inputs.emissive_tex.texture);
+
+    if(inputs.use_base_color.value)
+    {
+      cloned->base_color_factor[0] = cur_base[0];
+      cloned->base_color_factor[1] = cur_base[1];
+      cloned->base_color_factor[2] = cur_base[2];
+      cloned->base_color_factor[3] = cur_base[3];
+    }
+    if(inputs.use_metallic.value)
+      cloned->metallic_factor = inputs.metallic.value;
+    if(inputs.use_roughness.value)
+      cloned->roughness_factor = inputs.roughness.value;
+    if(inputs.use_emissive.value)
+    {
+      cloned->emissive_factor[0] = cur_em[0];
+      cloned->emissive_factor[1] = cur_em[1];
+      cloned->emissive_factor[2] = cur_em[2];
+      cloned->emissive_strength = cur_em[3];
+    }
+
+    new_mats->push_back(cloned);
+  }
+
+  // GC cache entries whose source material vanished from upstream.
+  for(auto it = m_clone_cache.begin(); it != m_clone_cache.end();)
+  {
+    if(seen_src.find(it->first) == seen_src.end())
+      it = m_clone_cache.erase(it);
+    else
+      ++it;
+  }
+
+  auto state = std::make_shared<ossia::scene_state>();
+  // Passthrough: roots / cameras / animations / skeletons / environment
+  // all reference the upstream shared_ptrs (no deep copy). Only materials
+  // is swapped out.
+  state->roots = in_state->roots;
+  state->animations = in_state->animations;
+  state->cameras = in_state->cameras;
+  state->skeletons = in_state->skeletons;
+  state->environment = in_state->environment;
+  state->active_camera_id = in_state->active_camera_id;
+  state->materials = std::move(new_mats);
+  state->version = ++m_version_counter;
+  state->dirty_index = m_version_counter;
+
+  m_cached_out = state;
+  m_pending_dirty = 0xFF;
+}
+
+void MaterialOverride::operator()()
+{
+  // Neither the upstream scene_state nor the live texture handles are
+  // guaranteed to raise a port-update event when they change, so poll
+  // them every tick and rebuild when any of them differs from the cache.
+  const ossia::scene_state* upstream = inputs.scene_in.scene.state.get();
+  const int64_t upstream_version = upstream ? upstream->version : -1;
+  void* live_tex[4]{
+      inputs.base_color_tex.texture.handle,
+      inputs.metal_rough_tex.texture.handle,
+      inputs.normal_tex.texture.handle,
+      inputs.emissive_tex.texture.handle};
+  const bool state_same
+      = m_cached_in_state == upstream && m_cached_in_version == upstream_version;
+  const bool tex_same = std::equal(live_tex, live_tex + 4, m_cached_tex);
+  if(!m_cached_out || !state_same || !tex_same)
+    rebuild();
+
+  // Publish the cached scene; the dirty mask is reported once, then cleared.
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.hpp b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.hpp
new file mode 100644
index 0000000000..a5d554cf24
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.hpp
@@ -0,0 +1,177 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+#include <ossia/detail/hash_map.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace Threedim
+{
+
+// Injects runtime GPU textures and/or factor overrides into a scene's
+// material table. The primary live-VJ use case: drop a video texture
+// (or HDR shader output) onto an existing material without reloading
+// the scene. Authored on top of the Dynamic Texture pathway in
+// ScenePreprocessor — the texture handle is forwarded verbatim and
+// ScenePreprocessor emits it as a `*Dyn<slot>` auxiliary-texture binding
+// that classic_pbr_full (and any shader opting into the DYNAMIC source
+// branch) samples directly.
+//
+// Scope: the four PBR slots (base color / metal-rough / normal /
+// emissive). Occlusion and extension textures (transmission, clearcoat,
+// sheen…) are not in the ScenePreprocessor's array pool yet, so
+// overriding them here would have no effect downstream.
+//
+// Mode:
+//   All      — every material in the scene gets the override applied.
+//   ByIndex  — only `scene.state->materials[Index]` is overridden. Other
+//              materials pass through unchanged. Use Scene Inspector +
+//              the ByIndex variant to target a single object.
+//
+// Factor toggles gate whether the scalar/vector controls take effect;
+// textures auto-gate on "handle is non-null" so an unwired inlet is a
+// no-op regardless of state.
+class MaterialOverride
+{
+public:
+  halp_meta(name, "Material Override")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "material_override")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/material-override.html")
+  halp_meta(uuid, "c3d8e5f2-9a4b-4e7d-b8c1-2f6a9e3d5b7c")
+
+  enum Mode
+  {
+    All,    // every material in the scene gets the override
+    ByIndex // only the material selected by the "Index" control is overridden
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Port-driven rebuild: scalar controls trigger rebuild() via their
+    // update() callbacks. Texture handles are checked in operator()()
+    // because their native handles can change without a port-update
+    // event (live video / CSF outputs swap native handles mid-stream).
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[2]{"All", "By Index"};
+        int init{0}; // index into `values`: the combobox starts on "All"
+      };
+      void update(MaterialOverride& n) { n.rebuild(); }
+    } mode;
+    struct : halp::spinbox_i32<"Index", halp::irange{0, 4096, 0}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } index;
+
+    // Texture overrides. Unwired (handle==nullptr) → pass through.
+    // Handle changes detected in operator()(), not via control update().
+    halp::gpu_texture_input<"Base Color Tex"> base_color_tex;
+    halp::gpu_texture_input<"Metal Rough Tex"> metal_rough_tex;
+    halp::gpu_texture_input<"Normal Tex"> normal_tex;
+    halp::gpu_texture_input<"Emissive Tex"> emissive_tex;
+
+    // Factor overrides. Gated on the companion toggles; otherwise the
+    // original factor from the loader passes through.
+    struct : halp::toggle<"Use base color">
+    { void update(MaterialOverride& n) { n.rebuild(); } } use_base_color;
+    struct : halp::hslider_f32<"R", halp::range{0., 1., 1.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } base_r;
+    struct : halp::hslider_f32<"G", halp::range{0., 1., 1.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } base_g;
+    struct : halp::hslider_f32<"B", halp::range{0., 1., 1.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } base_b;
+    struct : halp::hslider_f32<"A", halp::range{0., 1., 1.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } base_a;
+
+    struct : halp::toggle<"Use metallic">
+    { void update(MaterialOverride& n) { n.rebuild(); } } use_metallic;
+    struct : halp::hslider_f32<"Metallic", halp::range{0., 1., 0.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } metallic;
+
+    struct : halp::toggle<"Use roughness">
+    { void update(MaterialOverride& n) { n.rebuild(); } } use_roughness;
+    struct : halp::hslider_f32<"Roughness", halp::range{0., 1., 0.5}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } roughness;
+
+    struct : halp::toggle<"Use emissive">
+    { void update(MaterialOverride& n) { n.rebuild(); } } use_emissive;
+    struct : halp::hslider_f32<"Emissive R", halp::range{0., 10., 0.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } em_r;
+    struct : halp::hslider_f32<"Emissive G", halp::range{0., 10., 0.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } em_g;
+    struct : halp::hslider_f32<"Emissive B", halp::range{0., 10., 0.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } em_b;
+    struct : halp::hslider_f32<"Emissive strength", halp::range{0., 10., 1.}>
+    { void update(MaterialOverride& n) { n.rebuild(); } } em_strength;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0}; // receives m_pending_dirty on every operator()() call
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()(); // per tick: rebuilds if upstream changed, then publishes m_cached_out
+
+  // Cached output; stable shared_ptr identity when inputs haven't
+  // changed so ScenePreprocessor's per-frame fingerprint fast-path stays
+  // warm. Dynamic-texture swaps still propagate because ScenePreprocessor
+  // refreshes its dynamic-slot map every frame (keyed on native_handle).
+  std::shared_ptr<const ossia::scene_state> m_cached_out;
+  uint8_t m_pending_dirty{0xFF}; // copied to scene_out.dirty each tick, then reset to 0
+
+  // Cache of override clones keyed by source material_component*. We
+  // reuse the same std::shared_ptr<ossia::material_component> clone
+  // across rebuilds when the source is unchanged, MUTATING its fields
+  // in place. That keeps the shared_ptr address stable → the
+  // preprocessor's m_loaderMaterialSlots keeps the arena slot allocated
+  // across frames → no per-frame GC + re-allocate cycle → the Material
+  // arena SSBO content is stable without churn. When the upstream
+  // material list changes structurally, stale cache entries are
+  // garbage-collected in rebuild().
+  ossia::hash_map<
+      const ossia::material_component*,
+      std::shared_ptr<ossia::material_component>>
+      m_clone_cache;
+
+  // Identity cache. operator()() compares the input-scene pointer, its
+  // version and the four texture handles each tick and rebuilds on any
+  // mismatch; control changes call rebuild() directly through update().
+  const ossia::scene_state* m_cached_in_state{};
+  int64_t m_cached_in_version{-1};
+  int m_cached_mode{-1};
+  int m_cached_index{-1};
+  void* m_cached_tex[4]{}; // base color, metal-rough, normal, emissive (order used by operator()())
+  bool m_cached_use_base{false};
+  bool m_cached_use_metallic{false};
+  bool m_cached_use_roughness{false};
+  bool m_cached_use_emissive{false};
+  float m_cached_base[4]{};
+  float m_cached_metallic{-1.f};
+  float m_cached_roughness{-1.f};
+  float m_cached_em[4]{};
+  int64_t m_version_counter{0}; // pre-incremented and stamped into each emitted state's version
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.cpp
new file mode 100644
index 0000000000..f9ee79a031
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.cpp
@@ -0,0 +1,58 @@
+#include "Executor.hpp"
+
+#include <Gfx/GfxApplicationPlugin.hpp>
+#include <Gfx/GfxContext.hpp>
+#include <Gfx/GfxExecNode.hpp>
+#include <Gfx/Graph/MergeGeometriesNode.hpp>
+#include <Process/Dataflow/Port.hpp>
+#include <Process/ExecutionContext.hpp>
+#include <Threedim/MergeGeometries/Process.hpp>
+
+#include <ossia/dataflow/port.hpp>
+
+#include <score/document/DocumentContext.hpp>
+
+namespace Gfx::MergeGeometries
+{
+class merge_geometries_exec_node final : public gfx_exec_node
+{
+public:
+  merge_geometries_exec_node(GfxExecutionAction& ctx)
+      : gfx_exec_node{ctx}
+  {}
+
+  // Unregisters the id that init() stored.
+  ~merge_geometries_exec_node() { exec_context->ui->unregister_node(id); }
+  // Registers a fresh MergeGeometriesNode and keeps the id it is assigned.
+  void init()
+  {
+    id = exec_context->ui->register_node(
+        std::make_unique<score::gfx::MergeGeometriesNode>());
+  }
+
+  std::string label() const noexcept override { return "Gfx::MergeGeometries_node"; }
+};
+
+ProcessExecutorComponent::ProcessExecutorComponent(
+    Gfx::MergeGeometries::Model& element,
+    const Execution::Context& ctx,
+    QObject* parent)
+    : ProcessComponent_T{element, ctx, "mergeGeometriesComponent", parent}
+{
+  // Eight geometry inputs and one geometry output are declared on the
+  // exec node before init() registers its render-graph counterpart.
+  auto exec_node = ossia::make_node<merge_geometries_exec_node>(
+      *ctx.execState, ctx.doc.plugin<DocumentPlugin>().exec);
+  for(int remaining = 8; remaining > 0; --remaining)
+    exec_node->add_geometry();
+  exec_node->add_geometry_out();
+  exec_node->init();
+  this->node = exec_node;
+  m_ossia_process = std::make_shared<ossia::node_process>(exec_node);
+}
+
+void ProcessExecutorComponent::cleanup()
+{
+  ProcessComponent_T::cleanup(); // only delegates to the base-class cleanup
+}
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.hpp
new file mode 100644
index 0000000000..ea2a7dff4c
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.hpp
@@ -0,0 +1,22 @@
+#pragma once
+#include <Process/Execution/ProcessComponent.hpp>
+
+#include <ossia/dataflow/node_process.hpp>
+
+namespace Gfx::MergeGeometries
+{
+class Model;
+class ProcessExecutorComponent final // execution-side component for MergeGeometries::Model
+    : public Execution::
+          ProcessComponent_T<Gfx::MergeGeometries::Model, ossia::node_process>
+{
+  COMPONENT_METADATA("b7c8d9e0-f1a2-4b3c-8d4e-5f6a7b8c9d0e")
+public:
+  ProcessExecutorComponent(
+      Model& element, const Execution::Context& ctx, QObject* parent);
+  void cleanup() override; // the definition only forwards to ProcessComponent_T::cleanup()
+};
+
+using ProcessExecutorComponentFactory
+    = Execution::ProcessComponentFactory_T<ProcessExecutorComponent>;
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Metadata.hpp
new file mode 100644
index 0000000000..f3c8f0df39
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Metadata.hpp
@@ -0,0 +1,22 @@
+#pragma once
+#include <Process/ProcessMetadata.hpp>
+
+namespace Gfx::MergeGeometries
+{
+class Model;
+}
+
+PROCESS_METADATA(
+    , Gfx::MergeGeometries::Model, "e8f7a6b5-c4d3-4e2f-1a0b-9c8d7e6f5a4b",
+    "mergegeometries",
+    "Merge Geometries",
+    Process::ProcessCategory::Visual,
+    "Visuals/3D/Scene",
+    "Concatenate N upstream geometry_specs into one for a single downstream renderer", // note: Model::init() creates exactly 8 inlets
+    "ossia team",
+    (QStringList{"gfx", "geometry", "merge", "3d", "scene"}),
+    {},
+    {},
+    QUrl{}, // empty URL — NOTE(review): presumably the documentation link; confirm and fill in
+    Process::ProcessFlags::SupportsAll
+)
diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.cpp
new file mode 100644
index 0000000000..eb5d146c32
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.cpp
@@ -0,0 +1,74 @@
+#include "Process.hpp"
+
+#include <score/application/ApplicationComponents.hpp>
+
+#include <Process/Dataflow/Port.hpp>
+
+#include <Gfx/Graph/Node.hpp>
+#include <Gfx/TexturePort.hpp>
+
+#include <wobjectimpl.h>
+
+W_OBJECT_IMPL(Gfx::MergeGeometries::Model)
+namespace Gfx::MergeGeometries
+{
+
+Model::Model(
+    const TimeVal& duration, const Id<Process::ProcessModel>& id, QObject* parent)
+    : Process::ProcessModel{duration, id, "gfxProcess", parent}
+{
+  metadata().setInstanceName(*this);
+  init(); // creates the default ports (8 geometry inlets, 1 outlet) when none exist
+}
+
+Model::~Model() = default;
+
+void Model::init()
+{
+  // Nothing to create when the model already carries ports.
+  if(!m_inlets.empty() || !m_outlets.empty())
+    return;
+
+  for(int port = 0; port < 8; ++port)
+    m_inlets.push_back(new GeometryInlet{
+        QStringLiteral("Geometry %1").arg(port + 1), Id<Process::Port>(port), this});
+
+  m_outlets.push_back(new GeometryOutlet{"Merged", Id<Process::Port>(0), this});
+}
+
+QString Model::prettyName() const noexcept
+{
+  return tr("Merge Geometries"); // display name, passed through tr() for translation
+}
+
+}
+
+template <>
+void DataStreamReader::read(const Gfx::MergeGeometries::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets); // ports are the only model data handled here
+  insertDelimiter(); // paired with checkDelimiter() in DataStreamWriter::write
+}
+
+template <>
+void DataStreamWriter::write(Gfx::MergeGeometries::Model& proc)
+{
+  // Ports first, then the delimiter check matching DataStreamReader::read.
+  auto&& port_factories = components.interfaces<Process::PortFactoryList>();
+  writePorts(*this, port_factories, proc.m_inlets, proc.m_outlets, &proc);
+  checkDelimiter();
+}
+
+template <>
+void JSONReader::read(const Gfx::MergeGeometries::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets); // ports are the only model data handled here
+}
+
+template <>
+void JSONWriter::write(Gfx::MergeGeometries::Model& proc)
+{
+  // Ports are the only payload handled for this model.
+  auto&& port_factories = components.interfaces<Process::PortFactoryList>();
+  writePorts(*this, port_factories, proc.m_inlets, proc.m_outlets, &proc);
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.hpp
new file mode 100644
index 0000000000..0ed0735e6a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.hpp
@@ -0,0 +1,37 @@
+#pragma once
+#include <Gfx/CommandFactory.hpp>
+#include <Threedim/MergeGeometries/Metadata.hpp>
+#include <Process/GenericProcessFactory.hpp>
+#include <Process/Process.hpp>
+
+namespace Gfx::MergeGeometries
+{
+class Model final : public Process::ProcessModel
+{
+  SCORE_SERIALIZE_FRIENDS
+  PROCESS_METADATA_IMPL(Gfx::MergeGeometries::Model)
+  W_OBJECT(Model)
+
+public:
+  Model(
+      const TimeVal& duration,
+      const Id<Process::ProcessModel>& id,
+      QObject* parent); // the definition names the instance, then calls init()
+
+  template <typename Impl>
+  Model(Impl& vis, QObject* parent)
+      : Process::ProcessModel{vis, parent}
+  {
+    vis.writeTo(*this); // let the visitor fill this model first
+    init();             // only adds ports if both port lists are still empty
+  }
+
+  ~Model() override;
+
+private:
+  void init(); // creates 8 geometry inlets and 1 outlet when no ports exist yet
+  QString prettyName() const noexcept override;
+};
+
+using ProcessFactory = Process::ProcessFactory_T<Gfx::MergeGeometries::Model>;
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp b/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp
index 32a1b21ac6..c9974a09ef 100644
--- a/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp
@@ -3,6 +3,7 @@
 #include <Gfx/Graph/GeometryFilterNodeRenderer.hpp>
 #include <Gfx/Graph/NodeRenderer.hpp>
 #include <Gfx/Graph/RenderList.hpp>
+#include <ossia/detail/algorithms.hpp>
 #include <Gfx/Graph/RenderState.hpp>
 #include <boost/algorithm/string.hpp>
 #include <ossia/detail/fmt.hpp>
@@ -62,6 +63,8 @@ layout(std140, binding = 2) uniform camera_t { \n\
       mat4 matrixProjection; \n\
       mat3 matrixNormal; \n\
       float fov; \n\
+      float near; \n\
+      float far; \n\
 } camera; \n\
  \n\
 "
@@ -81,29 +84,160 @@ float gl_PointSize;
 const constexpr auto vtx_projection_perspective = R"_(
 vec4 v_projected = camera.matrixModelViewProjection * vec4(in_position.xyz, 1.0);
 )_";
-const constexpr auto vtx_projection_fulldome = R"_(
-vec4 v_projected = vec4(1.0);
+// ----------------------------------------------------------------------------
+// Fulldome fisheye projections
+//
+// All four variants share the same θ/φ derivation and the same reverse-Z
+// depth; they differ only in the `r_ndc = f(θ)` mapping. Kept as separate
+// vertex-shader snippets (rather than a runtime branch on a uniform) so
+// the GPU dispatches branch-free code for the selected projection.
+//
+//   equidistant   — r = θ / (FOV/2)          (domemaster, uniform angular resolution; default)
+//   equisolid     — r = sin(θ/2) / sin(FOV/4) (equal-area; typical of photographic fisheye lenses)
+//   stereographic — r = tan(θ/2) / tan(FOV/4) (conformal; "little planet" look)
+//   orthographic  — r = sin(θ)   / sin(FOV/2) (parallel-projection sphere; FOV ≤ 180° only)
+//
+// Points with r_ndc > 1 leave the unit disc; hardware clipping removes them
+// once outside the NDC square (guaranteed past r_ndc = √2), so FOV > 180° works
+// for the first three. Orthographic cannot exceed 180° geometrically.
+// ----------------------------------------------------------------------------
+const constexpr auto vtx_projection_fulldome_equidistant = R"_(
+//
+// Fulldome / domemaster (equidistant angular fisheye).
+//
+//   r_2D = theta / (fov/2)           — radial image distance (NDC units)
+//   theta = angle from dome forward axis (view-space +Z in this convention)
+//   phi   = azimuth around forward axis
+//
+// Convention kept from the original implementation: the .xzy swizzle re-
+// orients world +Z as dome-up, world +Y as dome-forward; the view matrix
+// then places the zenith along view-space +Z.
+//
+// Works for FOV > 180° (e.g. 240°): points with theta > FOV/2 land outside
+// the NDC unit square and get hardware-clipped. For point clouds each
+// vertex is a single point, so no per-primitive clipping subtleties.
+//
+// Depth: linear reverse-Z in radial distance. z_gl in [-1,+1] such that
+// renderer.clipSpaceCorrMatrix (GL→Vulkan Z remap) yields z_vulkan=1 at
+// near, z_vulkan=0 at far. Matches the project-wide reverse-Z convention
+// (depth cleared to 0.0, compare op Greater).
+//
+vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0);
 {
   vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0);
-  // Code from Emmanuel Durand:
-  // https://emmanueldurand.net/spherical_projection/
-  // - inlined as another function injected could be called toSphere or do #define pi. yay GLSL...
-  float r = length(viewspace.xyz);
-  float val = clamp(viewspace.z / r, -1.0, 1.0);
-  float theta = atan(length(viewspace.xy), viewspace.z);
-
-  val = viewspace.x / (r * sin(theta));
-  float first = acos(clamp(val, -1.0, 1.0));
-  val = viewspace.y / (r * sin(theta));
-  float second = asin(clamp(val, -1.0, 1.0));
-
-  float phi = mix(2.0 * 3.14159265358979323846264338327 - first, first, second >= 0.0);
-  const float proj_ratio = 3.14159265358979323846264338327 / (360.0 / camera.fov);
-  v_projected.x = theta * cos(phi);
-  v_projected.y = theta * sin(phi);
-  v_projected.y /= proj_ratio;
-  v_projected.x /= proj_ratio;
-  v_projected.z = r / 1000.;
+  vec3 d = viewspace.xyz;
+  float r = length(d);
+
+  const float PI = 3.14159265358979323846264338327;
+
+  if(r > 1e-6)
+  {
+    float theta = acos(clamp(d.z / r, -1.0, 1.0));
+    float phi   = (length(d.xy) > 1e-6) ? atan(d.y, d.x) : 0.0;
+    float half_fov_rad = max(radians(camera.fov * 0.5), 1e-6);
+    float r_ndc = theta / half_fov_rad;
+
+    v_projected.x = r_ndc * cos(phi);
+    v_projected.y = r_ndc * sin(phi);
+  }
+
+  // Reverse-Z linear depth: z_gl = 1 at r = near (gets remapped to
+  // z_vulkan = 1 by clipSpaceCorrMatrix), z_gl = -1 at r = far.
+  float t = clamp(
+      (r - camera.near) / max(camera.far - camera.near, 1e-6),
+      0.0, 1.0);
+  v_projected.z = 1.0 - 2.0 * t;
+  v_projected.w = 1.0;
+}
+)_";
+
+// Equisolid-angle (equal-area fisheye). Matches the response of most
+// physical fisheye camera lenses (Nikon, Canon). Areas-on-the-sphere map
+// to equal areas-on-the-image, so the edge gets less angular resolution
+// than the centre.
+const constexpr auto vtx_projection_fulldome_equisolid = R"_(
+vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0);
+{
+  vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0);
+  vec3 d = viewspace.xyz;
+  float r = length(d);
+
+  if(r > 1e-6)
+  {
+    float theta = acos(clamp(d.z / r, -1.0, 1.0));
+    float phi   = (length(d.xy) > 1e-6) ? atan(d.y, d.x) : 0.0;
+    float quarter_fov_rad = max(radians(camera.fov * 0.25), 1e-6);
+    float r_ndc = sin(theta * 0.5) / sin(quarter_fov_rad);
+
+    v_projected.x = r_ndc * cos(phi);
+    v_projected.y = r_ndc * sin(phi);
+  }
+
+  float t = clamp(
+      (r - camera.near) / max(camera.far - camera.near, 1e-6),
+      0.0, 1.0);
+  v_projected.z = 1.0 - 2.0 * t;
+  v_projected.w = 1.0;
+}
+)_";
+
+// Stereographic fisheye. Conformal — local angles / shapes preserved,
+// circles on the sphere stay circles in the image. No edge compression of
+// shape. Good for VR / architectural preview, less good for uniform
+// pixel-per-degree on a dome.
+const constexpr auto vtx_projection_fulldome_stereographic = R"_(
+vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0);
+{
+  vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0);
+  vec3 d = viewspace.xyz;
+  float r = length(d);
+
+  if(r > 1e-6)
+  {
+    float theta = acos(clamp(d.z / r, -1.0, 1.0));
+    float phi   = (length(d.xy) > 1e-6) ? atan(d.y, d.x) : 0.0;
+    float quarter_fov_rad = max(radians(camera.fov * 0.25), 1e-6);
+    // tan diverges at θ=π; rely on hardware clipping for θ ≥ FOV/2.
+    float r_ndc = tan(theta * 0.5) / tan(quarter_fov_rad);
+
+    v_projected.x = r_ndc * cos(phi);
+    v_projected.y = r_ndc * sin(phi);
+  }
+
+  float t = clamp(
+      (r - camera.near) / max(camera.far - camera.near, 1e-6),
+      0.0, 1.0);
+  v_projected.z = 1.0 - 2.0 * t;
+  v_projected.w = 1.0;
+}
+)_";
+
+// Orthographic sphere projection. Parallel projection — the image looks
+// like a billiard-ball photographed from infinity. FOV is capped at 180°,
+// and θ > 90° is mirrored outward so the rear hemisphere never folds back in.
+const constexpr auto vtx_projection_fulldome_orthographic = R"_(
+vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0);
+{
+  vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0);
+  vec3 d = viewspace.xyz;
+  float r = length(d);
+
+  if(r > 1e-6)
+  {
+    float theta = acos(clamp(d.z / r, -1.0, 1.0));
+    float phi   = (length(d.xy) > 1e-6) ? atan(d.y, d.x) : 0.0;
+    float half_fov_rad = clamp(radians(camera.fov * 0.5), 1e-6, 1.57079632679);
+    float s = (theta <= 1.57079632679) ? sin(theta) : 2.0 - sin(theta);
+    float r_ndc = s / sin(half_fov_rad);
+    v_projected.x = r_ndc * cos(phi);
+    v_projected.y = r_ndc * sin(phi);
+  }
+
+  float t = clamp(
+      (r - camera.near) / max(camera.far - camera.near, 1e-6),
+      0.0, 1.0);
+  v_projected.z = 1.0 - 2.0 * t;
+  v_projected.w = 1.0;
 }
 )_";
 const constexpr auto vtx_output_process_triangle = R"_()_";
@@ -149,6 +283,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -237,6 +382,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -292,6 +448,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -358,6 +525,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -413,6 +591,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -461,6 +650,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -510,6 +710,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -557,6 +768,17 @@ void main()
   %vtx_do_projection%
 
   gl_Position = renderer.clipSpaceCorrMatrix * v_projected;
+#if defined(QSHADER_HLSL) || defined(QSHADER_MSL)
+  // Match the codebase Y-handling convention used by ImageNode et al.:
+  // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into
+  // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer
+  // origin without sharing its NDC sign convention — so we flip here so
+  // the offscreen texture lands top-row-first like the other backends,
+  // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV
+  // flip. Without this the model rendered fine on GL/Vulkan but ended
+  // up upside-down on D3D11/12.
+  gl_Position.y = -gl_Position.y;
+#endif
 
   %vtx_output_process%
 }
@@ -608,7 +830,19 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
     QShader viewspaceVS, viewspaceFS;
     QShader barycentricVS, barycentricFS;
     QShader colorVS, colorFS;
-  } triangle_perspective, point_perspective, triangle_fulldome, point_fulldome;
+  };
+
+  // Camera mode enum — matches the UI ordering. Index into
+  // triangle_shaders / point_shaders arrays.
+  //
+  //   0 = Perspective
+  //   1 = Fulldome (Equidistant, domemaster)
+  //   2 = Fulldome (Equisolid-angle, photographic fisheye)
+  //   3 = Fulldome (Stereographic, conformal)
+  //   4 = Fulldome (Orthographic, ≤ 180° only)
+  static constexpr int CAMERA_MODE_COUNT = 5;
+  RenderShaders triangle_shaders[CAMERA_MODE_COUNT];
+  RenderShaders point_shaders[CAMERA_MODE_COUNT];
 
   int64_t meshChangedIndex{-1};
   int m_curShader{0};
@@ -870,32 +1104,14 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
     m_blend_alpha_op = n.blend_alpha_op;
     m_blend_enabled = n.blend_enabled;
 
-    switch(m_draw_mode)
-    {
-      case 0:
-      case 2:
-        switch(m_camera_mode)
-        {
-          case 0:
-            initPasses_impl(renderer, mesh, triangle_perspective);
-            break;
-          case 1:
-            initPasses_impl(renderer, mesh, triangle_fulldome);
-            break;
-        }
-        break;
-      case 1:
-        switch(m_camera_mode)
-        {
-          case 0:
-            initPasses_impl(renderer, mesh, point_perspective);
-            break;
-          case 1:
-            initPasses_impl(renderer, mesh, point_fulldome);
-            break;
-        }
-        break;
-    }
+    // Pick triangle- vs point-topology shader set, then index by
+    // camera_mode. Values outside [0, CAMERA_MODE_COUNT) clamp to
+    // perspective so a stale UI value never indexes out-of-bounds.
+    const int mode = (m_camera_mode >= 0 && m_camera_mode < CAMERA_MODE_COUNT)
+                         ? m_camera_mode
+                         : 0;
+    auto& set = (m_draw_mode == 1) ? point_shaders[mode] : triangle_shaders[mode];
+    initPasses_impl(renderer, mesh, set);
 
     QRhiGraphicsPipeline::TargetBlend blend;
     blend.enable = m_blend_enabled;
@@ -908,24 +1124,33 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
 
     for(auto& [e, pass] : this->m_p)
     {
-      pass.pipeline->destroy();
+      pass.p.pipeline->destroy();
 
-      pass.pipeline->setTargetBlends({blend});
+      pass.p.pipeline->setTargetBlends({blend});
 
       switch(m_draw_mode)
       {
         case 0:
-          pass.pipeline->setTopology(QRhiGraphicsPipeline::Triangles);
+          pass.p.pipeline->setTopology(QRhiGraphicsPipeline::Triangles);
           break;
         case 1:
-          pass.pipeline->setTopology(QRhiGraphicsPipeline::Points);
+          pass.p.pipeline->setTopology(QRhiGraphicsPipeline::Points);
           break;
         case 2:
-          pass.pipeline->setTopology(QRhiGraphicsPipeline::Lines);
+          pass.p.pipeline->setTopology(QRhiGraphicsPipeline::Lines);
           break;
       }
 
-      pass.pipeline->create();
+      // Reverse-Z project rule (matches PipelineStateHelpers::applyPipelineState
+      // default). buildPipeline leaves DepthOp at QRhi's default `Less` which
+      // rejects every fragment against the 0.0-cleared reverse-Z buffer.
+      // ModelDisplay's projection matrix now produces reverse-Z NDC, so we
+      // must also flip the compare op.
+      pass.p.pipeline->setDepthTest(true);
+      pass.p.pipeline->setDepthWrite(true);
+      pass.p.pipeline->setDepthOp(QRhiGraphicsPipeline::Greater);
+
+      pass.p.pipeline->create();
     }
   }
 
@@ -1019,19 +1244,25 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
 
   void createShaders(RenderList& renderer, const score::gfx::Mesh& mesh)
   {
-    createShaders(
-        this->triangle_perspective, renderer, vtx_output_triangle,
-        vtx_output_process_triangle, vtx_projection_perspective, mesh);
-    createShaders(
-        this->point_perspective, renderer, vtx_output_point, vtx_output_process_point,
-        vtx_projection_perspective, mesh);
-
-    createShaders(
-        this->triangle_fulldome, renderer, vtx_output_triangle,
-        vtx_output_process_triangle, vtx_projection_fulldome, mesh);
-    createShaders(
-        this->point_fulldome, renderer, vtx_output_point, vtx_output_process_point,
-        vtx_projection_fulldome, mesh);
+    // One projection snippet per camera_mode — order MUST match the UI
+    // enum ordering described on RenderShaders.
+    const char* projections[CAMERA_MODE_COUNT] = {
+        vtx_projection_perspective,
+        vtx_projection_fulldome_equidistant,
+        vtx_projection_fulldome_equisolid,
+        vtx_projection_fulldome_stereographic,
+        vtx_projection_fulldome_orthographic,
+    };
+
+    for(int i = 0; i < CAMERA_MODE_COUNT; ++i)
+    {
+      createShaders(
+          triangle_shaders[i], renderer, vtx_output_triangle,
+          vtx_output_process_triangle, projections[i], mesh);
+      createShaders(
+          point_shaders[i], renderer, vtx_output_point, vtx_output_process_point,
+          projections[i], mesh);
+    }
   }
 
   void recreateRenderTarget(RenderList& renderer)
@@ -1057,7 +1288,7 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
     m_samplers.push_back({sampler, texture});
   }
 
-  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override
   {
     recreateRenderTarget(renderer);
     const auto& mesh = m_mesh ? *m_mesh : renderer.defaultQuad();
@@ -1066,6 +1297,62 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
     processUBOInit(renderer);
     m_material.init(renderer, node.input, m_samplers);
 
+    m_initialized = true;
+  }
+
+  void addOutputPass(
+      RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override
+  {
+    // The shader selection depends on mesh properties and node settings.
+    // initPasses() creates passes for ALL edges at once, so we only call it
+    // the first time (when m_p is empty). Subsequent edges are already covered.
+    if(m_p.empty())
+    {
+      const auto& mesh = m_mesh ? *m_mesh : renderer.defaultQuad();
+      initPasses(renderer, mesh);
+    }
+  }
+
+  bool hasOutputPassForEdge(Edge& edge) const override
+  {
+    return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; })
+           != m_p.end();
+  }
+
+  void releaseState(RenderList& r) override
+  {
+    if(!m_initialized)
+      return;
+
+    m_renderer = nullptr;
+
+    // Release any remaining passes
+    for(auto& pass : m_p)
+      pass.second.release();
+    m_p.clear();
+
+    for(auto sampler : m_samplers)
+    {
+      delete sampler.sampler;
+    }
+    m_samplers.clear();
+
+    delete m_processUBO;
+    m_processUBO = nullptr;
+
+    delete m_material.buffer;
+    m_material.buffer = nullptr;
+
+    m_meshbufs = {};
+
+    m_initialized = false;
+  }
+
+  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
+  {
+    initState(renderer, res);
+
+    const auto& mesh = m_mesh ? *m_mesh : renderer.defaultQuad();
     initPasses(renderer, mesh);
   }
 
@@ -1085,10 +1372,20 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
     memcpy(to, from.data(), sizeof(float[N]));
   }
 
+  int mdupdate_log = 0;
   void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override
   {
     auto& n = static_cast<const ModelDisplayNode&>(this->node);
 
+    if(mdupdate_log < 3)
+    {
+      qDebug() << "ModelDisplay::update materialChanged=" << this->materialChanged
+               << "geometryChanged=" << this->geometryChanged
+               << "fov=" << n.fov
+               << "passes=" << m_p.size();
+      mdupdate_log++;
+    }
+
     bool mustRecreatePasses = false;
     if(this->materialChanged)
     {
@@ -1103,6 +1400,21 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
           qreal(renderer.state.renderSize.width()) / renderer.state.renderSize.height(),
           n.near,
           n.far);
+
+      // Project-wide reverse-Z convention: near=1, far=0, depth cleared to
+      // 0.0, depth op Greater. QMatrix4x4::perspective() produces standard
+      // GL Z (near=-1, far=+1) which clipSpaceCorrMatrix then maps to
+      // Vulkan [0, 1] — the wrong direction for reverse-Z.
+      //
+      // Pre-multiplying by a Z-flip matrix flips the NDC z output of the
+      // perspective: z_ndc → -z_ndc. After clipSpaceCorrMatrix's [-1,1] →
+      // [0,1] remap, that gives near→1, far→0, exactly what the rest of
+      // the pipeline expects.
+      {
+        QMatrix4x4 zFlip;
+        zFlip(2, 2) = -1.0f;
+        projection = zFlip * projection;
+      }
       QMatrix4x4 view;
 
       view.lookAt(
@@ -1122,6 +1434,8 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
       toGL(mvp, mc.mvp);
       toGL(norm, mc.modelNormal);
       mc.fov = n.fov;
+      mc.znear = n.near;
+      mc.zfar = n.far;
 
       res.updateDynamicBuffer(m_material.buffer, 0, sizeof(ModelCameraUBO), &mc);
 
@@ -1146,6 +1460,7 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer
       if(m_blend_enabled != n.blend_enabled)
         mustRecreatePasses = true;
     }
+    this->materialChanged = false;
 
     res.updateDynamicBuffer(m_processUBO, 0, sizeof(ProcessUBO), &n.standardUBO);
 
diff --git a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/ObjLoader.hpp
deleted file mode 100644
index 516fe12bfd..0000000000
--- a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-#include <Threedim/TinyObj.hpp>
-#include <halp/controls.hpp>
-#include <halp/file_port.hpp>
-#include <halp/geometry.hpp>
-#include <halp/meta.hpp>
-#include <ossia/detail/mutex.hpp>
-
-namespace Threedim
-{
-
-class ObjLoader
-{
-public:
-  halp_meta(name, "Object loader")
-  halp_meta(category, "Visuals/Meshes")
-  halp_meta(c_name, "obj_loader")
-  halp_meta(
-      authors,
-      "Jean-Michaël Celerier, TinyOBJ authors, miniPLY authors, Eigen authors")
-  halp_meta(manual_url, "https://ossia.io/score-docs/processes/meshes.html#obj-loader")
-  halp_meta(uuid, "5df71765-505f-4ab7-98c1-f305d10a01ef")
-
-  struct ins
-  {
-    struct obj_t : halp::file_port<"3D file">
-    {
-      halp_meta(extensions, "3D files (*.obj *.ply)");
-      static std::function<void(ObjLoader&)> process(file_type data);
-    } obj;
-    PositionControl position;
-    RotationControl rotation;
-    ScaleControl scale;
-  } inputs;
-
-  struct
-  {
-    struct : halp::mesh
-    {
-      halp_meta(name, "Geometry");
-      std::vector<halp::dynamic_geometry> mesh;
-    } geometry;
-  } outputs;
-
-  void rebuild_geometry();
-
-  std::vector<mesh> meshinfo{};
-  float_vec complete;
-};
-
-}
diff --git a/src/plugins/score-plugin-threedim/Threedim/PBRMesh.cpp b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.cpp
new file mode 100644
index 0000000000..98b206ec52
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.cpp
@@ -0,0 +1,436 @@
+#include "PBRMesh.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+
+#include <algorithm>
+#include <cstring>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// halp::attribute_format → ossia::vertex_format. The enum orderings differ,
+// so this has to be a switch rather than a static_cast. Unknown formats
+// fall back to float3 (the most common vertex-attribute case).
+ossia::vertex_format mapFormat(halp::attribute_format f) noexcept
+{
+  using H = halp::attribute_format;
+  using O = ossia::vertex_format;
+  switch(f)
+  {
+    case H::float1:     return O::float1;
+    case H::float2:     return O::float2;
+    case H::float3:     return O::float3;
+    case H::float4:     return O::float4;
+    case H::half1:      return O::half1;
+    case H::half2:      return O::half2;
+    case H::half3:      return O::half3;
+    case H::half4:      return O::half4;
+    case H::unormbyte1: return O::unorm8x1;
+    case H::unormbyte2: return O::unorm8x2;
+    case H::unormbyte4: return O::unorm8x4;
+    case H::uint1:      return O::uint32x1;
+    case H::uint2:      return O::uint32x2;
+    case H::uint3:      return O::uint32x3;
+    case H::uint4:      return O::uint32x4;
+    case H::sint1:      return O::sint32x1;
+    case H::sint2:      return O::sint32x2;
+    case H::sint3:      return O::sint32x3;
+    case H::sint4:      return O::sint32x4;
+    case H::ushort1:    return O::uint16x1;
+    case H::ushort2:    return O::uint16x2;
+    case H::ushort4:    return O::uint16x4;
+    case H::sshort1:    return O::sint16x1;
+    case H::sshort2:    return O::sint16x2;
+    case H::sshort4:    return O::sint16x4;
+    default:            return O::float3;
+  }
+}
+
+ossia::primitive_topology mapTopology(halp::primitive_topology t) noexcept
+{
+  using H = halp::primitive_topology;
+  using O = ossia::primitive_topology;
+  switch(t)
+  {
+    case H::triangles:      return O::triangles;
+    case H::triangle_strip: return O::triangle_strip;
+    case H::triangle_fan:   return O::triangle_fan;
+    case H::lines:          return O::lines;
+    case H::line_strip:     return O::line_strip;
+    case H::points:         return O::points;
+  }
+  return O::triangles;
+}
+
+ossia::index_format mapIndexFormat(halp::index_format f) noexcept
+{
+  return (f == halp::index_format::uint16) ? ossia::index_format::uint16
+                                           : ossia::index_format::uint32;
+}
+
+// Wrap a halp GPU buffer handle into an ossia::buffer_resource carrying a
+// gpu_buffer_handle (no CPU-side data, no upload). Returns null on a null
+// handle so caller can skip that slot.
+ossia::buffer_resource_ptr
+wrapGpuBuffer(void* handle, int64_t byte_size) noexcept
+{
+  if(!handle)
+    return nullptr;
+  ossia::gpu_buffer_handle gh;
+  gh.native_handle = handle;
+  gh.byte_size = byte_size;
+  gh.byte_offset = 0;
+  auto res = std::make_shared<ossia::buffer_resource>();
+  res->resource = gh;
+  res->dirty_index = 1;
+  return res;
+}
+
+} // namespace
+
+void PBRMesh::operator()()
+{
+  if(m_material_stable_id == 0) m_material_stable_id = ossia::mint_stable_id();
+  if(m_primitive_stable_id == 0) m_primitive_stable_id = ossia::mint_stable_id();
+  if(m_xform_stable_id == 0) m_xform_stable_id = ossia::mint_stable_id();
+
+  const auto& m = inputs.geometry_in.mesh;
+  void* buf0_handle
+      = m.buffers.empty() ? nullptr : m.buffers[0].handle;
+
+  // Identity-caching fast path: skip the rebuild when buffer 0's handle,
+  // the vertex/index counts, textures, factors and TRS are all unchanged.
+  const float cur_factors[10]{
+      inputs.base_r.value, inputs.base_g.value, inputs.base_b.value,
+      inputs.base_a.value, inputs.metallic.value, inputs.roughness.value,
+      inputs.em_r.value, inputs.em_g.value, inputs.em_b.value,
+      inputs.em_strength.value};
+  void* cur_tex[4]{
+      inputs.base_color_tex.texture.handle,
+      inputs.metal_rough_tex.texture.handle,
+      inputs.normal_tex.texture.handle,
+      inputs.emissive_tex.texture.handle};
+
+  float scratch[16];
+  CachedTRS xformCache = m_cachedTRS;
+  const bool trs_changed = computeTRSMatrix(inputs, scratch, xformCache);
+
+  // Intentionally NOT gating on `inputs.geometry_in.dirty_mesh`: upstream
+  // CSF compute nodes raise that flag every frame to signal content
+  // changed, but our downstream ScenePreprocessor handles content changes
+  // via its GPU-copy path (which re-fires every runInitialPasses). Only
+  // STRUCTURAL changes — buffer-handle swap, vertex/index count change,
+  // texture-override swap, factor change — need a new scene_state
+  // version; content-only changes keep the cached shared_ptr so
+  // ScenePreprocessor's fingerprint fast-path stays warm and doesn't
+  // rebuild the MDI merge + invalidate downstream pipeline state.
+  const bool inputs_changed
+      = m_cached_buf0 != buf0_handle
+        || m_cached_vertices != m.vertices
+        || m_cached_indices != m.indices
+        || !std::equal(m_cached_tex, m_cached_tex + 4, cur_tex)
+        || !std::equal(m_cached_factors, m_cached_factors + 10, cur_factors);
+
+  if(!inputs_changed && !trs_changed && m_wrapped_state && buf0_handle)
+  {
+    outputs.scene_out.scene.state = m_wrapped_state;
+    outputs.scene_out.dirty = 0;
+    return;
+  }
+  m_cachedTRS = xformCache;
+  m_cached_buf0 = buf0_handle;
+  m_cached_vertices = m.vertices;
+  m_cached_indices = m.indices;
+  std::copy(cur_tex, cur_tex + 4, m_cached_tex);
+  std::copy(cur_factors, cur_factors + 10, m_cached_factors);
+
+  if(!buf0_handle || m.vertices <= 0)
+  {
+    outputs.scene_out.scene = {};
+    m_wrapped_state.reset();
+    return;
+  }
+
+  // Wrap halp buffers → ossia buffer_resources (parallel indexing so
+  // attribute buffer_index resolution is a direct lookup).
+  ossia::small_vector<ossia::buffer_resource_ptr, 4> wrapped_buffers;
+  wrapped_buffers.reserve(m.buffers.size());
+  for(const auto& b : m.buffers)
+    wrapped_buffers.push_back(wrapGpuBuffer(b.handle, b.byte_size));
+
+  // Build one mesh_primitive off the geometry.
+  ossia::mesh_primitive mp;
+  // Only non-null wrapped buffers are pushed, so vertex_buffers is NOT
+  // index-parallel to halp's buffers; bufRemap below translates indices and
+  // attributes whose buffer is null are skipped on the attribute walk.
+  for(const auto& w : wrapped_buffers)
+    if(w)
+      mp.vertex_buffers.push_back(w);
+
+  // Map halp buffer index → mp.vertex_buffers index (we may have dropped
+  // nulls along the way).
+  ossia::small_vector<int, 4> bufRemap;
+  bufRemap.resize(wrapped_buffers.size(), -1);
+  int out_idx = 0;
+  for(std::size_t i = 0; i < wrapped_buffers.size(); ++i)
+  {
+    if(wrapped_buffers[i])
+      bufRemap[i] = out_idx++;
+  }
+
+  for(const auto& attr : m.attributes)
+  {
+    if(attr.binding < 0 || attr.binding >= (int)m.input.size())
+      continue;
+    const auto& in = m.input[attr.binding];
+    if(in.buffer < 0 || in.buffer >= (int)bufRemap.size())
+      continue;
+    const int buf_idx = bufRemap[in.buffer];
+    if(buf_idx < 0)
+      continue;
+
+    ossia::vertex_attribute va;
+    va.semantic = static_cast<ossia::attribute_semantic>(attr.semantic);
+    va.format = mapFormat(attr.format);
+    va.buffer_index = (uint32_t)buf_idx;
+    va.byte_offset = uint32_t(in.byte_offset + attr.byte_offset);
+    // Binding stride governs per-vertex advance; fall back to 0 (tightly
+    // packed single attribute) if the binding entry is missing.
+    va.byte_stride = (attr.binding < (int)m.bindings.size())
+                         ? (uint32_t)m.bindings[attr.binding].stride
+                         : 0u;
+    va.rate = ossia::vertex_attribute::input_rate::per_vertex;
+    mp.attributes.push_back(va);
+  }
+
+  // Index buffer (optional).
+  if(m.index.buffer >= 0 && m.index.buffer < (int)m.buffers.size())
+  {
+    const auto& ib = m.buffers[m.index.buffer];
+    if(ib.handle)
+    {
+      ossia::gpu_buffer_handle gh;
+      gh.native_handle = ib.handle;
+      gh.byte_size = ib.byte_size;
+      gh.byte_offset = m.index.byte_offset;
+      auto ibr = std::make_shared<ossia::buffer_resource>();
+      ibr->resource = gh;
+      ibr->dirty_index = 1;
+      mp.index_buffer = ibr;
+      mp.index_type = mapIndexFormat(m.index.format);
+    }
+  }
+
+  mp.topology = mapTopology(m.topology);
+  mp.stable_id = m_primitive_stable_id;
+  mp.vertex_count = (uint32_t)std::max(0, m.vertices);
+  mp.index_count = (uint32_t)std::max(0, m.indices);
+
+  // Author the material. Factors come from the controls; texture slots
+  // populate the dynamic-handle pathway when the corresponding inlet
+  // carries a non-null handle. The primitive's `material` is bound to
+  // this shared_ptr directly — no index lookup.
+  auto mat = std::make_shared<ossia::material_component>();
+  mat->stable_id = m_material_stable_id;
+  mat->base_color_factor[0] = cur_factors[0];
+  mat->base_color_factor[1] = cur_factors[1];
+  mat->base_color_factor[2] = cur_factors[2];
+  mat->base_color_factor[3] = cur_factors[3];
+  mat->metallic_factor = cur_factors[4];
+  mat->roughness_factor = cur_factors[5];
+  mat->emissive_factor[0] = cur_factors[6];
+  mat->emissive_factor[1] = cur_factors[7];
+  mat->emissive_factor[2] = cur_factors[8];
+  mat->emissive_strength = cur_factors[9];
+
+  auto stamp_tex = [](ossia::texture_ref& dst, void* h) {
+    if(!h)
+      return;
+    dst.texture.native_handle = h;
+    dst.texture.bindless_index = 0;
+    dst.source.reset();
+  };
+  stamp_tex(mat->base_color_texture, cur_tex[0]);
+  stamp_tex(mat->metallic_roughness_texture, cur_tex[1]);
+  stamp_tex(mat->normal_texture, cur_tex[2]);
+  stamp_tex(mat->emissive_texture, cur_tex[3]);
+
+  // Propagate the Material arena slot ref (populated in init()).
+  mat->raw_slot = m_material_ref;
+
+  mp.material = ossia::material_component_ptr(mat);
+
+  auto mesh_comp = std::make_shared<ossia::mesh_component>();
+  mesh_comp->primitives.push_back(std::move(mp));
+
+  // Assemble the single scene_node: TRS first (Loader convention), then
+  // the mesh_component as the second payload. Matches GltfParser's
+  // layout so the built-in TRS controls act on the mesh the same way.
+  ossia::scene_transform xform;
+  xform.stable_id = m_xform_stable_id;
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+  auto q = QQuaternion::fromEulerAngles(
+      inputs.rotation.value.x, inputs.rotation.value.y,
+      inputs.rotation.value.z);
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar();
+  xform.scale[0] = inputs.scale.value.x;
+  xform.scale[1] = inputs.scale.value.y;
+  xform.scale[2] = inputs.scale.value.z;
+  // Propagate the RawTransform slot ref (populated in init()).
+  xform.raw_slot = m_xform_ref;
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->push_back(xform);
+  children->push_back(ossia::mesh_component_ptr(std::move(mesh_comp)));
+
+  auto node = std::make_shared<ossia::scene_node>();
+  node->children = std::move(children);
+  node->dirty_index = ++m_version_counter;
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->push_back(std::move(node));
+
+  auto mats = std::make_shared<std::vector<ossia::material_component_ptr>>();
+  mats->push_back(std::move(mat));
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(roots);
+  state->materials = std::move(mats);
+  state->version = m_version_counter;
+  state->dirty_index = m_version_counter;
+
+  m_wrapped_state = std::move(state);
+  outputs.scene_out.scene.state = m_wrapped_state;
+  outputs.scene_out.dirty = 0xFF;
+}
+
+void PBRMesh::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  // One slot in the Material arena per PBRMesh for its lifetime. Seeded
+  // with default-constructed MaterialGPU bytes so any reader that samples
+  // the slot before the first update() sees a neutral white material
+  // rather than undefined memory.
+  if(!material_slot.valid())
+  {
+    material_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::Material,
+        sizeof(score::gfx::MaterialGPU));
+    m_material_ref = r.registry().toOssiaRef(material_slot);
+  }
+  if(material_slot.valid())
+  {
+    score::gfx::MaterialGPU seed{};
+    r.registry().updateSlot(res, material_slot, &seed, sizeof(seed));
+  }
+  if(!raw_transform_slot.valid())
+  {
+    raw_transform_slot = r.registry().allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawTransform,
+        sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = r.registry().toOssiaRef(raw_transform_slot);
+  }
+  if(raw_transform_slot.valid())
+  {
+    score::gfx::RawLocalTransform seed{};
+    r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+  }
+}
+
+void PBRMesh::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+    score::gfx::Edge*)
+{
+  // Pack control-snapshot factor values into the slot, and — when a
+  // runtime GPU handle is wired on one of the four channels — register
+  // a dynamic slot in the registry and stamp tex_ref_dynamic(slot) into
+  // the slot's textureRefs[]. Producer-authored refs agree with the
+  // preprocessor's own rebuildDynamicSlots because both go through the
+  // same shared registry map — idempotent.
+  if(!material_slot.valid())
+    return;
+
+  score::gfx::MaterialGPU gpu{};
+  gpu.baseColor[0] = inputs.base_r.value;
+  gpu.baseColor[1] = inputs.base_g.value;
+  gpu.baseColor[2] = inputs.base_b.value;
+  gpu.baseColor[3] = inputs.base_a.value;
+  gpu.metallicRoughnessOcclusionUnlit[0] = inputs.metallic.value;
+  gpu.metallicRoughnessOcclusionUnlit[1] = inputs.roughness.value;
+  gpu.metallicRoughnessOcclusionUnlit[2] = 1.f;
+  gpu.metallicRoughnessOcclusionUnlit[3] = 0.f;
+  gpu.emissive_strength[0] = inputs.em_r.value;
+  gpu.emissive_strength[1] = inputs.em_g.value;
+  gpu.emissive_strength[2] = inputs.em_b.value;
+  gpu.emissive_strength[3] = inputs.em_strength.value;
+
+  using Ch = score::gfx::GpuResourceRegistry::TextureChannel;
+  uint32_t fm = 0u;
+  using namespace score::gfx::material_feature;
+  auto stamp_dyn = [&](Ch ch, void* handle, int idx, uint32_t feature_bit) {
+    if(!handle)
+      return;
+    const int slot = r.registry().resolveDynamicSlot(ch, handle);
+    if(slot < 0)
+      return;
+    gpu.textureRefs[idx] = score::gfx::tex_ref_dynamic((uint32_t)slot);
+    fm |= feature_bit;
+  };
+  stamp_dyn(Ch::BaseColor,  inputs.base_color_tex.texture.handle,   0, has_base_color_texture);
+  stamp_dyn(Ch::MetalRough, inputs.metal_rough_tex.texture.handle,  1, has_metal_rough_texture);
+  stamp_dyn(Ch::Normal,     inputs.normal_tex.texture.handle,       2, has_normal_texture);
+  stamp_dyn(Ch::Emissive,   inputs.emissive_tex.texture.handle,     3, has_emissive_texture);
+
+  // PBRMesh is lit PBR (unlit flag not exposed), fully opaque by default.
+  // No extension lobes wired through the current control surface. As
+  // extension support grows on this node we OR additional feature bits.
+  gpu.feature_mask = fm;
+  // hit_group_id stays 0 = standard lit; RT pipeline build will swap in
+  // a mask-specific index when relevant.
+
+  r.registry().updateSlot(res, material_slot, &gpu, sizeof(gpu));
+
+  if(raw_transform_slot.valid())
+  {
+    score::gfx::RawLocalTransform xform{};
+    xform.translation[0] = inputs.position.value.x;
+    xform.translation[1] = inputs.position.value.y;
+    xform.translation[2] = inputs.position.value.z;
+    QQuaternion q = QQuaternion::fromEulerAngles(
+        inputs.rotation.value.x, inputs.rotation.value.y,
+        inputs.rotation.value.z);
+    xform.rotation[0] = q.x();
+    xform.rotation[1] = q.y();
+    xform.rotation[2] = q.z();
+    xform.rotation[3] = q.scalar();
+    xform.scale[0] = inputs.scale.value.x;
+    xform.scale[1] = inputs.scale.value.y;
+    xform.scale[2] = inputs.scale.value.z;
+    r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform));
+  }
+}
+
+void PBRMesh::release(score::gfx::RenderList& r)
+{
+  if(material_slot.valid())
+    r.registry().free(material_slot);
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot);
+  m_material_ref = {};
+  m_xform_ref = {};
+  // Producer-state-drift Option A — see Light::release.
+  m_wrapped_state.reset();
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/PBRMesh.hpp b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.hpp
new file mode 100644
index 0000000000..fb54f01df3
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.hpp
@@ -0,0 +1,156 @@
+#pragma once
+#include "TransformHelper.hpp"
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <Threedim/TinyObj.hpp>
+#include <halp/controls.hpp>
+#include <halp/geometry.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <cstdint>
+#include <memory>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Wraps a GPU-resident geometry (the output of a compute-shader framework
+// node — `halp::dynamic_gpu_geometry`) as a one-node scene_spec with a
+// PBR material attached. The bridge between "CSF produces a geometry"
+// and "scene-graph pipeline consumes scene_spec".
+//
+// Typical wiring:
+//   CSFNode(mesh_out) → PBRMesh(mesh_in, texture_in) → ScenePreprocessor
+//
+// The node emits a single scene_node at the root holding:
+//   - a scene_transform built from the TRS controls
+//   - a mesh_component wrapping the GPU geometry into one mesh_primitive
+//   - a direct material_component_ptr (also published into the scene's
+//     materials list): one material_component carrying the factor
+//     controls + any wired-in runtime textures
+//
+// Texture inputs route through the Dynamic Texture pathway in
+// ScenePreprocessor: non-null handles become `*Dyn<slot>` auxiliary-texture
+// bindings that classic_pbr_full samples directly, no CPU upload, no
+// array-layer copy. Unwired inputs fall through to the scalar factors.
+class PBRMesh
+{
+public:
+  halp_meta(name, "PBR Mesh")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "pbr_mesh")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url, "https://ossia.io/score-docs/processes/pbr-mesh.html")
+  halp_meta(uuid, "d7a2f5c9-3e8b-4b1d-a6f2-5c8e9d1f3b7a")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Mesh");
+      halp::dynamic_gpu_geometry mesh;
+      float transform[16]{};
+      bool dirty_mesh = false;
+      bool dirty_transform = false;
+    } geometry_in;
+
+    // Texture slots. Non-null handle → emitted as a dynamic texture
+    // on the material; null → shader falls back to the scalar factor.
+    halp::gpu_texture_input<"Base Color Tex"> base_color_tex;
+    halp::gpu_texture_input<"Metal Rough Tex"> metal_rough_tex;
+    halp::gpu_texture_input<"Normal Tex"> normal_tex;
+    halp::gpu_texture_input<"Emissive Tex"> emissive_tex;
+
+    // PBR factors — used as-is by the material (no per-factor toggle:
+    // defaults here match glTF defaults, so "untouched" controls produce
+    // a reasonable neutral material).
+    halp::hslider_f32<"Color R", halp::range{0., 1., 1.}> base_r;
+    halp::hslider_f32<"Color G", halp::range{0., 1., 1.}> base_g;
+    halp::hslider_f32<"Color B", halp::range{0., 1., 1.}> base_b;
+    halp::hslider_f32<"Color A", halp::range{0., 1., 1.}> base_a;
+    halp::hslider_f32<"Metallic", halp::range{0., 1., 0.}> metallic;
+    halp::hslider_f32<"Roughness", halp::range{0., 1., 0.5}> roughness;
+    halp::hslider_f32<"Emissive R", halp::range{0., 10., 0.}> em_r;
+    halp::hslider_f32<"Emissive G", halp::range{0., 10., 0.}> em_g;
+    halp::hslider_f32<"Emissive B", halp::range{0., 10., 0.}> em_b;
+    halp::hslider_f32<"Emissive strength", halp::range{0., 10., 1.}> em_strength;
+
+    // Root-node placement. Same TRS controls as Transform3D / Instancer
+    // so the node stands alone without a separate transform upstream.
+    PositionControl position;
+    RotationControl rotation;
+    ScaleControl scale;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void operator()();
+
+  // Render-thread hooks. init allocates one Material and one RawTransform
+  // arena slot and seeds each with default bytes; update packs the factor
+  // controls into a MaterialGPU and the TRS controls into a
+  // RawLocalTransform and uploads both; release frees both slots. For
+  // each texture inlet with a non-null handle, update resolves a dynamic
+  // slot through the shared registry and stamps tex_ref_dynamic(slot)
+  // into textureRefs[]; other entries keep the MaterialGPU default.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  // Republished stable shared_ptr when nothing changed, so ScenePreprocessor's
+  // identity/fingerprint caches stay warm.
+  std::shared_ptr<const ossia::scene_state> m_wrapped_state;
+  CachedTRS m_cachedTRS{};
+
+  // Identity cache: handle of upstream mesh buffer 0 + vertex/index count
+  // + texture handles + factor values. A change in any forces a rebuild.
+  void* m_cached_buf0{};
+  int64_t m_cached_vertices{-1};
+  int64_t m_cached_indices{-1};
+  void* m_cached_tex[4]{};
+  float m_cached_factors[10]{};
+  int64_t m_version_counter{0};
+
+  // Stable ids minted once on first rebuild and reused across every
+  // subsequent rebuild so downstream fingerprint / SER / BVH caches stay
+  // identity-stable.
+  uint64_t m_material_stable_id{};
+  uint64_t m_primitive_stable_id{};
+  uint64_t m_xform_stable_id{};
+
+  // Slots: one in the Material arena, one in RawTransform for the
+  // emitted scene_transform. Allocated in init(), written in update(),
+  // freed in release().
+  score::gfx::GpuResourceRegistry::Slot material_slot;
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
+
+  // Ossia-facing snapshots. Written in init() (render thread) when the
+  // slots are allocated and cleared in release(); copied onto the emitted
+  // material_component / scene_transform raw_slot in operator()().
+  ossia::gpu_slot_ref m_material_ref{};
+  ossia::gpu_slot_ref m_xform_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp
index e2a7681012..1a4ea47f5c 100644
--- a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp
@@ -1,60 +1,11 @@
 #include "PCLToGeometry.hpp"
 
-#include <Threedim/MeshHelpers.hpp>
 #include <Threedim/TinyObj.hpp>
-#include <rnd/random.hpp>
 
 #include <QDebug>
-#include <QString>
 
 namespace Threedim
 {
-PCLToMesh::PCLToMesh()
-{
-  rebuild_transform(inputs, outputs);
-  outputs.geometry.dirty_mesh = true;
-}
-
-void PCLToMesh::operator()()
-{
-  auto& tex = this->inputs.in.buffer;
-  if (!tex.changed)
-    return;
-
-  float* data = reinterpret_cast<float*>(tex.raw_data);
-  create_mesh(std::span<float>(data, tex.byte_size / sizeof(float)));
-}
-
-void PCLToMesh::create_mesh(std::span<float> v)
-{
-  {
-    // std::size_t vertices = v.size() / 3;
-
-    // this->complete.clear();
-    // this->complete.resize(std::ceil((v.size() / 3.) * (3 + 3 + 2)));
-    // std::copy_n(v.begin(), v.size(), complete.begin());
-
-    // auto& pch = rnd::fast_random_device();
-    //    this->complete.resize(6 * 25000);
-    //    for (float& v : this->complete)
-    //      v = std::uniform_real_distribution<>{0.f, 1.f}(pch);
-
-    auto prev_size = outputs.geometry.mesh.buffers.main_buffer.element_count;
-    const bool changed = v.size() != prev_size; // FIXME
-    //complete.assign(v.begin(), v.end());
-
-    outputs.geometry.mesh.buffers.main_buffer.elements
-        = (float*)this->inputs.in.buffer.raw_data; //complete.data();
-    outputs.geometry.mesh.buffers.main_buffer.element_count
-        = this->inputs.in.buffer.byte_size / sizeof(float); //complete.size();
-    outputs.geometry.mesh.buffers.main_buffer.dirty = true;
-
-    outputs.geometry.mesh.input.input0.byte_offset = 0;
-    outputs.geometry.mesh.vertices = v.size() / 6;
-    outputs.geometry.dirty_mesh = true; // FIXME
-  }
-}
-
 
 PCLToMesh2::PCLToMesh2()
 {
diff --git a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp
index d38908204d..48d7f08460 100644
--- a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp
@@ -1,121 +1,14 @@
 #pragma once
 
-#include <ossia/detail/pod_vector.hpp>
-
-#include <boost/container/vector.hpp>
-
 #include <Threedim/TinyObj.hpp>
 #include <halp/buffer.hpp>
 #include <halp/controls.hpp>
 #include <halp/geometry.hpp>
 #include <halp/meta.hpp>
 
-namespace halp
-{
-
-struct position_gpu_geometry
-{
-  struct buffers
-  {
-    struct
-    {
-      enum
-      {
-        dynamic,
-        vertex
-      };
-      void* handle{};
-      int size{};
-      bool dirty{};
-    } main_buffer;
-  } buffers;
-
-  struct bindings
-  {
-    struct
-    {
-      enum
-      {
-        per_vertex
-      };
-      int stride = 3 * sizeof(float);
-      int step_rate = 1;
-    } position_binding;
-  };
-
-  struct attributes
-  {
-    struct
-    {
-      enum
-      {
-        position
-      };
-      using datatype = float[3];
-      int32_t offset = 0;
-      int32_t binding = 0;
-    } position;
-  };
-
-  struct
-  {
-    struct
-    {
-      static constexpr auto buffer() { return &buffers::main_buffer; }
-      int offset = 0;
-    } input0;
-  } input;
-
-  int vertices = 0;
-  enum
-  {
-    triangles,
-    counter_clockwise,
-    cull_back
-  };
-};
-
-}
 namespace Threedim
 {
 
-class PCLToMesh
-{
-public:
-  halp_meta(name, "Pointcloud to mesh")
-  halp_meta(category, "Visuals/Meshes")
-  halp_meta(c_name, "pointcloud_to_mesh")
-  halp_meta(manual_url, "https://ossia.io/score-docs/processes/pointcloud-to-mesh.html")
-  halp_meta(uuid, "2450ffbf-04ed-4b42-8848-69f200d2742a")
-
-  struct ins
-  {
-    halp::cpu_buffer_input<"Buffer"> in;
-    PositionControl position;
-    RotationControl rotation;
-    ScaleControl scale;
-  } inputs;
-
-  struct
-  {
-    struct
-    {
-      halp_meta(name, "Geometry");
-      halp::position_color_packed_geometry mesh;
-      float transform[16]{};
-      bool dirty_mesh = false;
-      bool dirty_transform = false;
-    } geometry;
-  } outputs;
-
-  PCLToMesh();
-  void create_mesh(std::span<float> v);
-  void operator()();
-
-  std::vector<float> complete;
-};
-
-
 class PCLToMesh2
 {
 public:
diff --git a/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp b/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp
index 9a894a567f..e79c4181b5 100644
--- a/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "TransformHelper.hpp"
+
 #include <Threedim/TinyObj.hpp>
 #include <halp/audio.hpp>
 #include <halp/geometry.hpp>
@@ -13,9 +15,20 @@ struct Primitive
   halp_meta(author, "Jean-Michaël Celerier, vcglib")
   halp_meta(manual_url, "https://ossia.io/score-docs/processes/meshes.html#primitive")
 
+  // Derived classes' operator() calls this with their own inputs +
+  // geometry output to propagate the position/rotation/scale controls
+  // into the transform[16] slot + dirty_transform flag. Only sets
+  // dirty_transform when the matrix actually changes vs last frame.
+  template <typename In, typename Out>
+  void apply_transform(const In& in, Out& out)
+  {
+    out.dirty_transform = computeTRSMatrix(in, out.transform, m_cachedTRS);
+  }
+
   void operator()() { }
   PrimitiveOutputs outputs;
   std::vector<float> complete;
+  CachedTRS m_cachedTRS{};
 };
 
 // Plane is a special case due to needing a different geometry type
@@ -53,9 +66,14 @@ struct Plane
 
   void prepare(halp::setup) { update(); }
   void update();
-  void operator()() { }
+  void operator()()
+  {
+    outputs.geometry.dirty_transform
+        = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS);
+  }
 
   std::vector<float> complete;
+  CachedTRS m_cachedTRS{};
 };
 
 struct Cube : Primitive
@@ -74,6 +92,7 @@ struct Cube : Primitive
 
   void prepare(halp::setup) { update(); }
   void update();
+  void operator()() { apply_transform(inputs, outputs.geometry); }
 };
 
 struct Sphere : Primitive
@@ -97,6 +116,7 @@ struct Sphere : Primitive
 
   void prepare(halp::setup) { update(); }
   void update();
+  void operator()() { apply_transform(inputs, outputs.geometry); }
 };
 
 struct Icosahedron : Primitive
@@ -114,6 +134,7 @@ struct Icosahedron : Primitive
 
   void prepare(halp::setup) { update(); }
   void update();
+  void operator()() { apply_transform(inputs, outputs.geometry); }
 };
 
 struct Cone : Primitive
@@ -151,6 +172,7 @@ struct Cone : Primitive
 
   void prepare(halp::setup) { update(); }
   void update();
+  void operator()() { apply_transform(inputs, outputs.geometry); }
 };
 
 struct Cylinder : Primitive
@@ -178,6 +200,7 @@ struct Cylinder : Primitive
 
   void prepare(halp::setup) { update(); }
   void update();
+  void operator()() { apply_transform(inputs, outputs.geometry); }
 };
 
 struct Torus : Primitive
@@ -215,6 +238,7 @@ struct Torus : Primitive
 
   void prepare(halp::setup) { update(); }
   void update();
+  void operator()() { apply_transform(inputs, outputs.geometry); }
 };
 
 }
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.cpp
new file mode 100644
index 0000000000..14ba2d1d4b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.cpp
@@ -0,0 +1,98 @@
+#include "FormatOverride.hpp"
+
+#include <ossia/detail/variant.hpp>
+
+#include <functional>
+#include <utility>
+
+namespace Threedim::PrimitiveCloud
+{
+
+namespace
+{
+
+// Recursively rewrites primitive_cloud_components inside a scene_node's
+// children list. Returns a fresh scene_node shared_ptr when something
+// was rewritten (or a nested scene_node was rewritten), the original
+// otherwise — so unchanged subtrees keep their identity for downstream
+// fingerprinting.
+ossia::scene_node_ptr rewriteNode(
+    const ossia::scene_node_ptr& in, std::string_view override)
+{
+  if(!in || !in->children || in->children->empty())
+    return in;
+
+  bool any_rewrite = false;
+  std::vector<ossia::scene_payload> fresh_children;
+  fresh_children.reserve(in->children->size());
+
+  for(const auto& payload : *in->children)
+  {
+    if(auto* pc = ossia::get_if<ossia::primitive_cloud_component_ptr>(&payload))
+    {
+      if(*pc && (*pc)->format_id != override)
+      {
+        auto fresh = std::make_shared<ossia::primitive_cloud_component>(**pc);
+        fresh->format_id = std::string{override};
+        fresh_children.emplace_back(
+            ossia::primitive_cloud_component_ptr{std::move(fresh)});
+        any_rewrite = true;
+        continue;
+      }
+    }
+    else if(auto* sn = ossia::get_if<ossia::scene_node_ptr>(&payload))
+    {
+      auto rewritten = rewriteNode(*sn, override);
+      if(rewritten.get() != sn->get())
+      {
+        fresh_children.emplace_back(std::move(rewritten));
+        any_rewrite = true;
+        continue;
+      }
+    }
+    fresh_children.emplace_back(payload);
+  }
+
+  if(!any_rewrite)
+    return in;
+
+  auto fresh = std::make_shared<ossia::scene_node>(*in);
+  fresh->children = std::make_shared<std::vector<ossia::scene_payload>>(
+      std::move(fresh_children));
+  return fresh;
+}
+
+} // namespace
+
+std::shared_ptr<ossia::scene_state> applyFormatOverride(
+    std::shared_ptr<const ossia::scene_state> state, std::string_view override)
+{
+  if(!state)
+    return nullptr;
+  if(override.empty())
+    return std::const_pointer_cast<ossia::scene_state>(state);
+
+  auto out = std::make_shared<ossia::scene_state>(*state);
+
+  if(state->roots && !state->roots->empty())
+  {
+    auto fresh_roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+    fresh_roots->reserve(state->roots->size());
+    bool any_rewrite = false;
+    for(const auto& root : *state->roots)
+    {
+      auto rewritten = rewriteNode(root, override);
+      if(rewritten.get() != root.get())
+        any_rewrite = true;
+      fresh_roots->push_back(std::move(rewritten));
+    }
+    if(any_rewrite)
+      out->roots = std::move(fresh_roots);
+  }
+
+  out->version = state->version + 1;
+  out->dirty_index = state->dirty_index + 1;
+  return out;
+}
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.hpp
new file mode 100644
index 0000000000..19698b4926
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.hpp
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <memory>
+#include <string_view>
+
+namespace Threedim::PrimitiveCloud
+{
+
+// Shallow-clones `state` and rewrites every primitive_cloud_component
+// reachable through the scene tree to carry `override` as its
+// `format_id`. Heavy fields (raw_data buffer_resource, extra_buffers,
+// bounds, …) are shared via shared_ptr — no GPU upload duplicates.
+//
+// Used by AssetLoader's "Format override" line edit and the TagAs
+// pass-through node so unrecognised PLY columns / procedural producers
+// without an autodetected format_id can still be routed by a
+// FlattenedSceneFilterNode in mode 12 (format_id == match_str).
+//
+// `override.empty()` returns the input verbatim (`const_pointer_cast`
+// to drop the const, but no actual mutation is performed). A null
+// `state` returns null. Otherwise the returned shared_ptr is freshly
+// allocated; its `version` and `dirty_index` are bumped by 1 so
+// downstream change-detection sees a fresh frame.
+//
+// Walks scene_node children recursively. A nested scene_node is
+// shallow-copied (with a fresh children vector) only when something
+// beneath it was rewritten; untouched subtrees keep their identity.
+std::shared_ptr<ossia::scene_state> applyFormatOverride(
+    std::shared_ptr<const ossia::scene_state> state,
+    std::string_view override);
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.cpp
new file mode 100644
index 0000000000..a6e5de3ea0
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.cpp
@@ -0,0 +1,276 @@
+#include "PlyParser.hpp"
+
+#include <miniply.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+namespace Threedim::PrimitiveCloud
+{
+
+namespace
+{
+
+// Standard mesh column set. A PLY whose vertex element has only these
+// columns (and a face element) is a regular triangle mesh and goes
+// through the existing AssetLoader mesh path.
+bool is_mesh_column(std::string_view name) noexcept
+{
+  static constexpr std::string_view mesh_cols[] = {
+      "x", "y", "z",
+      "nx", "ny", "nz",
+      "red", "green", "blue", "alpha",
+      "r", "g", "b", "a",
+      "s", "t", "u", "v",
+      "texture_u", "texture_v",
+  };
+  for(auto c : mesh_cols)
+    if(name == c)
+      return true;
+  return false;
+}
+
+// Bytes per PLY scalar type. Lists aren't supported on the splat path
+// (caller filters them out) so countType is irrelevant here.
+uint32_t byte_size_for(miniply::PLYPropertyType t) noexcept
+{
+  using PT = miniply::PLYPropertyType;
+  switch(t)
+  {
+    case PT::Char:   case PT::UChar:  return 1;
+    case PT::Short:  case PT::UShort: return 2;
+    case PT::Int:    case PT::UInt:   return 4;
+    case PT::Float:                    return 4;
+    case PT::Double:                   return 8;
+    default: return 0;
+  }
+}
+
+// Round `v` up to the next multiple of `align` (a power of two).
+uint32_t align_up(uint32_t v, uint32_t align) noexcept
+{
+  return (v + (align - 1)) & ~(align - 1);
+}
+
+// Detect whether the vertex element looks like a splat. Returns true
+// if it carries any column NOT in the standard mesh set OR if there
+// is no `face` element in the file.
+bool detect_splat_shape(miniply::PLYReader& reader)
+{
+  bool has_face = false;
+  bool has_extra = false;
+
+  for(uint32_t i = 0, end = reader.num_elements(); i < end; ++i)
+  {
+    auto* el = reader.get_element(i);
+    if(!el) continue;
+    if(el->name == "face")
+    {
+      has_face = true;
+      continue;
+    }
+    if(el->name == miniply::kPLYVertexElement)
+    {
+      for(auto& p : el->properties)
+      {
+        // List columns aren't a splat thing — skip.
+        if(p.countType != miniply::PLYPropertyType::None)
+          continue;
+        if(!is_mesh_column(p.name))
+        {
+          has_extra = true;
+          break;
+        }
+      }
+    }
+  }
+  return has_extra || !has_face;
+}
+
+// Recognise a known column-name fingerprint and return the canonical
+// format_id. Empty result means "unknown / wired by hand".
+std::string detect_format_id(const miniply::PLYElement& vtx)
+{
+  bool has_f_dc = false;
+  bool has_f_rest = false;
+  bool has_scale = false;
+  bool has_rot = false;
+  bool has_opacity = false;
+  for(auto& p : vtx.properties)
+  {
+    if(p.countType != miniply::PLYPropertyType::None)
+      continue;
+    const auto& n = p.name;
+    if(n == "f_dc_0" || n == "f_dc_1" || n == "f_dc_2") has_f_dc = true;
+    else if(n.rfind("f_rest_", 0) == 0) has_f_rest = true;
+    else if(n == "scale_0" || n == "scale_1" || n == "scale_2") has_scale = true;
+    else if(n == "rot_0" || n == "rot_1" || n == "rot_2" || n == "rot_3") has_rot = true;
+    else if(n == "opacity") has_opacity = true;
+  }
+  if(has_f_dc && has_f_rest && has_scale && has_rot && has_opacity)
+    return "3dgs.classic";
+  return {};
+}
+
+} // namespace
+
+bool ply_is_splat_shaped(std::string_view path)
+{
+  // miniply::PLYReader expects a NUL-terminated path. string_view from
+  // halp::file_port::filename is null-terminated in practice but not
+  // guaranteed; copy to be safe.
+  std::string p{path};
+  miniply::PLYReader reader(p.c_str());
+  if(!reader.valid())
+    return false;
+  return detect_splat_shape(reader);
+}
+
+ossia::primitive_cloud_component_ptr parse_ply(std::string_view path)
+{
+  std::string p{path};
+  miniply::PLYReader reader(p.c_str());
+  if(!reader.valid())
+    return nullptr;
+
+  if(!detect_splat_shape(reader))
+    return nullptr;
+
+  // Walk to the vertex element.
+  while(reader.has_element())
+  {
+    if(!reader.element_is(miniply::kPLYVertexElement))
+    {
+      reader.next_element();
+      continue;
+    }
+    if(!reader.load_element())
+      return nullptr;
+    break;
+  }
+  if(!reader.has_element())
+    return nullptr;
+
+  const auto* vtx = reader.element();
+  if(!vtx)
+    return nullptr;
+  const uint32_t N = reader.num_rows();
+  if(N == 0)
+    return nullptr;
+
+  // Skip list columns: not part of the splat schema. We collect the
+  // scalar-only column subset and lay them out in row order: each
+  // scalar at its natural alignment, row stride padded to the largest
+  // scalar size in the row (almost every splat PLY is all-float so
+  // this is essentially "sum of bytes per column"; we do the more
+  // conservative thing for mixed-type files).
+  struct Col
+  {
+    uint32_t prop_idx;
+    miniply::PLYPropertyType type;
+    uint32_t offset_in_row;
+    uint32_t size;
+    std::string name;
+  };
+  std::vector<Col> cols;
+  cols.reserve(vtx->properties.size());
+
+  uint32_t row_offset = 0;
+  uint32_t row_align = 1;
+  for(uint32_t i = 0; i < (uint32_t)vtx->properties.size(); ++i)
+  {
+    const auto& p = vtx->properties[i];
+    if(p.countType != miniply::PLYPropertyType::None)
+      continue; // list — skip
+    const uint32_t sz = byte_size_for(p.type);
+    if(sz == 0)
+      continue;
+    row_offset = align_up(row_offset, sz);
+    cols.push_back(Col{i, p.type, row_offset, sz, p.name});
+    row_offset += sz;
+    if(sz > row_align)
+      row_align = sz;
+  }
+  if(cols.empty())
+    return nullptr;
+  const uint32_t row_stride = align_up(row_offset, row_align);
+
+  // Allocate the packed row buffer. shared_ptr<uint8_t[]> wraps the
+  // storage; the buffer_resource keeps it alive via its data field.
+  const std::size_t bytes = std::size_t(N) * row_stride;
+  auto storage = std::shared_ptr<uint8_t[]>(new uint8_t[bytes]());
+
+  // Extract each scalar column at its row offset.
+  for(const auto& c : cols)
+  {
+    uint32_t idx = c.prop_idx;
+    reader.extract_properties_with_stride(
+        &idx, 1, c.type,
+        storage.get() + c.offset_in_row, row_stride);
+  }
+
+  // AABB: find the float x/y/z columns by name, fold every packed row in.
+  ossia::aabb bounds{};
+  bounds.min[0] = bounds.min[1] = bounds.min[2] = 1.f;
+  bounds.max[0] = bounds.max[1] = bounds.max[2] = -1.f;
+  {
+    const Col* cx = nullptr; const Col* cy = nullptr; const Col* cz = nullptr;
+    for(const auto& c : cols)
+    {
+      if(c.name == "x") cx = &c;
+      else if(c.name == "y") cy = &c;
+      else if(c.name == "z") cz = &c;
+    }
+    if(cx && cy && cz
+       && cx->type == miniply::PLYPropertyType::Float
+       && cy->type == miniply::PLYPropertyType::Float
+       && cz->type == miniply::PLYPropertyType::Float)
+    {
+      const uint8_t* base = storage.get();
+      for(std::size_t i = 0; i < N; ++i) // size_t: i * row_stride can exceed 2^32
+      {
+        float x, y, z;
+        std::memcpy(&x, base + i * row_stride + cx->offset_in_row, sizeof(float));
+        std::memcpy(&y, base + i * row_stride + cy->offset_in_row, sizeof(float));
+        std::memcpy(&z, base + i * row_stride + cz->offset_in_row, sizeof(float));
+        bounds.expand(x, y, z);
+      }
+    }
+  }
+
+  // Wrap as a buffer_resource. Storage uses storage_buffer usage so
+  // ScenePreprocessor uploads it as an SSBO.
+  auto br = std::make_shared<ossia::buffer_resource>();
+  br->resource = ossia::buffer_data{
+      .data = std::shared_ptr<const void>(storage, storage.get()),
+      .byte_size = (int64_t)bytes,
+      .usage_hint = ossia::buffer_data::usage::storage_buffer};
+  br->content_hash = (uint64_t)(uintptr_t)storage.get();
+
+  auto out = std::make_shared<ossia::primitive_cloud_component>();
+  out->raw_data = std::move(br);
+  out->row_stride = row_stride;
+  out->primitive_count = N;
+  out->topology = ossia::primitive_topology::points;
+  out->format_id = detect_format_id(*vtx);
+  // For known formats, name the per-row struct so ScenePreprocessor
+  // exposes raw_data as a per-vertex `splat: <Type>` ATTRIBUTE and the
+  // CSF can declare a matching TYPES entry. Empty falls back to the
+  // legacy AUXILIARY raw_splats path.
+  if(out->format_id == "3dgs.classic")
+    out->struct_type_name = "Splat3DGS";
+  out->bounds = bounds;
+  out->stable_id = ossia::mint_stable_id();
+
+  // (format_params left empty for v1: format CSF authors declare the
+  // LAYOUT block themselves matching the PLY column order. Adding a
+  // reflective column-table here later is a pure addition — no
+  // consumer depends on its absence.)
+
+  return out;
+}
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.hpp
new file mode 100644
index 0000000000..0dad735832
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.hpp
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <span>
+#include <string_view>
+
+namespace Threedim::PrimitiveCloud
+{
+
+// Cheap header-only sniff: is the PLY file at `path` shaped like a
+// primitive cloud (no `face` element, or scalar columns outside the mesh
+// set {x,y,z,nx,ny,nz,red,green,blue,alpha,r,g,b,a,s,t,u,v,texture_u,
+// texture_v})? Reads only the textual header, doesn't load row data.
+bool ply_is_splat_shaped(std::string_view path);
+
+// Parse `path` and produce a primitive_cloud_component. The component's
+// raw_data is a single packed buffer of the PLY rows: each row is a
+// struct of the scalar columns in their PLY-declared order (list
+// columns are skipped), std430-style natural alignment: every field
+// sits at an offset rounded up to its own size (1, 2, 4 or 8 bytes),
+// so mixed-type rows can contain padding. The row stride is rounded up
+// to the largest field size in the row.
+//
+// Returns nullptr if the PLY is not splat-shaped, or if parsing fails.
+//
+// Sets format_id to a recognized signature when columns match a known
+// fingerprint:
+//   - has f_dc_0/1/2 + f_rest_* + scale_0/1/2 + rot_0/1/2/3 + opacity
+//     -> "3dgs.classic"
+//   - else empty (the user wires the chain by hand or saves a preset)
+ossia::primitive_cloud_component_ptr parse_ply(std::string_view path);
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.cpp
new file mode 100644
index 0000000000..e59b02daf4
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.cpp
@@ -0,0 +1,44 @@
+#include "SceneFromCloud.hpp"
+
+#include <cstdint>
+#include <string>
+
+namespace Threedim::PrimitiveCloud
+{
+
+std::shared_ptr<ossia::scene_state> sceneStateFromCloud(
+    ossia::primitive_cloud_component_ptr cloud,
+    std::string_view source_label)
+{
+  if(!cloud)
+    return nullptr;
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->push_back(ossia::primitive_cloud_component_ptr{cloud});
+
+  auto node = std::make_shared<ossia::scene_node>();
+  // Stable id keyed on the cloud's raw_data pointer. Required by the
+  // registry's slot allocator: a 0 id is uncacheable and the cloud
+  // disappears between frames.
+  uint64_t key = 0;
+  if(cloud->raw_data)
+    key = (uint64_t)((uintptr_t)cloud->raw_data.get());
+  if(key == 0)
+    key = (uint64_t)((uintptr_t)cloud.get());
+  node->id.value = key;
+  node->name = source_label.empty()
+                   ? std::string("primitive_cloud")
+                   : std::string(source_label);
+  node->children = std::move(children);
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->push_back(std::move(node));
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(roots);
+  state->version = 1;
+  state->dirty_index = 1;
+  return state;
+}
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.hpp
new file mode 100644
index 0000000000..550c2cf758
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <memory>
+#include <string_view>
+
+namespace Threedim::PrimitiveCloud
+{
+
+// Wrap a parsed primitive_cloud_component into a fresh scene_state with
+// one scene_node carrying it as its sole payload. Mirrors
+// SceneFromMeshes::sceneStateFromMeshes for the splat path.
+//
+// `source_label` becomes the scene_node name (typically the source
+// filename). Returns nullptr if `cloud` is null.
+std::shared_ptr<ossia::scene_state> sceneStateFromCloud(
+    ossia::primitive_cloud_component_ptr cloud,
+    std::string_view source_label = {});
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.cpp
new file mode 100644
index 0000000000..43136ad83f
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.cpp
@@ -0,0 +1,56 @@
+#include "SplatBinary.hpp"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+namespace Threedim::PrimitiveCloud
+{
+
+ossia::primitive_cloud_component_ptr parse_splat_binary(std::string_view bytes)
+{
+  constexpr uint32_t kRowSize = 32;
+  if(bytes.empty() || (bytes.size() % kRowSize) != 0)
+    return nullptr;
+
+  const uint32_t N = (uint32_t)(bytes.size() / kRowSize);
+  if(N == 0)
+    return nullptr;
+
+  // Copy into a stable shared buffer. The input string_view points at
+  // halp's mmap or text-file storage which doesn't outlive this call.
+  auto storage = std::shared_ptr<uint8_t[]>(new uint8_t[bytes.size()]);
+  std::memcpy(storage.get(), bytes.data(), bytes.size());
+
+  // AABB from first 12 bytes of each row (xyz floats).
+  ossia::aabb bounds{};
+  bounds.min[0] = bounds.min[1] = bounds.min[2] = 1.f;
+  bounds.max[0] = bounds.max[1] = bounds.max[2] = -1.f;
+  for(std::size_t i = 0; i < N; ++i) // size_t: i * kRowSize can exceed 2^32
+  {
+    float x, y, z;
+    std::memcpy(&x, storage.get() + i * kRowSize + 0,  sizeof(float));
+    std::memcpy(&y, storage.get() + i * kRowSize + 4,  sizeof(float));
+    std::memcpy(&z, storage.get() + i * kRowSize + 8,  sizeof(float));
+    bounds.expand(x, y, z);
+  }
+
+  auto br = std::make_shared<ossia::buffer_resource>();
+  br->resource = ossia::buffer_data{
+      .data = std::shared_ptr<const void>(storage, storage.get()),
+      .byte_size = (int64_t)bytes.size(),
+      .usage_hint = ossia::buffer_data::usage::storage_buffer};
+  br->content_hash = (uint64_t)(uintptr_t)storage.get();
+
+  auto out = std::make_shared<ossia::primitive_cloud_component>();
+  out->raw_data = std::move(br);
+  out->row_stride = kRowSize;
+  out->primitive_count = N;
+  out->topology = ossia::primitive_topology::points;
+  out->format_id = "3dgs.splat-binary";
+  out->bounds = bounds;
+  out->stable_id = ossia::mint_stable_id();
+  return out;
+}
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.hpp
new file mode 100644
index 0000000000..886337dd25
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.hpp
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <string_view>
+
+namespace Threedim::PrimitiveCloud
+{
+
+// Parse an Antimatter15 .splat file (32 bytes per primitive,
+// fixed schema).
+//
+// On-disk row layout (little-endian, packed, no padding):
+//   bytes  0..11   position xyz, 3 × float32
+//   bytes 12..23   scale_xyz, 3 × float32 (linear, NOT log-space)
+//   bytes 24..27   color rgba, 4 × uint8 unorm
+//   bytes 28..31   rotation quat, 4 × uint8 (sign-encoded as
+//                  (q + 1) * 127.5 around index 0; recipient
+//                  reconstructs by (b - 128) / 128)
+//
+// We pass these bytes through verbatim. The "3dgs.splat-binary" preset's
+// CSF declares the matching LAYOUT, dequantizes color8 to color, and
+// reconstructs the quat from the int8s.
+//
+// Returns nullptr if `bytes.size() % 32 != 0` or the input is empty.
+ossia::primitive_cloud_component_ptr parse_splat_binary(std::string_view bytes);
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.cpp
new file mode 100644
index 0000000000..64076bf3ac
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.cpp
@@ -0,0 +1,160 @@
+#include "SpzCodec.hpp"
+
+#include <load-spz.h>
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+namespace Threedim::PrimitiveCloud
+{
+
+namespace
+{
+
+// Canonical row layout matching what the 3dgs.classic preset's
+// AUXILIARY LAYOUT in 01_Decode.cs expects. Field offsets in floats.
+struct CanonicalRow
+{
+  static constexpr uint32_t kFloats = 62;
+  static constexpr uint32_t kBytes  = kFloats * sizeof(float);
+
+  static constexpr uint32_t kPos    = 0;   // 3 floats
+  static constexpr uint32_t kNormal = 3;   // 3 floats (zero-filled)
+  static constexpr uint32_t kSHDC   = 6;   // 3 floats
+  static constexpr uint32_t kSHRest = 9;   // 45 floats (channel-major)
+  static constexpr uint32_t kAlpha  = 54;  // 1 float (pre-sigmoid)
+  static constexpr uint32_t kScale  = 55;  // 3 floats (log-space)
+  static constexpr uint32_t kRot    = 58;  // 4 floats (w,x,y,z)
+};
+
+} // namespace
+
+ossia::primitive_cloud_component_ptr parse_spz(std::string_view bytes)
+{
+  if(bytes.empty())
+    return nullptr;
+
+  // The Niantic library expects the gzipped/NGSP payload as a
+  // std::vector<uint8_t>. Copy in (single allocation; the cost is
+  // dwarfed by the gzip inflate). Specify RUB→RDF in the unpack
+  // options so the library handles the basis flip for us.
+  std::vector<uint8_t> data(
+      reinterpret_cast<const uint8_t*>(bytes.data()),
+      reinterpret_cast<const uint8_t*>(bytes.data()) + bytes.size());
+
+  spz::UnpackOptions opts;
+  opts.to = spz::CoordinateSystem::RDF;
+
+  spz::GaussianCloud cloud = spz::loadSpz(data, opts);
+  if(cloud.numPoints <= 0 || cloud.positions.empty())
+    return nullptr;
+
+  const uint32_t N        = (uint32_t)cloud.numPoints;
+  const uint32_t shDeg    = (uint32_t)cloud.shDegree;
+  const uint32_t shCoefs  = (shDeg == 0) ? 0
+                          : (shDeg == 1) ? 3
+                          : (shDeg == 2) ? 8
+                          : (shDeg == 3) ? 15
+                          : 24; // degree 4
+  const uint32_t restPad  = 15;  // 3dgs.classic preset always reads 15 R/G/B coefs
+
+  if(cloud.positions.size() != (size_t)N * 3
+     || cloud.scales.size() != (size_t)N * 3
+     || cloud.rotations.size() != (size_t)N * 4
+     || cloud.alphas.size() != (size_t)N
+     || cloud.colors.size() != (size_t)N * 3)
+  {
+    return nullptr;
+  }
+  if(shCoefs > 0 && cloud.sh.size() != (size_t)N * shCoefs * 3)
+    return nullptr;
+
+  const std::size_t totalBytes
+      = (std::size_t)N * (std::size_t)CanonicalRow::kBytes;
+  auto storage = std::shared_ptr<uint8_t[]>(new uint8_t[totalBytes]());
+
+  ossia::aabb bounds{};
+  bounds.min[0] = bounds.min[1] = bounds.min[2] = 1.f;
+  bounds.max[0] = bounds.max[1] = bounds.max[2] = -1.f;
+
+  // Effective coefficient count we'll actually fill per-channel
+  // (clamped to 15 — preset hardcodes 45 = 3·15 rest floats; degree-4
+  // input gets truncated to degree 3 here, lossy but renderable).
+  const uint32_t fillCoefs = (shCoefs > restPad) ? restPad : shCoefs;
+
+  float* base = reinterpret_cast<float*>(storage.get());
+  for(uint32_t i = 0; i < N; ++i)
+  {
+    float* row = base + (std::size_t)i * CanonicalRow::kFloats;
+
+    // Position.
+    const float x = cloud.positions[i * 3 + 0];
+    const float y = cloud.positions[i * 3 + 1];
+    const float z = cloud.positions[i * 3 + 2];
+    row[CanonicalRow::kPos + 0] = x;
+    row[CanonicalRow::kPos + 1] = y;
+    row[CanonicalRow::kPos + 2] = z;
+    bounds.expand(x, y, z);
+
+    // Normals — not stored in SPZ; leave zero-filled.
+
+    // SH DC (= colors).
+    row[CanonicalRow::kSHDC + 0] = cloud.colors[i * 3 + 0];
+    row[CanonicalRow::kSHDC + 1] = cloud.colors[i * 3 + 1];
+    row[CanonicalRow::kSHDC + 2] = cloud.colors[i * 3 + 2];
+
+    // SH rest. SPZ packs (R,G,B) inner per coefficient; PLY canonical
+    // is channel-major (R block, G block, B block) per row. Transpose.
+    if(fillCoefs > 0)
+    {
+      const float* sh_src
+          = cloud.sh.data() + (std::size_t)i * shCoefs * 3;
+      float* shR = row + CanonicalRow::kSHRest + 0  * restPad;
+      float* shG = row + CanonicalRow::kSHRest + 1  * restPad;
+      float* shB = row + CanonicalRow::kSHRest + 2  * restPad;
+      for(uint32_t c = 0; c < fillCoefs; ++c)
+      {
+        shR[c] = sh_src[c * 3 + 0];
+        shG[c] = sh_src[c * 3 + 1];
+        shB[c] = sh_src[c * 3 + 2];
+      }
+      // Remaining coefs (fillCoefs..restPad) stay zero.
+    }
+
+    // Alpha — both formats store the pre-sigmoid value; pass through.
+    row[CanonicalRow::kAlpha] = cloud.alphas[i];
+
+    // Scale (log-space).
+    row[CanonicalRow::kScale + 0] = cloud.scales[i * 3 + 0];
+    row[CanonicalRow::kScale + 1] = cloud.scales[i * 3 + 1];
+    row[CanonicalRow::kScale + 2] = cloud.scales[i * 3 + 2];
+
+    // Rotation. SPZ: (x,y,z,w). PLY canonical: (w,x,y,z).
+    row[CanonicalRow::kRot + 0] = cloud.rotations[i * 4 + 3]; // w
+    row[CanonicalRow::kRot + 1] = cloud.rotations[i * 4 + 0]; // x
+    row[CanonicalRow::kRot + 2] = cloud.rotations[i * 4 + 1]; // y
+    row[CanonicalRow::kRot + 3] = cloud.rotations[i * 4 + 2]; // z
+  }
+
+  auto br = std::make_shared<ossia::buffer_resource>();
+  br->resource = ossia::buffer_data{
+      .data = std::shared_ptr<const void>(storage, storage.get()),
+      .byte_size = (int64_t)totalBytes,
+      .usage_hint = ossia::buffer_data::usage::storage_buffer};
+  br->content_hash = (uint64_t)(uintptr_t)storage.get();
+
+  auto out = std::make_shared<ossia::primitive_cloud_component>();
+  out->raw_data        = std::move(br);
+  out->row_stride      = CanonicalRow::kBytes;
+  out->primitive_count = N;
+  out->topology        = ossia::primitive_topology::points;
+  out->format_id        = "3dgs.classic";
+  out->struct_type_name = "Splat3DGS";
+  out->bounds           = bounds;
+  out->stable_id        = ossia::mint_stable_id();
+  return out;
+}
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.hpp
new file mode 100644
index 0000000000..975e1358de
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <span>
+#include <string_view>
+
+namespace Threedim::PrimitiveCloud
+{
+
+// Decode a Niantic SPZ v1-3 file into a primitive_cloud_component.
+//
+// SPZ stores splats column-grouped (positions, then scales, then
+// rotations, then alphas, then colors, then SH) inside a gzip-
+// compressed payload, in the RUB coordinate system. We unpack via
+// the vendored Niantic library, rotate to RDF (the convention every
+// existing 3dgs.classic preset assumes), then transpose into the
+// canonical 62-float / 248-byte PLY-compatible row layout:
+//
+//   floats 0..2    x, y, z
+//   floats 3..5    nx, ny, nz   (zero — not in SPZ)
+//   floats 6..8    f_dc_0..2     (SH DC = colors)
+//   floats 9..53   f_rest_0..44  (R coeffs, then G, then B; padded
+//                                 with zero for shDegree<3)
+//   float  54       opacity (pre-sigmoid)
+//   floats 55..57  scale_0..2 (log-space)
+//   floats 58..61  rot_0..3   (PLY convention w,x,y,z)
+//
+// Returns nullptr on parse failure or v4 (ZSTD) files; converting v4 → v3
+// with the upstream `spz-tool` works around it. NOTE(review): 3rdparty/zstd
+// is vendored by this change — confirm the v4 limit still holds. Sets
+// format_id = "3dgs.classic" so the existing preset picks it up transparently.
+ossia::primitive_cloud_component_ptr parse_spz(std::string_view bytes);
+
+} // namespace Threedim::PrimitiveCloud
diff --git a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp
index bc118514c7..3dcf0ce2a1 100644
--- a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp
@@ -7,6 +7,9 @@
 #include <Gfx/ISFProcess.hpp>
 #include <Gfx/TexturePort.hpp>
 
+#include <score/document/DocumentInterface.hpp>
+#include <score/tools/FilePath.hpp>
+
 #include <QDir>
 #include <QFileInfo>
 #include <QImageReader>
@@ -29,8 +32,18 @@ Model::Model(
     {
       if(QFile fs{init}; fs.open(QIODevice::ReadOnly))
       {
-        QFile vs{fi.absolutePath() + QDir::separator() + fi.baseName() + ".vs"};
-        if(vs.open(QIODevice::ReadOnly))
+        m_scriptPath = init;
+        if(QFile vs{fi.absolutePath() + QDir::separator() + fi.baseName() + ".vs"};
+           vs.open(QIODevice::ReadOnly))
+        {
+          (void)setProgram(
+              {ShaderSource::ProgramType::RawRasterPipeline, vs.readAll(),
+               fs.readAll()});
+          return;
+        }
+        else if(
+            QFile vs{fi.absolutePath() + QDir::separator() + fi.baseName() + ".vert"};
+            vs.open(QIODevice::ReadOnly))
         {
           (void)setProgram(
               {ShaderSource::ProgramType::RawRasterPipeline, vs.readAll(),
@@ -118,7 +131,7 @@ bool Model::validate(const std::vector<QString>& txt) const noexcept
 {
   ShaderSource src{txt};
   src.type = isf::parser::ShaderType::RawRasterPipeline;
-  const auto& [_, error] = ProgramCache::instance().get(src);
+  const auto& [_, error] = ProgramCache::instance().get(src, m_scriptPath);
   if(!error.isEmpty())
   {
     this->errorMessage(error);
@@ -152,7 +165,9 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f)
   f.type = ProcessedProgram::ProgramType::RawRasterPipeline;
   setVertex(f.vertex);
   setFragment(f.fragment);
-  if(const auto& [processed, error] = ProgramCache::instance().get(f); bool(processed))
+  if(const auto& [processed, error]
+     = ProgramCache::instance().get(f, m_scriptPath);
+     bool(processed))
   {
     ossia::flat_map<QString, ossia::value> previous_values;
     for(auto inl : m_inlets)
@@ -164,7 +179,6 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f)
 
     m_processedProgram = *processed;
 
-    qDebug() << (int)f.type << (int)processed->type;
     //    initDefaultPorts();
 
     m_inlets.push_back(new GeometryInlet{"Geometry In", Id<Process::Port>(1000), this});
@@ -177,7 +191,7 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f)
   }
   else
   {
-    qDebug() << "Error while processing program: " << error;
+    qWarning() << "RenderPipeline: error while processing program:" << error;
   }
   return {};
 }
@@ -190,7 +204,9 @@ Process::Descriptor ProcessFactory::descriptor(QString path) const noexcept
 template <>
 void DataStreamReader::read(const Gfx::RenderPipeline::Model& proc)
 {
-  m_stream << proc.m_program;
+  auto& ctx = score::IDocument::documentContext(proc);
+  m_stream << proc.m_program
+           << score::relativizeFilePath(proc.m_scriptPath, ctx);
 
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 
@@ -201,7 +217,12 @@ template <>
 void DataStreamWriter::write(Gfx::RenderPipeline::Model& proc)
 {
   Gfx::ShaderSource s;
-  m_stream >> s;
+  m_stream >> s >> proc.m_scriptPath;
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+  }
   s.type = isf::parser::ShaderType::RawRasterPipeline;
   (void)proc.setProgram(s);
 
@@ -217,6 +238,11 @@ void JSONReader::read(const Gfx::RenderPipeline::Model& proc)
 {
   obj["Vertex"] = proc.vertex();
   obj["Fragment"] = proc.fragment();
+  if(!proc.m_scriptPath.isEmpty())
+  {
+    auto& ctx = score::IDocument::documentContext(proc);
+    obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx);
+  }
 
   readPorts(*this, proc.m_inlets, proc.m_outlets);
 }
@@ -228,6 +254,15 @@ void JSONWriter::write(Gfx::RenderPipeline::Model& proc)
   s.vertex = obj["Vertex"].toString();
   s.fragment = obj["Fragment"].toString();
   s.type = isf::parser::ShaderType::ISF;
+  if(auto r = obj.tryGet("Root"))
+  {
+    proc.m_scriptPath <<= *r;
+    if(!proc.m_scriptPath.isEmpty())
+    {
+      auto& ctx = score::IDocument::documentContext(proc);
+      proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx);
+    }
+  }
   (void)proc.setProgram(s);
 
   writePorts(
diff --git a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp
index f0aa99f504..dcf3486a93 100644
--- a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp
@@ -68,12 +68,17 @@ class Model final : public Process::ProcessModel
 
   void errorMessage(const QString& arg_2) const W_SIGNAL(errorMessage, arg_2);
 
+  // Absolute path of the shader file this model was loaded from. Used as
+  // the base for quoted #include resolution. Empty for in-memory source.
+  QString rootPath() const noexcept { return m_scriptPath; }
+
 private:
   void init();
   void initDefaultPorts();
   QString prettyName() const noexcept override;
   ShaderSource m_program;
   ProcessedProgram m_processedProgram;
+  QString m_scriptPath;
 };
 
 struct ProcessFactory final : Process::ProcessFactory_T<Gfx::RenderPipeline::Model>
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.cpp
new file mode 100644
index 0000000000..b08a3dedf6
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.cpp
@@ -0,0 +1,166 @@
+#include "SceneDuplicator.hpp"
+
+#include <QQuaternion>
+
+#include <cmath>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Compute the TRS of clone `idx` (0-based) among `count` for pattern
+// `mode`. Translations stay in the XZ plane (Y = 0), scale is identity,
+// and only Ring overwrites the rotation. Unknown modes fall back to Line.
+ossia::scene_transform
+transformForIndex(int idx, int count, int mode, int grid_cols_hint,
+                  float spacing, float radius) noexcept
+{
+  ossia::scene_transform t;
+  t.rotation[3] = 1.f;
+  t.scale[0] = t.scale[1] = t.scale[2] = 1.f;
+
+  switch(mode)
+  {
+    case SceneDuplicator::Grid:
+    {
+      // hint <= 0 means auto (square-ish). A hint wider than `count` is
+      // clamped, otherwise the lone partial row would sit off-center.
+      const int cols = grid_cols_hint > 0
+          ? std::min(grid_cols_hint, std::max(1, count))
+          : std::max(1, (int)std::round(std::sqrt(double(count))));
+      const int row = idx / cols;
+      const int col = idx % cols;
+      // Center the grid around the origin.
+      const int rows = (count + cols - 1) / cols;
+      t.translation[0] = (col - 0.5f * (cols - 1)) * spacing;
+      t.translation[1] = 0.f;
+      t.translation[2] = (row - 0.5f * (rows - 1)) * spacing;
+      break;
+    }
+    case SceneDuplicator::Ring:
+    {
+      const float theta = (count > 0)
+          ? (float(idx) / float(count)) * 2.f * float(M_PI)
+          : 0.f;
+      t.translation[0] = radius * std::cos(theta);
+      t.translation[1] = 0.f;
+      t.translation[2] = radius * std::sin(theta);
+      // A yaw of -theta about Y maps local +X onto the outward radial
+      // direction. NOTE(review): confirm +X is the intended facing axis.
+      auto q = QQuaternion::fromEulerAngles(
+          0.f, -theta * 180.f / float(M_PI), 0.f);
+      t.rotation[0] = q.x();
+      t.rotation[1] = q.y();
+      t.rotation[2] = q.z();
+      t.rotation[3] = q.scalar();
+      break;
+    }
+    case SceneDuplicator::Line:
+    default:
+    {
+      t.translation[0] = (idx - 0.5f * (count - 1)) * spacing;
+      t.translation[1] = 0.f;
+      t.translation[2] = 0.f;
+      break;
+    }
+  }
+  return t;
+}
+
+// Create the root node of clone number `idx`. Returned node layout:
+//   scene_node { name = "<base>_<idx>", children = [
+//       scene_transform(xform),
+//       ...non-null prototype roots (the same scene_node_ptr, not copies)
+//   ]}
+ossia::scene_node_ptr makeCloneRoot(
+    const std::vector<ossia::scene_node_ptr>& proto_roots,
+    const std::string& base_name, int idx,
+    const ossia::scene_transform& xform, int64_t dirty_index)
+{
+  auto clone = std::make_shared<ossia::scene_node>();
+  clone->name = base_name + "_" + std::to_string(idx);
+  clone->dirty_index = dirty_index;
+
+  // The transform goes first, followed by the shared prototype roots.
+  auto payloads = std::make_shared<std::vector<ossia::scene_payload>>();
+  payloads->reserve(proto_roots.size() + 1);
+  payloads->push_back(xform);
+  for(const auto& proto : proto_roots)
+    if(proto)
+      payloads->push_back(proto);
+  clone->children = std::move(payloads);
+  return clone;
+}
+
+} // namespace
+
+void SceneDuplicator::rebuild()
+{
+  const auto& upstream = inputs.scene_in.scene;
+  const ossia::scene_state* proto = upstream.state.get();
+  m_cached_in_state = proto;
+  m_cached_in_version = proto ? proto->version : -1;
+
+  // Nothing to clone: forward the input state (possibly null) as-is.
+  if(!proto || !proto->roots || proto->roots->empty())
+  {
+    m_cached_out = upstream.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  // Clones are named "<base>_<idx>": base is the first root's name, or
+  // "Clone" when that root is null or has an empty name.
+  const auto& first_root = proto->roots->front();
+  std::string base = first_root ? first_root->name : std::string{};
+  if(base.empty())
+    base = "Clone";
+
+  const int clone_count = std::max(1, inputs.count.value);
+  const int64_t version = ++m_version_counter;
+
+  auto clones = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  clones->reserve(clone_count);
+  for(int i = 0; i < clone_count; ++i)
+  {
+    clones->push_back(makeCloneRoot(
+        *proto->roots, base, i,
+        transformForIndex(
+            i, clone_count, inputs.pattern.value, inputs.grid_cols.value,
+            inputs.spacing.value, inputs.radius.value),
+        version));
+  }
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(clones);
+  // Everything except the roots is shared with the input state: materials,
+  // animations, cameras, skeletons, environment and the active camera id.
+  state->materials = proto->materials;
+  state->animations = proto->animations;
+  state->cameras = proto->cameras;
+  state->skeletons = proto->skeletons;
+  state->environment = proto->environment;
+  state->active_camera_id = proto->active_camera_id;
+  state->version = version;
+  state->dirty_index = version;
+
+  m_cached_out = state;
+  m_pending_dirty = 0xFF;
+}
+
+void SceneDuplicator::operator()()
+{
+  // Rebuild only when the upstream pointer/version moved: a null input is
+  // a valid cached result and must not re-raise the dirty mask each tick.
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  if(m_cached_in_state != in_state || m_cached_in_version != in_version)
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.hpp
new file mode 100644
index 0000000000..107bd6807b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.hpp
@@ -0,0 +1,117 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+namespace Threedim
+{
+
+// Scene-graph-level duplicator. Given a prototype scene_spec, emits N
+// cloned root nodes placed by a procedural pattern. Complementary to
+// Instancer:
+//
+//   Instancer          = GPU-primitive instancing (one mesh, N instances,
+//                        one draw call — scales to 1M+ particles, but the
+//                        prototype is a single mesh).
+//   SceneDuplicator    = scene-graph instancing (rich prototype w/
+//                        hierarchy / multiple meshes / lights, N CPU
+//                        clones each with its own TRS — scales to
+//                        dozens-to-a-few-hundreds).
+//
+// Materials / animations / skeletons / environment pass through from the
+// prototype unchanged (shared across clones). Only the root-level node
+// tree is cloned so downstream path-based tooling addresses each clone
+// independently via `/<prototype_root>_<n>/...`.
+//
+// Patterns:
+//   Grid   — `count` clones laid out on an XZ grid with `spacing`. Y=0.
+//   Ring   — `count` clones on a circle in the XZ plane of `radius`
+//            centered at the origin, facing outward (rotated around Y).
+//   Line   — `count` clones along X, centered on the origin, `spacing` apart.
+//
+// Downstream addressing: each clone's root node is named
+// `<proto_name>_<idx>` (0-indexed), so:
+//   SceneDuplicator(prototype=ChairScene, mode=Ring, count=8) →
+//     /Chair_0, /Chair_1, … /Chair_7
+//   ConfigurePrimitive(paths=["/Chair_*"], active=false) → disables all
+class SceneDuplicator
+{
+public:
+  halp_meta(name, "Scene Duplicator")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_duplicator")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-duplicator.html")
+  halp_meta(uuid, "9e7a4b3d-5f2c-4a8b-9d1e-6c3f8b5d2a7e")
+
+  enum Pattern // enumerator order matches the combobox `values` below
+  {
+    Grid,
+    Ring,
+    Line
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0}; // not read by rebuild()/operator()(): see m_cached_in_*
+    } scene_in;
+
+    // Every control below calls rebuild() from its update() hook; a change
+    // of the upstream scene_in state is detected in operator()() instead.
+    struct : halp::combobox_t<"Pattern", Pattern>
+    {
+      struct range
+      {
+        std::string_view values[3]{"Grid", "Ring", "Line"};
+        int init{0};
+      };
+      void update(SceneDuplicator& n) { n.rebuild(); }
+    } pattern;
+
+    struct : halp::spinbox_i32<"Count", halp::irange{1, 4096, 4}>
+    { void update(SceneDuplicator& n) { n.rebuild(); } } count;
+    struct : halp::hslider_f32<"Spacing", halp::range{0.01, 1000., 2.}>
+    { void update(SceneDuplicator& n) { n.rebuild(); } } spacing;
+    struct : halp::hslider_f32<"Radius", halp::range{0.01, 1000., 5.}>
+    { void update(SceneDuplicator& n) { n.rebuild(); } } radius;
+    // Grid mode only: number of columns of the `cols × rows` grid.
+    // 0 = auto, i.e. cols = round(sqrt(count)) (square-ish); any positive
+    // value lets the user force a specific aspect.
+    struct : halp::spinbox_i32<"Grid cols", halp::irange{0, 256, 0}>
+    { void update(SceneDuplicator& n) { n.rebuild(); } } grid_cols;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0}; // receives m_pending_dirty on every operator()() call
+    } scene_out;
+  } outputs;
+
+  void rebuild();    // recomputes m_cached_out from the current inputs
+  void operator()(); // per-tick entry point: rebuilds if needed, then publishes
+
+  // Output state, kept as the same shared_ptr while inputs are unchanged —
+  // keeps ScenePreprocessor's fingerprint fast-path warm.
+  std::shared_ptr<const ossia::scene_state> m_cached_out;
+  uint8_t m_pending_dirty{0xFF}; // set to 0xFF by rebuild(), cleared once published
+  const ossia::scene_state* m_cached_in_state{}; // input seen by the last rebuild (compared by address)
+  int64_t m_cached_in_version{-1}; // version of that input, -1 when it was null
+  int64_t m_version_counter{0}; // incremented by each rebuild() that creates clones
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.cpp
new file mode 100644
index 0000000000..edaf5cacce
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.cpp
@@ -0,0 +1,66 @@
+#include "Executor.hpp"
+
+#include <Gfx/GfxApplicationPlugin.hpp>
+#include <Gfx/GfxContext.hpp>
+#include <Gfx/GfxExecNode.hpp>
+#include <Gfx/Graph/SceneFilterNode.hpp>
+#include <Process/Dataflow/Port.hpp>
+#include <Process/ExecutionContext.hpp>
+#include <Threedim/SceneFilter/Process.hpp>
+
+#include <ossia/dataflow/port.hpp>
+
+#include <score/document/DocumentContext.hpp>
+
+namespace Gfx::SceneFilter
+{
+class scene_filter_exec_node final : public gfx_exec_node
+{
+public:
+  scene_filter_exec_node(GfxExecutionAction& ctx)
+      : gfx_exec_node{ctx}
+  {
+  }
+
+  void init() // registers a SceneFilterNode and keeps the returned id
+  {
+    auto node = std::make_unique<score::gfx::SceneFilterNode>();
+    id = exec_context->ui->register_node(std::move(node));
+  }
+
+  ~scene_filter_exec_node() { exec_context->ui->unregister_node(id); } // NOTE(review): assumes init() ran — confirm
+
+  std::string label() const noexcept override { return "Gfx::SceneFilter_node"; }
+};
+
+ProcessExecutorComponent::ProcessExecutorComponent(
+    Gfx::SceneFilter::Model& element,
+    const Execution::Context& ctx,
+    QObject* parent)
+    : ProcessComponent_T{element, ctx, "sceneFilterComponent", parent}
+{
+  auto exec = ossia::make_node<scene_filter_exec_node>(
+      *ctx.execState, ctx.doc.plugin<DocumentPlugin>().exec);
+
+  // Port order follows Model::init(): scene inlet, "Mode" control, outlet.
+  exec->add_geometry();
+  {
+    auto mode = qobject_cast<Process::ControlInlet*>(element.inlets()[1]);
+    auto& port = exec->add_control();
+    mode->setupExecution(*exec->root_inputs().back(), this);
+    port->value = mode->value();
+    QObject::connect(mode, &Process::ControlInlet::valueChanged, this,
+                     con_unvalidated{ctx, 1, 0, exec});
+  }
+  exec->add_geometry_out();
+  exec->init();
+
+  this->node = exec;
+  m_ossia_process = std::make_shared<ossia::node_process>(exec);
+}
+
+void ProcessExecutorComponent::cleanup()
+{
+  ProcessComponent_T::cleanup(); // no SceneFilter-specific teardown: only the base-class cleanup runs
+}
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.hpp
similarity index 68%
rename from src/plugins/score-plugin-threedim/Threedim/Splat/Executor.hpp
rename to src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.hpp
index a171183670..7e37bba397 100644
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.hpp
@@ -3,13 +3,14 @@
 
 #include <ossia/dataflow/node_process.hpp>
 
-namespace Gfx::Splat
+namespace Gfx::SceneFilter
 {
 class Model;
 class ProcessExecutorComponent final
-    : public Execution::ProcessComponent_T<Gfx::Splat::Model, ossia::node_process>
+    : public Execution::
+          ProcessComponent_T<Gfx::SceneFilter::Model, ossia::node_process>
 {
-  COMPONENT_METADATA("1df594a9-f028-4c73-82d3-4d8c4a2ebc5b")
+  COMPONENT_METADATA("f1a2b3c4-d5e6-4a7b-8c9d-0e1f2a3b4c5d")
 public:
   ProcessExecutorComponent(
       Model& element, const Execution::Context& ctx, QObject* parent);
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Metadata.hpp
new file mode 100644
index 0000000000..e527a011ca
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Metadata.hpp
@@ -0,0 +1,22 @@
+#pragma once
+#include <Process/ProcessMetadata.hpp>
+
+namespace Gfx::SceneFilter
+{
+class Model;
+}
+
+PROCESS_METADATA(
+    , Gfx::SceneFilter::Model, "c2d8e9a4-3f5b-4e7c-9a1d-6b7e8c2f1a3b",
+    "scenefilter", // presumably the object key — confirm against PROCESS_METADATA
+    "Scene Filter", // same text as Model::prettyName()
+    Process::ProcessCategory::Visual,
+    "Visuals/3D/Scene", // same category string as SceneDuplicator's halp_meta(category)
+    "Filter the hierarchy of a scene_spec (visibility, layers, names)", // NOTE(review): Model::init() only offers "pass through" / "keep visible only" — confirm
+    "ossia team",
+    (QStringList{"gfx", "scene", "filter", "3d", "hierarchy"}),
+    {},
+    {},
+    QUrl{},
+    Process::ProcessFlags::SupportsAll
+)
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.cpp
new file mode 100644
index 0000000000..82fdb4a812
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.cpp
@@ -0,0 +1,76 @@
+#include "Process.hpp"
+
+#include <score/application/ApplicationComponents.hpp>
+
+#include <Process/Dataflow/Port.hpp>
+#include <Process/Dataflow/WidgetInlets.hpp>
+
+#include <Gfx/Graph/Node.hpp>
+#include <Gfx/TexturePort.hpp>
+
+#include <wobjectimpl.h>
+
+W_OBJECT_IMPL(Gfx::SceneFilter::Model)
+namespace Gfx::SceneFilter
+{
+
+Model::Model(
+    const TimeVal& duration, const Id<Process::ProcessModel>& id, QObject* parent)
+    : Process::ProcessModel{duration, id, "gfxProcess", parent}
+{
+  metadata().setInstanceName(*this);
+  init(); // ports are presumably still empty on this path, so the defaults get created
+}
+
+Model::~Model() = default;
+
+void Model::init()
+{
+  // After deserialization the ports already exist: create defaults only once.
+  if(!m_inlets.empty() || !m_outlets.empty())
+    return;
+  m_inlets.push_back(new GeometryInlet{"Scene In", Id<Process::Port>(0), this});
+  m_inlets.push_back(new Process::ComboBox{
+      std::vector<std::pair<QString, ossia::value>>{
+          {QStringLiteral("pass through"), 0},
+          {QStringLiteral("keep visible only"), 1}},
+      0, "Mode", Id<Process::Port>(1), this});
+  m_outlets.push_back(new GeometryOutlet{"Scene Out", Id<Process::Port>(0), this});
+}
+
+QString Model::prettyName() const noexcept
+{
+  return tr("Scene Filter"); // translatable; same text as the "Scene Filter" entry in Metadata.hpp
+}
+
+}
+
+template <>
+void DataStreamReader::read(const Gfx::SceneFilter::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets); // the ports are the only model state handled here
+  insertDelimiter(); // counterpart: checkDelimiter() in DataStreamWriter::write
+}
+
+template <>
+void DataStreamWriter::write(Gfx::SceneFilter::Model& proc)
+{
+  writePorts( // presumably recreates the ports into proc.m_inlets / proc.m_outlets — confirm
+      *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
+      proc.m_outlets, &proc);
+  checkDelimiter(); // counterpart: insertDelimiter() in DataStreamReader::read
+}
+
+template <>
+void JSONReader::read(const Gfx::SceneFilter::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets); // the ports are the only model state handled here
+}
+
+template <>
+void JSONWriter::write(Gfx::SceneFilter::Model& proc)
+{
+  writePorts( // presumably recreates the ports into proc.m_inlets / proc.m_outlets — confirm
+      *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
+      proc.m_outlets, &proc);
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.hpp
new file mode 100644
index 0000000000..2c2b3e140e
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.hpp
@@ -0,0 +1,37 @@
+#pragma once
+#include <Gfx/CommandFactory.hpp>
+#include <Threedim/SceneFilter/Metadata.hpp>
+#include <Process/GenericProcessFactory.hpp>
+#include <Process/Process.hpp>
+
+namespace Gfx::SceneFilter
+{
+class Model final : public Process::ProcessModel
+{
+  SCORE_SERIALIZE_FRIENDS
+  PROCESS_METADATA_IMPL(Gfx::SceneFilter::Model)
+  W_OBJECT(Model)
+
+public:
+  Model( // creation constructor, defined in Process.cpp; it ends with init()
+      const TimeVal& duration,
+      const Id<Process::ProcessModel>& id,
+      QObject* parent);
+
+  template <typename Impl>
+  Model(Impl& vis, QObject* parent) // construction from a visitor `vis`
+      : Process::ProcessModel{vis, parent}
+  {
+    vis.writeTo(*this); // presumably dispatches to the write() specializations in Process.cpp — confirm
+    init();             // creates the default ports only when none exist
+  }
+
+  ~Model() override;
+
+private:
+  void init(); // adds "Scene In", "Mode" and "Scene Out" if both port lists are empty
+  QString prettyName() const noexcept override;
+};
+
+using ProcessFactory = Process::ProcessFactory_T<Gfx::SceneFilter::Model>;
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.cpp
new file mode 100644
index 0000000000..f91909607d
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.cpp
@@ -0,0 +1,211 @@
+#include "SceneFromMeshes.hpp"
+
+#include <cstring>
+#include <memory>
+#include <utility>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Map the semantic of a Threedim::mesh extra attribute (a
+// halp::attribute_semantic) to the ossia::attribute_semantic enum. The
+// eight semantics listed below map one-to-one; every other value becomes
+// `custom`. Only `e.semantic` is read here: NOTE(review) confirm where the
+// extra's `.name` is forwarded for by-name matching of custom attributes.
+static ossia::attribute_semantic
+translateExtraSemantic(const Threedim::extra_attribute& e) noexcept
+{
+  using In = halp::attribute_semantic;
+  using Out = ossia::attribute_semantic;
+  switch(e.semantic)
+  {
+    case In::position:  return Out::position;
+    case In::normal:    return Out::normal;
+    case In::tangent:   return Out::tangent;
+    case In::bitangent: return Out::bitangent;
+    case In::texcoord0: return Out::texcoord0;
+    case In::texcoord1: return Out::texcoord1;
+    case In::color0:    return Out::color0;
+    case In::color1:    return Out::color1;
+    default:            return Out::custom;
+  }
+}
+
+// Translate a halp::attribute_format into an ossia::vertex_format. The halp
+// value encodes both base type and component count, so `components` is
+// ignored. Formats without an entry below fall back to float3.
+// NOTE(review): no 3-component byte/short entries — confirm halp has none.
+static ossia::vertex_format
+translateFormat(halp::attribute_format f, int /*components*/) noexcept
+{
+  using In = halp::attribute_format;
+  using Out = ossia::vertex_format;
+  switch(f)
+  {
+    case In::float1:     return Out::float1;
+    case In::float2:     return Out::float2;
+    case In::float3:     return Out::float3;
+    case In::float4:     return Out::float4;
+    case In::half1:      return Out::half1;
+    case In::half2:      return Out::half2;
+    case In::half3:      return Out::half3;
+    case In::half4:      return Out::half4;
+    case In::uint1:      return Out::uint32x1;
+    case In::uint2:      return Out::uint32x2;
+    case In::uint3:      return Out::uint32x3;
+    case In::uint4:      return Out::uint32x4;
+    case In::sint1:      return Out::sint32x1;
+    case In::sint2:      return Out::sint32x2;
+    case In::sint3:      return Out::sint32x3;
+    case In::sint4:      return Out::sint32x4;
+    case In::unormbyte1: return Out::unorm8x1;
+    case In::unormbyte2: return Out::unorm8x2;
+    case In::unormbyte4: return Out::unorm8x4;
+    case In::ushort1:    return Out::uint16x1;
+    case In::ushort2:    return Out::uint16x2;
+    case In::ushort4:    return Out::uint16x4;
+    case In::sshort1:    return Out::sint16x1;
+    case In::sshort2:    return Out::sint16x2;
+    case In::sshort4:    return Out::sint16x4;
+    default:             break;
+  }
+  return Out::float3;
+}
+
+} // namespace
+
+// Build a scene_state with one root node per usable mesh part; every part
+// references the same shared CPU buffer. Returns nullptr if nothing is usable.
+std::shared_ptr<ossia::scene_state> sceneStateFromMeshes(
+    std::vector<Threedim::mesh> meshes,
+    Threedim::float_vec buffer,
+    std::string_view source_label)
+{
+  if(meshes.empty() || buffer.empty())
+    return nullptr;
+
+  // One CPU buffer shared across every mesh part. The buffer_resource holds
+  // a shared_ptr<const void>; it aliases a shared_ptr owning the float_vec,
+  // which preserves its lifetime and keeps the .data() address stable.
+  // Each mesh_primitive carries its own per-primitive vertex count.
+  const int64_t float_count = (int64_t)buffer.size();
+  const int64_t buffer_bytes = float_count * (int64_t)sizeof(float);
+  auto buf_owner = std::make_shared<Threedim::float_vec>(std::move(buffer));
+  std::shared_ptr<const void> buf_handle(buf_owner, buf_owner->data());
+
+  auto vertex_buf = std::make_shared<ossia::buffer_resource>();
+  vertex_buf->resource = ossia::buffer_data{
+      .data = std::move(buf_handle),
+      .byte_size = buffer_bytes,
+      .usage_hint = ossia::buffer_data::usage::vertex_buffer};
+  vertex_buf->content_hash = (uint64_t)(uintptr_t)buf_owner->data();
+  ossia::buffer_resource_ptr shared_buf{std::move(vertex_buf)};
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->reserve(meshes.size());
+  for(std::size_t i = 0; i < meshes.size(); ++i)
+  {
+    const auto& m = meshes[i];
+    if(m.vertices <= 0)
+      continue;
+    // Positions are read below as 3 floats per vertex starting at element
+    // m.pos_offset. Skip a part whose position span leaves the buffer
+    // rather than computing its bounds from out-of-range memory.
+    const int64_t pos_begin = (int64_t)m.pos_offset;
+    if(pos_begin < 0 || pos_begin + 3 * (int64_t)m.vertices > float_count)
+      continue;
+
+    ossia::mesh_primitive prim;
+    // Id keyed on the shared buffer pointer + index, reused for the
+    // scene_node below. Required by the registry's mesh-slab allocator: a
+    // 0 id makes the slab uncacheable and the mesh disappears from rendering.
+    const uint64_t part_id
+        = (uint64_t)((uintptr_t)shared_buf.get()) ^ ((uint64_t)i + 1);
+    prim.stable_id = part_id;
+    prim.vertex_buffers.push_back(shared_buf);
+    prim.vertex_count = (uint32_t)m.vertices;
+    prim.topology = m.points ? ossia::primitive_topology::points
+                             : ossia::primitive_topology::triangles;
+    prim.index_type = ossia::index_format::none;
+
+    // Local-space AABB over the position stream (tightly packed float3).
+    // buf_owner owns the floats; m.pos_offset is the element offset to
+    // the first position component. Enables per-draw GPU culling.
+    const float* positions = buf_owner->data() + m.pos_offset;
+    prim.bounds = ossia::compute_aabb_from_positions(
+        positions, (std::size_t)m.vertices);
+
+    // Byte-offset of each non-interleaved attribute block in the shared
+    // vertex buffer. Convert element-offset (floats) to bytes.
+    auto push_attr = [&](ossia::attribute_semantic sem,
+                         ossia::vertex_format fmt, int64_t elem_offset,
+                         uint32_t stride)
+    {
+      ossia::vertex_attribute a{};
+      a.semantic     = sem;
+      a.format       = fmt;
+      a.buffer_index = 0;
+      a.byte_offset  = (uint32_t)(elem_offset * (int64_t)sizeof(float));
+      a.byte_stride  = stride;
+      a.rate         = ossia::vertex_attribute::input_rate::per_vertex;
+      prim.attributes.push_back(a);
+    };
+
+    push_attr(ossia::attribute_semantic::position,
+              ossia::vertex_format::float3, m.pos_offset,
+              3 * sizeof(float));
+    if(m.normals)
+      push_attr(ossia::attribute_semantic::normal,
+                ossia::vertex_format::float3, m.normal_offset,
+                3 * sizeof(float));
+    if(m.texcoord)
+      push_attr(ossia::attribute_semantic::texcoord0,
+                ossia::vertex_format::float2, m.texcoord_offset,
+                2 * sizeof(float));
+    if(m.colors)
+      push_attr(ossia::attribute_semantic::color0,
+                ossia::vertex_format::float4, m.color_offset,
+                4 * sizeof(float));
+    if(m.tangents)
+      push_attr(ossia::attribute_semantic::tangent,
+                ossia::vertex_format::float4, m.tangent_offset,
+                4 * sizeof(float));
+
+    for(const auto& extra : m.extras)
+    {
+      auto sem = translateExtraSemantic(extra);
+      auto fmt = translateFormat(extra.format, extra.components);
+      const uint32_t stride = (uint32_t)(extra.components * sizeof(float)); // assumes 4-byte components — TODO confirm
+      push_attr(sem, fmt, extra.offset, stride);
+    }
+
+    auto mesh_comp = std::make_shared<ossia::mesh_component>();
+    mesh_comp->primitives.push_back(std::move(prim));
+
+    auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+    children->push_back(ossia::mesh_component_ptr{std::move(mesh_comp)});
+
+    auto node = std::make_shared<ossia::scene_node>();
+    node->id.value = part_id;
+    node->name = source_label.empty()
+                     ? std::string("mesh_" + std::to_string(i))
+                     : std::string(source_label);
+    if(meshes.size() > 1)
+      node->name += '#' + std::to_string(i);
+    node->children = std::move(children);
+    roots->push_back(std::move(node));
+  }
+  if(roots->empty())
+    return nullptr;
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(roots);
+  state->version = 1;
+  state->dirty_index = 1;
+  return state;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.hpp
new file mode 100644
index 0000000000..8d6df3aaea
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.hpp
@@ -0,0 +1,33 @@
+#pragma once
+#include <Threedim/TinyObj.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <memory>
+#include <string_view>
+
+namespace Threedim
+{
+
+// Convert a vector of Threedim::mesh (produced by TinyObjFromFile,
+// PlyFromFile, or the new vcglib-STL / vcglib-OFF bridges) into a
+// scene_state containing one scene_node per mesh part, each with a
+// mesh_component backing onto a single shared CPU buffer.
+//
+// All mesh parts share the same `float_vec` — the scene's mesh_primitives
+// reference it via buffer_resource_ptr with per-attribute byte offsets
+// into the same vertex buffer. This matches the layout tinyobj / miniply
+// already produce: attributes are non-interleaved, each one a contiguous
+// span in the parent buffer, with pos_offset / texcoord_offset / …
+// in *elements* (floats), not bytes.
+//
+// `source_label` is used as the scene_node name, purely for inspector
+// readability; it should be the source filename. When it is empty
+// (the default) each node is named `mesh_<i>` instead, `<i>` being the
+// index of the mesh part. When `meshes` holds more than one part,
+// `#<i>` is appended to every node name so the names stay distinct.
+//
+// Returns a null pointer when no scene_node was produced (e.g. empty
+// input); the caller is expected to keep its previous state in that
+// case. Otherwise the returned state has `version` and `dirty_index`
+// both set to 1.
+std::shared_ptr<ossia::scene_state> sceneStateFromMeshes(
+    std::vector<Threedim::mesh> meshes,
+    Threedim::float_vec buffer,
+    std::string_view source_label = {});
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.cpp
new file mode 100644
index 0000000000..f88d9cfb3d
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.cpp
@@ -0,0 +1,568 @@
+#include "SceneGraphFilter.hpp"
+
+#include <ossia/network/value/value.hpp>
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// ───── Glob matching ─────────────────────────────────────────────────
+// Minimal glob over slash-separated paths. The match is anchored: the
+// whole of `text` has to be consumed by `pattern`.
+//   `*`   matches any (possibly empty) run of characters without `/`
+//   `**`  matches any (possibly empty) run of characters, `/` included
+//   `?`   matches exactly one character other than `/`
+//   anything else matches itself literally.
+// A run of three or more stars is read as `**` followed by further
+// stars, which is equivalent to a single `**`.
+// Good enough for path-style filters; `std::regex` is the fallback if
+// users want full regex later.
+//
+// Implementation: wildcards start out matching the empty string and are
+// extended one character at a time on mismatch. Two backtrack points
+// are kept — the most recent `*` and the most recent `**`. When the `*`
+// cannot be extended (the next character is a `/`, or the text is
+// exhausted) the matcher falls back to extending the `**` that precedes
+// it instead of failing outright; this is what lets `/**/wheel*` match
+// `/car/wheels/wheel_fl`. Reaching a new `**` discards the older
+// backtrack points, which is safe because `**` can absorb any text.
+// Worst case cost is O(|text|² · |pattern|); inputs are short paths.
+bool glob_match(std::string_view pattern, std::string_view text) noexcept
+{
+  constexpr std::size_t npos = std::string_view::npos;
+
+  std::size_t pi = 0, ti = 0;
+  // Pattern index just past the last `*` / `**`, and the text index up
+  // to which that wildcard currently extends (exclusive).
+  std::size_t star_pi = npos, star_ti = 0;
+  std::size_t dstar_pi = npos, dstar_ti = 0;
+
+  for(;;)
+  {
+    if(pi < pattern.size())
+    {
+      const char pc = pattern[pi];
+      if(pc == '*')
+      {
+        if(pi + 1 < pattern.size() && pattern[pi + 1] == '*')
+        {
+          // `**`: slash-crossing wildcard. Everything before it is now
+          // committed, so forget any pending single-star backtrack.
+          pi += 2;
+          dstar_pi = pi;
+          dstar_ti = ti;
+          star_pi = npos;
+        }
+        else
+        {
+          pi += 1;
+          star_pi = pi;
+          star_ti = ti;
+        }
+        continue;
+      }
+      if(ti < text.size())
+      {
+        const char tc = text[ti];
+        // `?` accepts any single character except a slash.
+        const bool accepted = (pc == '?') ? (tc != '/') : (pc == tc);
+        if(accepted)
+        {
+          ++pi;
+          ++ti;
+          continue;
+        }
+      }
+    }
+    else if(ti == text.size())
+    {
+      // Pattern and text both fully consumed.
+      return true;
+    }
+
+    // Mismatch — first try to let the last `*` swallow one more
+    // character; `*` can't eat a slash.
+    if(star_pi != npos && star_ti < text.size() && text[star_ti] != '/')
+    {
+      ++star_ti;
+      pi = star_pi;
+      ti = star_ti;
+      continue;
+    }
+    // Otherwise let the last `**` swallow one more character (any
+    // character) and retry everything that follows it from scratch.
+    if(dstar_pi != npos && dstar_ti < text.size())
+    {
+      ++dstar_ti;
+      pi = dstar_pi;
+      ti = dstar_ti;
+      star_pi = npos;
+      continue;
+    }
+    return false;
+  }
+}
+
+// Reports whether `text` is accepted by at least one glob in
+// `patterns`. An empty pattern list never matches.
+bool any_match(
+    const std::vector<std::string>& patterns, std::string_view text) noexcept
+{
+  return std::any_of(
+      patterns.begin(), patterns.end(),
+      [text](const std::string& glob) { return glob_match(glob, text); });
+}
+
+// ───── Predicate context ─────────────────────────────────────────────
+
+// Bundle of filter settings handed to node_matches() and Walker for one
+// rebuild. It only holds references, views and a raw pointer, so the
+// objects it is built from must outlive it. Field order matters: the
+// struct is filled with designated initializers in rebuild().
+struct FilterCtx
+{
+  // Selects which predicate node_matches() evaluates.
+  SceneGraphFilter::Mode mode;
+  // Read by Walker::rewrite: flips the keep/drop decision, or supplies
+  // the target `visible` value in SetVisibility mode.
+  bool invert;
+  // Component kind looked for in ByComponent mode.
+  SceneGraphFilter::Component component;
+  // Glob lists matched against the node path (ByPath), the node name
+  // (ByName) and primitive material tags (ByMaterialTag) respectively.
+  const std::vector<std::string>& paths;
+  const std::vector<std::string>& names;
+  const std::vector<std::string>& material_tags;
+  // Input scene being filtered. NOTE(review): stored but not read by
+  // node_matches() or Walker as written.
+  const ossia::scene_state* state;
+  // Tier-1 extensions: schema-field + property predicates.
+  // Wanted material alpha mode (ByAlphaMode).
+  SceneGraphFilter::AlphaMode alpha_mode;
+  // Wanted node purpose (ByPurpose).
+  SceneGraphFilter::Purpose purpose;
+  // Wanted value of the shadow / reflection caster flag
+  // (ByShadowCaster / ByReflectionCaster).
+  bool caster_flag;
+  // Property lookup key; an empty key makes the property modes match
+  // nothing.
+  std::string_view prop_key;
+  // Comparison applied between the stringified stored property and
+  // `prop_value`.
+  SceneGraphFilter::PropertyOp prop_op;
+  std::string_view prop_value;
+};
+
+// Reports whether `n` directly carries at least one payload holding the
+// component kind `which`. Only the node's own payload list is looked
+// at; child scene_nodes are not descended into. Backs ByComponent mode.
+bool node_has_component(
+    const ossia::scene_node& n, SceneGraphFilter::Component which) noexcept
+{
+  if(!n.has_children())
+    return false;
+
+  // True when the payload variant currently holds the requested kind.
+  const auto holds_wanted_kind = [which](const auto& payload) -> bool {
+    switch(which)
+    {
+      case SceneGraphFilter::Mesh:
+        return ossia::get_if<ossia::mesh_component_ptr>(&payload) != nullptr;
+      case SceneGraphFilter::Light:
+        return ossia::get_if<ossia::light_component_ptr>(&payload) != nullptr;
+      case SceneGraphFilter::Camera:
+        return ossia::get_if<ossia::camera_component_ptr>(&payload)
+               != nullptr;
+      case SceneGraphFilter::Instance:
+        return ossia::get_if<ossia::instance_component_ptr>(&payload)
+               != nullptr;
+      case SceneGraphFilter::Skeleton:
+        return ossia::get_if<ossia::skeleton_component_ptr>(&payload)
+               != nullptr;
+    }
+    // Unknown enumerator: nothing can match.
+    return false;
+  };
+
+  return std::any_of(
+      n.children->begin(), n.children->end(), holds_wanted_kind);
+}
+
+// Reports whether `pred` accepts the material of at least one primitive
+// of any mesh_component attached directly to `n`. Null mesh pointers
+// and primitives without a material are skipped. `pred` receives the
+// primitive's material pointer, already checked to be non-null.
+// mesh_primitive holds a direct material_component_ptr — no index
+// lookup into scene_state.materials is involved.
+template <typename Pred>
+bool any_primitive_material(const ossia::scene_node& n, Pred&& pred)
+{
+  if(!n.has_children())
+    return false;
+  for(const auto& p : *n.children)
+  {
+    const auto* mesh = ossia::get_if<ossia::mesh_component_ptr>(&p);
+    if(!mesh || !*mesh)
+      continue;
+    for(const auto& prim : (*mesh)->primitives)
+    {
+      if(prim.material && pred(prim.material))
+        return true;
+    }
+  }
+  return false;
+}
+
+// Does this node match the current mode's predicate before `invert` is
+// applied? `path` is the slash-joined name chain from the root.
+//
+// Material-based modes look at the primitives of the mesh components
+// attached directly to `n` and match as soon as one primitive's
+// material satisfies the test (one scene_node can hold a mesh whose
+// primitives use different materials). Modes not handled by the switch
+// match everything.
+bool node_matches(
+    const ossia::scene_node& n, std::string_view path,
+    const FilterCtx& ctx) noexcept
+{
+  switch(ctx.mode)
+  {
+    case SceneGraphFilter::PassThrough:
+      return true;
+    case SceneGraphFilter::VisibleOnly:
+      return n.visible;
+    case SceneGraphFilter::ByPath:
+      return any_match(ctx.paths, path);
+    case SceneGraphFilter::ByName:
+      return any_match(ctx.names, n.name);
+    case SceneGraphFilter::ByComponent:
+      return node_has_component(n, ctx.component);
+    case SceneGraphFilter::ByMaterialTag:
+      return any_primitive_material(n, [&ctx](const auto& material) {
+        return any_match(ctx.material_tags, material->tag);
+      });
+
+    case SceneGraphFilter::SetVisibility:
+      // SetVisibility selects nodes with the same name-list predicate
+      // as ByName. The walker never drops nodes in this mode; it only
+      // overwrites `visible` on the nodes reported as matching here,
+      // so the gating has to happen in this function — returning true
+      // unconditionally would toggle every node in the scene.
+      return any_match(ctx.names, n.name);
+
+    // ─── Schema-field predicates (Tier 1 extension) ─────────────────
+    case SceneGraphFilter::ByAlphaMode: {
+      const auto want = static_cast<ossia::alpha_mode>(ctx.alpha_mode);
+      return any_primitive_material(n, [want](const auto& material) {
+        return material->alpha == want;
+      });
+    }
+
+    case SceneGraphFilter::ByShadowCaster:
+    case SceneGraphFilter::ByReflectionCaster: {
+      // Compare the flag selected by the mode against caster_flag.
+      const bool use_shadow = ctx.mode == SceneGraphFilter::ByShadowCaster;
+      return any_primitive_material(
+          n, [&ctx, use_shadow](const auto& material) {
+            const bool flag = use_shadow ? material->shadow_caster
+                                         : material->reflection_caster;
+            return flag == ctx.caster_flag;
+          });
+    }
+
+    case SceneGraphFilter::ByPurpose:
+      return static_cast<uint8_t>(n.purpose)
+             == static_cast<uint8_t>(ctx.purpose);
+
+    case SceneGraphFilter::ByNodeProperty:
+    case SceneGraphFilter::ByMaterialProperty: {
+      // Missing or empty keys never match.
+      if(ctx.prop_key.empty())
+        return false;
+      // Built once: the map is looked up with a std::string key, and
+      // ByMaterialProperty may probe many materials.
+      const std::string key(ctx.prop_key);
+      auto match_prop
+          = [&](const ossia::scene_property_map& props) -> bool {
+        auto it = props.find(key);
+        if(it == props.end())
+          return false;
+        // Stringify the stored value for comparison. ossia::value is
+        // variant-typed; value_to_pretty_string covers int/float/
+        // string/bool/impulse uniformly.
+        const std::string lhs = ossia::value_to_pretty_string(it->second);
+        const std::string_view rhs = ctx.prop_value;
+        switch(ctx.prop_op)
+        {
+          case SceneGraphFilter::PropEqual:
+            return lhs == rhs;
+          case SceneGraphFilter::PropNotEqual:
+            return lhs != rhs;
+          case SceneGraphFilter::PropContains:
+            return lhs.find(rhs) != std::string::npos;
+          case SceneGraphFilter::PropLessThan:
+          case SceneGraphFilter::PropGreaterThan: {
+            const bool less = ctx.prop_op == SceneGraphFilter::PropLessThan;
+            // Numeric compare when both sides parse as float; fall
+            // back to lexicographic compare otherwise. Covers the
+            // common "alpha_cutoff > 0.5" case without a full DSL.
+            // std::stod throws when no number can be parsed, hence
+            // the catch-all (this function is noexcept).
+            try
+            {
+              const double l = std::stod(lhs);
+              const double r = std::stod(std::string(rhs));
+              return less ? l < r : l > r;
+            }
+            catch(...)
+            {
+              return less ? lhs < rhs : lhs > rhs;
+            }
+          }
+        }
+        return false;
+      };
+
+      if(ctx.mode == SceneGraphFilter::ByNodeProperty)
+        return match_prop(n.properties);
+
+      // ByMaterialProperty — check every primitive's material.
+      return any_primitive_material(n, [&](const auto& material) {
+        return match_prop(material->properties);
+      });
+    }
+  }
+  return true;
+}
+
+// ───── Tree walker ───────────────────────────────────────────────────
+// Recursively copy the subtree, dropping nodes whose (possibly
+// inverted) predicate says no. Subtrees that come out of the walk
+// unchanged are returned as the original shared_ptr (structural
+// sharing); subtrees in which nothing is kept are dropped (nullptr).
+// In SetVisibility mode nothing is dropped: matching nodes are copied
+// with their `visible` flag overwritten instead.
+
+struct Walker
+{
+  // Filter settings for this walk; must outlive the Walker.
+  const FilterCtx& ctx;
+
+  // Does `node` or any descendant match? Memoization would help here
+  // if the tree gets big; for now linear scan on each parent. glTF
+  // scenes are typically shallow enough that this is fine.
+  // Uses the raw predicate: `ctx.invert` is not applied here.
+  // `path` is the slash-joined path of `n` itself.
+  bool subtree_has_match(
+      const ossia::scene_node& n, std::string path) const noexcept
+  {
+    if(node_matches(n, path, ctx))
+      return true;
+    if(!n.has_children())
+      return false;
+    for(const auto& p : *n.children)
+    {
+      // Only scene_node payloads are descended into; null ones are
+      // skipped.
+      if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&p))
+      {
+        if(!*sub)
+          continue;
+        std::string childPath
+            = path + '/' + (*sub)->name;
+        if(subtree_has_match(**sub, std::move(childPath)))
+          return true;
+      }
+    }
+    return false;
+  }
+
+  // Returns the rewritten node, or nullptr if this node (and its
+  // entire subtree) should be dropped. Returns `src` itself when the
+  // subtree needs no change. `path` is the slash-joined path of `src`.
+  // Every copy made here gets its dirty_index bumped by one relative
+  // to the node it was copied from.
+  ossia::scene_node_ptr rewrite(
+      const ossia::scene_node_ptr& src, const std::string& path) const
+  {
+    if(!src)
+      return nullptr;
+
+    const bool self_matches = node_matches(*src, path, ctx);
+
+    // SetVisibility mode: don't drop anything, just toggle `visible`
+    // on matches. `invert` flips the sense: Invert=false → matches
+    // become hidden; Invert=true → matches become visible.
+    if(ctx.mode == SceneGraphFilter::SetVisibility)
+    {
+      const bool target_visible = ctx.invert;
+      // Only copy the node when the flag actually has to change.
+      const bool need_change
+          = self_matches && (src->visible != target_visible);
+
+      // Recurse so descendants can also toggle.
+      ossia::scene_node_ptr recursed_self = src;
+      if(src->has_children())
+      {
+        auto new_children
+            = std::make_shared<std::vector<ossia::scene_payload>>();
+        new_children->reserve(src->children->size());
+        bool child_changed = false;
+        for(const auto& payload : *src->children)
+        {
+          if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&payload))
+          {
+            // Null child nodes are carried over untouched.
+            if(!*sub)
+            {
+              new_children->push_back(payload);
+              continue;
+            }
+            std::string childPath = path + '/' + (*sub)->name;
+            auto rw = rewrite(*sub, childPath);
+            // A different pointer means the child subtree was copied.
+            if(rw.get() != sub->get())
+              child_changed = true;
+            new_children->push_back(rw ? rw : *sub);
+          }
+          else
+          {
+            // Non-node payloads are always preserved in this mode.
+            new_children->push_back(payload);
+          }
+        }
+        // Adopt the rebuilt child list only if some child changed;
+        // otherwise keep sharing `src`.
+        if(child_changed)
+        {
+          auto copy = std::make_shared<ossia::scene_node>(*src);
+          copy->children = std::move(new_children);
+          copy->dirty_index = src->dirty_index + 1;
+          recursed_self = copy;
+        }
+      }
+
+      if(need_change)
+      {
+        auto copy = std::make_shared<ossia::scene_node>(*recursed_self);
+        copy->visible = target_visible;
+        copy->dirty_index = recursed_self->dirty_index + 1;
+        return copy;
+      }
+      return recursed_self;
+    }
+
+    const bool keep_self = ctx.invert ? !self_matches : self_matches;
+
+    // In modes other than PassThrough: if this node isn't kept AND
+    // neither it nor any descendant satisfies the raw (non-inverted)
+    // predicate, drop the whole subtree.
+    if(ctx.mode != SceneGraphFilter::PassThrough && !keep_self
+       && !subtree_has_match(*src, path))
+      return nullptr;
+
+    // If no filtering is active (mode 0) and we reach here, share.
+    if(ctx.mode == SceneGraphFilter::PassThrough)
+      return src;
+
+    // Recurse into children, rebuilding the payload list.
+    if(!src->has_children())
+      return keep_self ? src : nullptr;
+
+    auto new_children
+        = std::make_shared<std::vector<ossia::scene_payload>>();
+    new_children->reserve(src->children->size());
+    bool any_dropped = false;
+    for(const auto& payload : *src->children)
+    {
+      if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&payload))
+      {
+        // Null child nodes are carried over untouched.
+        if(!*sub)
+        {
+          new_children->push_back(payload);
+          continue;
+        }
+        std::string childPath = path + '/' + (*sub)->name;
+        auto rw = rewrite(*sub, childPath);
+        if(rw)
+          new_children->push_back(rw);
+        else
+          any_dropped = true;
+      }
+      else
+      {
+        // Non-scene_node payloads (meshes, lights, transforms, etc.)
+        // follow the node they're on: keep iff the node was kept.
+        if(keep_self)
+          new_children->push_back(payload);
+        else
+          any_dropped = true;
+      }
+    }
+
+    if(!keep_self && new_children->empty())
+      return nullptr; // nothing survived; drop the node wrapper too
+
+    // Share-if-unchanged: when no payload was dropped AND every
+    // scene_node child came back as the very same pointer, just return
+    // the original pointer. `keep_self` is not tested here: a node that
+    // is not kept but carries only (surviving, unchanged) scene_node
+    // children is also returned as-is.
+    if(!any_dropped && new_children->size() == src->children->size())
+    {
+      bool identical = true;
+      for(std::size_t i = 0; i < new_children->size(); ++i)
+      {
+        if(auto* a = ossia::get_if<ossia::scene_node_ptr>(&(*new_children)[i]))
+        {
+          auto* b = ossia::get_if<ossia::scene_node_ptr>(
+              &(*src->children)[i]);
+          if(!b || a->get() != b->get())
+          {
+            identical = false;
+            break;
+          }
+        }
+      }
+      if(identical)
+        return src;
+    }
+
+    // Something changed below: emit a copy that owns the new list.
+    auto copy = std::make_shared<ossia::scene_node>(*src);
+    copy->children = std::move(new_children);
+    copy->dirty_index = src->dirty_index + 1;
+    return copy;
+  }
+};
+
+} // namespace
+
+// Recomputes the cached output scene from the current input scene and
+// control values.
+//  - No input state: the cache is cleared and no dirty bits are raised.
+//  - PassThrough: the input state is forwarded as-is.
+//  - Any other mode: every root is run through Walker::rewrite and the
+//    surviving roots are stored in a copy of the input state that gets
+//    a fresh version number.
+// Whenever an output is produced, the upstream identity (pointer +
+// version) is recorded and all dirty bits (0xFF) are raised for the
+// next operator()() call.
+void SceneGraphFilter::rebuild()
+{
+  const auto& upstream = inputs.scene_in.scene;
+  if(!upstream.state)
+  {
+    m_cached_out.reset();
+    m_pending_dirty = 0;
+    return;
+  }
+
+  const auto* const src_state = upstream.state.get();
+  const int64_t src_version = upstream.state->version;
+
+  // Stores the result together with the upstream identity it was
+  // derived from, and flags everything as dirty.
+  const auto publish = [&](std::shared_ptr<const ossia::scene_state> out) {
+    m_cached_out = std::move(out);
+    m_cached_in_state = src_state;
+    m_cached_in_version = src_version;
+    m_pending_dirty = 0xFF;
+  };
+
+  // PassThrough is the free path.
+  if(inputs.mode.value == PassThrough)
+  {
+    publish(upstream.state);
+    return;
+  }
+
+  FilterCtx ctx{
+      .mode = static_cast<Mode>(inputs.mode.value),
+      .invert = inputs.invert.value,
+      .component = static_cast<Component>(inputs.component.value),
+      .paths = inputs.paths.value,
+      .names = inputs.names.value,
+      .material_tags = inputs.material_tags.value,
+      .state = src_state,
+      .alpha_mode = static_cast<AlphaMode>(inputs.alpha_mode.value),
+      .purpose = static_cast<Purpose>(inputs.purpose.value),
+      .caster_flag = inputs.caster_flag.value,
+      .prop_key = inputs.prop_key.value,
+      .prop_op = static_cast<PropertyOp>(inputs.prop_op.value),
+      .prop_value = inputs.prop_value.value};
+
+  Walker walker{ctx};
+  auto kept_roots
+      = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  if(upstream.state->roots)
+  {
+    const auto& src_roots = *upstream.state->roots;
+    kept_roots->reserve(src_roots.size());
+    for(const auto& root : src_roots)
+    {
+      // Null roots yield nothing (rewrite() maps null to null).
+      if(!root)
+        continue;
+      // Root paths are "/<name>"; deeper paths are built by the walker.
+      auto rewritten = walker.rewrite(root, "/" + root->name);
+      if(rewritten)
+        kept_roots->push_back(std::move(rewritten));
+    }
+  }
+
+  auto filtered = std::make_shared<ossia::scene_state>(*upstream.state);
+  filtered->roots = std::move(kept_roots);
+  filtered->version = ++m_version_counter;
+  filtered->dirty_index = upstream.state->dirty_index + 1;
+
+  publish(std::move(filtered));
+}
+
+// Per-tick entry point. Rebuilds the cached output when the upstream
+// scene identity (pointer or version) differs from the one recorded at
+// the last rebuild, or when there is an input but no cached output.
+// Then publishes the cached state together with the pending dirty
+// bits, which are cleared afterwards so they are reported only once.
+void SceneGraphFilter::operator()()
+{
+  const auto* upstream = inputs.scene_in.scene.state.get();
+  const int64_t upstream_version = upstream ? upstream->version : -1;
+
+  const bool same_upstream = (m_cached_in_state == upstream)
+                             && (m_cached_in_version == upstream_version);
+  const bool output_missing = upstream && !m_cached_out;
+  if(!same_upstream || output_missing)
+    rebuild();
+
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.hpp
new file mode 100644
index 0000000000..c54de80f9f
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.hpp
@@ -0,0 +1,227 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace Threedim
+{
+
+// Scene-graph filter. Takes a scene in, emits a scene out whose node
+// tree has been culled/pruned by a predicate selected via `mode`. The
+// dropped nodes and their descendants are excluded from flattening
+// downstream.
+//
+// Predicates run against each scene_node during the walk. Subtrees
+// that come out of the walk unchanged are returned by shared_ptr
+// identity (no cloning) so downstream caches stay warm on untouched
+// branches.
+//
+// Path syntax: slash-joined scene_node::name chain from roots, glob
+// wildcards (`*` matches anything except `/`, `**` matches across
+// slashes). Example: `/*/Wheels/**` includes everything under any
+// root whose first-level child is named "Wheels".
+class SceneGraphFilter
+{
+public:
+  halp_meta(name, "Scene Graph Filter")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_graph_filter")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-graph-filter.html")
+  halp_meta(uuid, "3c7e9a5d-2f4b-4e6c-8b1a-0d5f7e3a9c8b")
+
+  // Filter mode. The enumerator order must stay in sync with the
+  // label array of the "Mode" combobox below.
+  enum Mode
+  {
+    PassThrough,
+    VisibleOnly,
+    ByPath,
+    ByName,
+    ByComponent,
+    ByMaterialTag,
+    // SetVisibility mode: matching nodes have their `visible` flag
+    // set to the value of Invert (Invert=false → hidden, Invert=true
+    // → shown).
+    // Non-matching nodes kept untouched. Unlike the filter modes above
+    // this DOESN'T drop nodes — they stay in the tree so downstream
+    // material / transform / light data is preserved, just
+    // render-invisible.
+    SetVisibility,
+
+    // Schema-field predicates. Operate on well-known
+    // material_component / scene_node fields — no string hashing,
+    // no glob. Each mode reads one field and compares against the
+    // inline control.
+    ByAlphaMode,        // material.alpha == (selected enum)
+    ByShadowCaster,     // material.shadow_caster == (selected bool)
+    ByReflectionCaster, // material.reflection_caster == (selected bool)
+    ByPurpose,          // scene_node.purpose == (selected enum)
+
+    // Property-dict predicates. Read scene_node::properties or
+    // material_component::properties by key and compare against a
+    // literal. The stored value is stringified and compared with the
+    // "Property value" text; the less/greater operators compare
+    // numerically when both sides parse as numbers. Useful for
+    // user-authored metadata — USD extra attributes,
+    // glTF material.extras JSON, custom layer tags.
+    ByNodeProperty,     // scene_node.properties[key] matches value
+    ByMaterialProperty  // material.properties[key] matches value
+  };
+
+  // Component kind looked for by ByComponent mode.
+  enum Component
+  {
+    Mesh,
+    Light,
+    Camera,
+    Instance,
+    Skeleton
+  };
+
+  // Cast to ossia::alpha_mode by the filter, so the numeric values are
+  // presumably meant to mirror that enum — TODO confirm.
+  enum AlphaMode
+  {
+    AlphaOpaque = 0,
+    AlphaMask   = 1,
+    AlphaBlend  = 2
+  };
+
+  // Compared with scene_node::purpose by numeric value.
+  enum Purpose
+  {
+    PurposeDefault = 0,
+    PurposeRender  = 1,
+    PurposeProxy   = 2,
+    PurposeGuide   = 3
+  };
+
+  // Operator for property matches — extends beyond string-glob to
+  // support numeric thresholds without a full predicate-DSL rollout.
+  enum PropertyOp
+  {
+    PropEqual,
+    PropNotEqual,
+    PropLessThan,
+    PropGreaterThan,
+    PropContains  // substring match when value is string
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Port-driven rebuild: controls trigger rebuild() via update();
+    // upstream scene_in changes detected in operator()().
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        // One label per Mode enumerator, in declaration order.
+        std::string_view values[13]{
+            "Pass through", "Visible only", "By path", "By name",
+            "By component", "By material tag", "Set visibility",
+            "By alpha mode", "By shadow caster", "By reflection caster",
+            "By purpose", "By node property", "By material property"};
+        int init{0};
+      };
+      void update(SceneGraphFilter& n) { n.rebuild(); }
+    } mode;
+
+    // When true: drop nodes that match the predicate (the list acts
+    // as an exclude filter). When false (default): keep matching
+    // nodes, drop the rest.
+    struct : halp::toggle<"Invert">
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } invert;
+
+    // List inlets — user edits inline in the inspector. A halp
+    // `val_port<vector<string>>` renders an editable N-row widget.
+    // Each mode uses the relevant list; others are ignored.
+    struct : halp::val_port<"Paths", std::vector<std::string>>
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } paths;
+    struct : halp::val_port<"Names", std::vector<std::string>>
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } names;
+    struct : halp::val_port<"Material tags", std::vector<std::string>>
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } material_tags;
+
+    struct : halp::combobox_t<"Component", Component>
+    {
+      struct range
+      {
+        std::string_view values[5]{
+            "Mesh", "Light", "Camera", "Instance", "Skeleton"};
+        int init{0};
+      };
+      void update(SceneGraphFilter& n) { n.rebuild(); }
+    } component;
+
+    // Schema-field selectors. Unused in most modes; each dropdown is
+    // read only by its corresponding Mode.
+    struct : halp::combobox_t<"Alpha mode", AlphaMode>
+    {
+      struct range
+      { std::string_view values[3]{"Opaque", "Mask", "Blend"}; int init{0}; };
+      void update(SceneGraphFilter& n) { n.rebuild(); }
+    } alpha_mode;
+
+    struct : halp::combobox_t<"Purpose", Purpose>
+    {
+      struct range
+      {
+        std::string_view values[4]{"Default", "Render", "Proxy", "Guide"};
+        int init{0};
+      };
+      void update(SceneGraphFilter& n) { n.rebuild(); }
+    } purpose;
+
+    // Wanted flag value for ByShadowCaster / ByReflectionCaster.
+    struct : halp::toggle<"Caster flag">
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } caster_flag;
+
+    // Property-match inputs (ByNodeProperty / ByMaterialProperty).
+    // Key + operator + literal; value parsed as float when numeric,
+    // string otherwise. Missing keys never match (predicate false).
+    struct : halp::val_port<"Property key", std::string>
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } prop_key;
+
+    struct : halp::combobox_t<"Property op", PropertyOp>
+    {
+      struct range
+      {
+        // One label per PropertyOp enumerator, in declaration order.
+        std::string_view values[5]{
+            "equal", "not equal", "less than", "greater than",
+            "contains (string)"};
+        int init{0};
+      };
+      void update(SceneGraphFilter& n) { n.rebuild(); }
+    } prop_op;
+
+    struct : halp::val_port<"Property value", std::string>
+    { void update(SceneGraphFilter& n) { n.rebuild(); } } prop_value;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // Recomputes m_cached_out from the current inputs and controls.
+  void rebuild();
+  // Per-tick entry point: rebuilds if upstream changed, then publishes
+  // the cached scene and the pending dirty bits.
+  void operator()();
+
+  // Cache the last emitted scene_state so unchanged inputs don't churn
+  // downstream identity caches.
+  std::shared_ptr<const ossia::scene_state> m_cached_out;
+  // Dirty bits to report on the next operator()() call; cleared once
+  // reported.
+  uint8_t m_pending_dirty{0xFF};
+  // Identity (address + version) of the input state seen by the last
+  // successful rebuild. The pointer is only compared, never
+  // dereferenced.
+  const ossia::scene_state* m_cached_in_state{};
+  int64_t m_cached_in_version{-1};
+  // Source of the version numbers stamped on filtered output states.
+  int64_t m_version_counter{0};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGroup.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.cpp
new file mode 100644
index 0000000000..972edf985a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.cpp
@@ -0,0 +1,235 @@
+#include "SceneGroup.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <ossia/detail/ptr_set.hpp>
+
+#include <algorithm>
+#include <cstring>
+
+namespace Threedim
+{
+
+namespace
+{
+// Merges two nullable shared component vectors. When `a` is null or
+// empty, `b` is returned untouched (and vice versa), so the vector's
+// shared_ptr identity survives that passthrough; the same holds when
+// both sides are the very same vector instance. Otherwise a fresh
+// vector is built from `a`'s entries followed by `b`'s, skipping null
+// entries and any entry whose pointee was already added (dedup is keyed
+// on the raw object pointer). This is SceneGroup's guard against one
+// upstream being wired into several of its input slots, which would
+// otherwise list the same shared components several times.
+template <typename T>
+std::shared_ptr<const std::vector<T>> mergeSharedVec(
+    const std::shared_ptr<const std::vector<T>>& a,
+    const std::shared_ptr<const std::vector<T>>& b)
+{
+  const bool a_contributes = a && !a->empty();
+  const bool b_contributes = b && !b->empty();
+  if(!a_contributes)
+    return b;
+  if(!b_contributes || a == b)
+    return a;
+  auto out = std::make_shared<std::vector<T>>();
+  out->reserve(a->size() + b->size());
+  ossia::ptr_set<const typename T::element_type*> visited;
+  // Appends the entries of `src` that are non-null and not yet visited.
+  const auto append_unique = [&](const std::vector<T>& src) {
+    for(const auto& item : src)
+    {
+      if(!item)
+        continue;
+      if(visited.insert(item.get()).second)
+        out->push_back(item);
+    }
+  };
+  append_unique(*a);
+  append_unique(*b);
+  return out;
+}
+} // namespace
+
+// Rebuilds the cached output: snapshots each input's (pointer, version),
+// merges roots and shared component vectors with pointer-level dedup, and
+// wraps the roots under one parent node carrying this group's name + TRS.
+void SceneGroup::rebuild()
+{
+  const ossia::scene_spec* inputs_list[4] = {
+      &inputs.scene0.scene, &inputs.scene1.scene,
+      &inputs.scene2.scene, &inputs.scene3.scene};
+
+  // Snapshot upstream identity; operator()() compares against it to
+  // detect upstream changes.
+  for(int i = 0; i < 4; ++i)
+  {
+    const ossia::scene_state* s = inputs_list[i]->state.get();
+    m_cached_in[i] = s;
+    m_cached_ver[i] = s ? s->version : -1;
+  }
+  // Collect roots from every non-null input and merge materials /
+  // animations / cameras / skeletons. Roots are deduplicated by object
+  // pointer: wiring one upstream into several slots would otherwise put
+  // the same scene_node under the parent once per slot.
+  auto merged_roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  ossia::ptr_set<const ossia::scene_node*> seen_roots;
+  std::shared_ptr<const std::vector<ossia::material_component_ptr>> mats;
+  std::shared_ptr<const std::vector<ossia::animation_component_ptr>> anims;
+  std::shared_ptr<const std::vector<ossia::camera_component_ptr>> cams;
+  std::shared_ptr<const std::vector<ossia::skeleton_component_ptr>> skels;
+  ossia::scene_environment env{};
+  bool have_env = false;
+  ossia::scene_node_id active_cam{};
+
+  for(int i = 0; i < 4; ++i)
+  {
+    const auto& s = inputs_list[i]->state;
+    if(!s)
+      continue;
+    if(s->roots)
+      for(const auto& r : *s->roots)
+        if(r && seen_roots.insert(r.get()).second)
+          merged_roots->push_back(r);
+    mats = mergeSharedVec(mats, s->materials);
+    anims = mergeSharedVec(anims, s->animations);
+    cams = mergeSharedVec(cams, s->cameras);
+    skels = mergeSharedVec(skels, s->skeletons);
+    // Environment: first contributor wins; a later one replaces it only
+    // when it brings a skybox that the current pick lacks.
+    const bool adds_skybox = !env.skybox_texture.native_handle
+                             && s->environment.skybox_texture.native_handle;
+    if(!have_env || adds_skybox)
+      env = s->environment;
+    have_env = true;
+    // Active camera: first non-zero id wins.
+    if(active_cam.value == 0 && s->active_camera_id.value != 0)
+      active_cam = s->active_camera_id;
+  }
+
+  // Build the wrapping parent node.
+  ossia::scene_transform xform;
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+  auto q = QQuaternion::fromEulerAngles(
+      inputs.rotation.value.x, inputs.rotation.value.y,
+      inputs.rotation.value.z);
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar();
+  xform.scale[0] = inputs.scale.value.x;
+  xform.scale[1] = inputs.scale.value.y;
+  xform.scale[2] = inputs.scale.value.z;
+  xform.raw_slot = m_xform_ref;
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->reserve(merged_roots->size() + 1);
+  children->push_back(xform);
+  for(auto& r : *merged_roots)
+    children->push_back(r);
+
+  auto parent = std::make_shared<ossia::scene_node>();
+  parent->name
+      = inputs.name.value.empty() ? std::string{"Group"}
+                                  : inputs.name.value;
+  parent->children = std::move(children);
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->push_back(std::move(parent));
+
+  auto state = std::make_shared<ossia::scene_state>();
+  state->roots = std::move(roots);
+  state->materials = std::move(mats);
+  state->animations = std::move(anims);
+  state->cameras = std::move(cams);
+  state->skeletons = std::move(skels);
+  state->environment = std::move(env);
+  state->active_camera_id = active_cam;
+  state->version = ++m_version_counter;
+  state->dirty_index = 1;
+
+  m_cached_out = std::move(state);
+  m_pending_dirty = 0xFF;
+}
+
+// Publishes the cached scene. Rebuilds first when nothing is cached or an
+// input's (state pointer, version) differs from the last rebuild's snapshot.
+void SceneGroup::operator()()
+{
+  const ossia::scene_spec* slots[4] = {
+      &inputs.scene0.scene, &inputs.scene1.scene,
+      &inputs.scene2.scene, &inputs.scene3.scene};
+  // Control edits do not need this check: their update() calls rebuild().
+  bool stale = !m_cached_out;
+  for(int idx = 0; idx < 4; ++idx)
+  {
+    const ossia::scene_state* cur = slots[idx]->state.get();
+    const int64_t ver = cur ? cur->version : -1;
+    stale = stale || cur != m_cached_in[idx] || ver != m_cached_ver[idx];
+  }
+  if(stale)
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0; // consumed; stays 0 until the next rebuild()
+}
+
+// Order invariant: called by GfxRenderer::initState BEFORE the first
+// operator()() and BEFORE processControlIn fires any rebuild() callback.
+// m_xform_ref populated here is therefore safe to read in rebuild()
+// without a guard. Adding prepare() to this node breaks the invariant —
+// see CpuFilterNode.hpp for details.
+void SceneGroup::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  auto&& registry = r.registry();
+  if(!raw_transform_slot.valid())
+  {
+    raw_transform_slot = registry.allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawTransform,
+        sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = registry.toOssiaRef(raw_transform_slot); // read by rebuild()
+  }
+  if(!raw_transform_slot.valid())
+    return; // allocation did not yield a usable slot
+  score::gfx::RawLocalTransform seed{}; // value-initialised placeholder
+  registry.updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+}
+
+void SceneGroup::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!raw_transform_slot.valid())
+    return; // no slot to write into (init() allocates it)
+  const auto& pos = inputs.position.value;
+  const auto& rot = inputs.rotation.value;
+  const auto& scl = inputs.scale.value;
+  const QQuaternion quat = QQuaternion::fromEulerAngles(rot.x, rot.y, rot.z);
+  score::gfx::RawLocalTransform local{}; // same TRS values rebuild() uses
+  local.translation[0] = pos.x;
+  local.translation[1] = pos.y;
+  local.translation[2] = pos.z;
+  local.rotation[0] = quat.x();
+  local.rotation[1] = quat.y();
+  local.rotation[2] = quat.z();
+  local.rotation[3] = quat.scalar(); // quaternion stored as x, y, z, scalar
+  local.scale[0] = scl.x;
+  local.scale[1] = scl.y;
+  local.scale[2] = scl.z;
+  r.registry().updateSlot(res, raw_transform_slot, &local, sizeof(local));
+}
+
+void SceneGroup::release(score::gfx::RenderList& r)
+{
+  if(raw_transform_slot.valid())
+    r.registry().free(raw_transform_slot);
+  m_xform_ref = {};
+  // Producer-state-drift Option A — see Light::release. The cached output
+  // and the input snapshot pointers are both cleared.
+  m_cached_out.reset();
+  std::fill(std::begin(m_cached_in), std::end(m_cached_in), nullptr);
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGroup.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.hpp
new file mode 100644
index 0000000000..d49de27fb4
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.hpp
@@ -0,0 +1,93 @@
+#pragma once
+#include <Threedim/TinyObj.hpp>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <QQuaternion>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Wraps up to 4 scene inputs under a single named parent scene_node
+// with its own TRS. The group name becomes addressable by downstream
+// filters / overrides via the usual path pattern — so
+// `SceneGroup(name="ProsceniumSet")` +
+// `SceneGraphFilter(paths=["/ProsceniumSet/**"])` is the canonical
+// "bundle and tag a subset" pattern.
+//
+// Materials / animations / cameras / skeletons / environment are
+// merged additively from all inputs (first-wins on singletons like
+// active_camera_id and environment), same convention as
+// MergeGeometries / merge_scenes.
+class SceneGroup
+{
+public:
+  halp_meta(name, "Scene Group")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_group")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-group.html")
+  halp_meta(uuid, "8a3b5e2d-7c4f-4b9e-9d1a-6f8e2c5d3a7b")
+
+  struct ins
+  {
+    struct { halp_meta(name, "Scene 0"); ossia::scene_spec scene; uint8_t dirty{0}; } scene0;
+    struct { halp_meta(name, "Scene 1"); ossia::scene_spec scene; uint8_t dirty{0}; } scene1;
+    struct { halp_meta(name, "Scene 2"); ossia::scene_spec scene; uint8_t dirty{0}; } scene2;
+    struct { halp_meta(name, "Scene 3"); ossia::scene_spec scene; uint8_t dirty{0}; } scene3;
+
+    // Port-driven rebuild: each control's update() calls rebuild()
+    // directly; upstream scene changes are detected in operator()().
+    struct : halp::lineedit<"Name", "">
+    { void update(SceneGroup& n) { n.rebuild(); } } name;
+    struct : PositionControl
+    { void update(SceneGroup& n) { n.rebuild(); } } position;
+    struct : RotationControl
+    { void update(SceneGroup& n) { n.rebuild(); } } rotation;
+    struct : ScaleControl
+    { void update(SceneGroup& n) { n.rebuild(); } } scale;
+  } inputs;
+
+  struct outs
+  {
+    struct { halp_meta(name, "Scene Out"); ossia::scene_spec scene; uint8_t dirty{0}; } scene_out;
+  } outputs;
+
+  void rebuild();    // recomputes m_cached_out from the current inputs
+  void operator()(); // rebuilds if an input changed, then publishes the cache
+
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  std::shared_ptr<const ossia::scene_state> m_cached_out; // last scene built by rebuild()
+  uint8_t m_pending_dirty{0xFF};              // published by operator()(), then cleared
+  const ossia::scene_state* m_cached_in[4]{}; // input state pointers at the last rebuild()
+  int64_t m_cached_ver[4]{-1, -1, -1, -1};    // matching input versions (-1 = no state)
+  int64_t m_version_counter{0};               // incremented for every rebuilt output
+
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot; // allocated in init(), freed in release()
+  ossia::gpu_slot_ref m_xform_ref{}; // ref to that slot; rebuild() stores it on the group transform
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneInspector.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.cpp
new file mode 100644
index 0000000000..04355ca1e4
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.cpp
@@ -0,0 +1,395 @@
+#include "SceneInspector.hpp"
+
+#include <fmt/format.h>
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+namespace
+{
+
+struct ComponentFlags
+{
+  bool mesh{false};
+  bool light{false};
+  bool camera{false};
+  bool skeleton{false};
+  bool instance{false};
+  bool transform{false};
+  std::string material_tag;  // First mesh primitive's material tag (or empty)
+  int vertex_count{0};       // Summed across all mesh primitives
+  int triangle_count{0};     // Summed per-topology primitive count
+  // Bracket tags concatenated without separators, component kinds first
+  // and stats last, e.g. "[mesh][trans][v=1024 t=512][mat=3]".
+  std::string tags(bool show_components, bool show_stats) const
+  {
+    std::string result;
+    const auto add_if = [&result](bool on, const char* tag) {
+      if(on)
+        result += tag;
+    };
+    add_if(show_components && mesh, "[mesh]");
+    add_if(show_components && light, "[light]");
+    add_if(show_components && camera, "[cam]");
+    add_if(show_components && skeleton, "[skel]");
+    add_if(show_components && instance, "[inst]");
+    add_if(show_components && transform, "[trans]");
+    if(!show_stats || !mesh)
+      return result;
+    if(vertex_count > 0 || triangle_count > 0)
+      result += fmt::format("[v={} t={}]", vertex_count, triangle_count);
+    if(!material_tag.empty())
+      result += fmt::format("[mat={}]", material_tag);
+    return result;
+  }
+};
+
+// Scans this node's DIRECT children for non-scene-node payloads and
+// records which kinds appear; null component pointers are ignored. Meshes
+// also contribute their vertex / primitive counts for the stats output.
+ComponentFlags detectComponents(const ossia::scene_node& node) noexcept
+{
+  ComponentFlags f;
+  if(!node.has_children())
+    return f;
+  for(const auto& payload : *node.children)
+  {
+    if(auto* m = ossia::get_if<ossia::mesh_component_ptr>(&payload))
+    {
+      if(*m)
+      {
+        f.mesh = true;
+        for(const auto& prim : (*m)->primitives)
+        {
+          f.vertex_count += int(prim.vertex_count);
+          // Source primitive count for this topology. index_count == 0
+          // means non-indexed; fall back to vertex_count.
+          const int ic = int(prim.index_count);
+          const int n = (ic > 0 ? ic : int(prim.vertex_count));
+          switch(prim.topology)
+          {
+            using T = ossia::primitive_topology;
+            case T::points:
+              f.triangle_count += n;
+              break;
+            case T::lines:
+              f.triangle_count += n / 2;
+              break;
+            case T::line_strip:
+              f.triangle_count += std::max(0, n - 1);
+              break;
+            case T::triangles:
+              f.triangle_count += n / 3;
+              break;
+            case T::triangle_strip:
+            case T::triangle_fan:
+              f.triangle_count += std::max(0, n - 2);
+              break;
+            case T::patches:
+            case T::meshlets:
+              // Not a "primitive count" in the user sense; skip.
+              break;
+          }
+          if(f.material_tag.empty() && prim.material)
+            f.material_tag = prim.material->tag;
+        }
+      }
+    }
+    else if(auto* l = ossia::get_if<ossia::light_component_ptr>(&payload))
+      f.light = f.light || (*l != nullptr);
+    else if(auto* c = ossia::get_if<ossia::camera_component_ptr>(&payload))
+      f.camera = f.camera || (*c != nullptr);
+    else if(auto* k = ossia::get_if<ossia::skeleton_component_ptr>(&payload))
+      f.skeleton = f.skeleton || (*k != nullptr);
+    else if(auto* i = ossia::get_if<ossia::instance_component_ptr>(&payload))
+      f.instance = f.instance || (*i != nullptr);
+    else if(ossia::get_if<ossia::scene_transform>(&payload))
+      f.transform = true;
+  }
+  return f;
+}
+
+// ───── Walker ────────────────────────────────────────────────────────
+//
+// Accumulates rows + readable + running stats. Called recursively on
+// the scene_node subtree; handles Paths/Names/Tree modes inline so the
+// tree glyphs are emitted at the right place (only Tree mode uses
+// indentation prefixes, Paths and Names emit flat rows).
+
+struct State
+{
+  SceneInspector::Mode mode;  // selects the row layout in emitRow
+  bool show_components;       // forwarded to ComponentFlags::tags
+  bool show_stats;            // forwarded to ComponentFlags::tags
+  bool include_hidden;        // when false, walk() skips inactive / invisible nodes
+  int max_depth; // negative = unlimited; walk() skips nodes deeper than this
+
+  std::vector<std::string>* rows;  // sink: one string per emitted row
+  std::string* readable;           // sink: the same rows, newline-terminated
+
+  // Running stats, updated by emitRow for every visited node.
+  int node_count{0};
+  int mesh_count{0};
+  int light_count{0};
+  int camera_count{0};
+  int total_vertices{0};
+  int total_triangles{0};
+};
+
+// Emits one output row for `node` and folds it into the running stats.
+//
+//   s           - walker state: mode, display flags, running totals and
+//                 the two output sinks (rows, readable)
+//   node        - node to describe
+//   path        - canonical slash-path from the root; printed in Paths
+//                 mode ("/" when empty)
+//   tree_prefix - box-drawing indentation (e.g. "│   ├── "); printed
+//                 in Tree mode only
+//
+// A row reads "<head> <tags> <marker>": the head depends on the mode and
+// each suffix is dropped when empty. Stats are updated in every mode;
+// Summary mode stops there and emits no row.
+void emitRow(
+    State& s, const ossia::scene_node& node, const std::string& path,
+    const std::string& tree_prefix)
+{
+  const ComponentFlags flags = detectComponents(node);
+
+  // Fold this node into the running totals.
+  ++s.node_count;
+  s.mesh_count += flags.mesh ? 1 : 0;
+  s.light_count += flags.light ? 1 : 0;
+  s.camera_count += flags.camera ? 1 : 0;
+  s.total_vertices += flags.vertex_count;
+  s.total_triangles += flags.triangle_count;
+
+  const std::string kind_tags = flags.tags(s.show_components, s.show_stats);
+
+  // Visibility marker: "[hidden]" takes precedence over "[inactive]".
+  const char* state_tag = "";
+  if(!node.visible)
+    state_tag = "[hidden]";
+  else if(!node.active)
+    state_tag = "[inactive]";
+
+  // Placeholder shown by Names / Tree mode for nodes without a name.
+  const std::string label
+      = node.name.empty() ? std::string{"(unnamed)"} : node.name;
+
+  // Appends " <tags>" and " <marker>" to `head`, each only when non-empty.
+  const auto decorate = [&](std::string head) {
+    if(!kind_tags.empty())
+    {
+      head += ' ';
+      head += kind_tags;
+    }
+    if(*state_tag)
+    {
+      head += ' ';
+      head += state_tag;
+    }
+    return head;
+  };
+
+  std::string row;
+  switch(s.mode)
+  {
+    case SceneInspector::Paths:
+      // An empty path is shown as the bare root "/".
+      row = decorate(path.empty() ? std::string("/") : path);
+      break;
+    case SceneInspector::Names:
+      row = decorate(label);
+      break;
+    case SceneInspector::Tree:
+      row = decorate(tree_prefix + label);
+      break;
+    case SceneInspector::Summary:
+      // Totals were updated above; this mode emits no per-node row.
+      return;
+  }
+
+  // Publish the row to both sinks; `readable` is newline-terminated.
+  s.rows->push_back(row);
+  *s.readable += row;
+  *s.readable += '\n';
+}
+
+// Depth-first walk. `depth` is 0 at the root. `prefix_trunk` is the
+// continuation prefix inherited from ancestors ("│   " where an ancestor
+// has more siblings below it, "    " where it was the last child).
+// `is_last_child` says whether this node is the last child its parent
+// will emit — it selects the ├── vs └── glyph.
+void walk(
+    State& s, const ossia::scene_node_ptr& node, const std::string& path,
+    const std::string& prefix_trunk, bool is_last_child, int depth)
+{
+  if(!node)
+    return;
+  if(!s.include_hidden && (!node->active || !node->visible))
+    return;
+  if(s.max_depth >= 0 && depth > s.max_depth)
+    return;
+
+  // Tree-mode glyph for this node.
+  std::string tree_prefix;
+  if(s.mode == SceneInspector::Tree && depth > 0)
+    tree_prefix = prefix_trunk + (is_last_child ? "└── " : "├── ");
+  // depth == 0 (root) gets no glyph — it stands alone.
+
+  emitRow(s, *node, path, tree_prefix);
+
+  if(!node->has_children())
+    return;
+
+  // Collect the child scene_nodes that will be emitted; other payloads
+  // were folded into this node's row by detectComponents. Filtered-out
+  // (hidden / inactive) children are skipped so └── marks the last row.
+  std::vector<const ossia::scene_node_ptr*> child_nodes;
+  child_nodes.reserve(node->children->size());
+  for(const auto& p : *node->children)
+  {
+    if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&p))
+      if(*sub && (s.include_hidden || ((*sub)->active && (*sub)->visible)))
+        child_nodes.push_back(sub);
+  }
+
+  const std::string next_trunk = (depth == 0)
+      ? std::string{}
+      : (prefix_trunk + (is_last_child ? "    " : "│   "));
+
+  for(std::size_t i = 0; i < child_nodes.size(); ++i)
+  {
+    const bool last = (i + 1 == child_nodes.size());
+    const auto& sub = *child_nodes[i];
+    std::string childPath = path + '/' + sub->name;
+    walk(s, sub, childPath, next_trunk, last, depth + 1);
+  }
+}
+
+} // namespace
+
+// Recomputes every output from the input scene, unless the input state
+// (pointer + version) and all controls are unchanged since the last run.
+// Summary mode publishes the totals block only; the other modes publish
+// one row per visited node. The count outputs are filled in all modes.
+void SceneInspector::operator()()
+{
+  const auto& in = inputs.scene_in.scene;
+  const ossia::scene_state* in_state = in.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+
+  const bool unchanged
+      = m_cached_valid && m_cached_in_state == in_state
+        && m_cached_in_version == in_version
+        && m_cached_mode == inputs.mode.value
+        && m_cached_show_components == inputs.show_components.value
+        && m_cached_show_stats == inputs.show_stats.value
+        && m_cached_include_hidden == inputs.include_hidden.value
+        && m_cached_max_depth == inputs.max_depth.value;
+  if(unchanged)
+    return;
+
+  // Remember what this run is computed from.
+  m_cached_in_state = in_state;
+  m_cached_in_version = in_version;
+  m_cached_mode = inputs.mode.value;
+  m_cached_show_components = inputs.show_components.value;
+  m_cached_show_stats = inputs.show_stats.value;
+  m_cached_include_hidden = inputs.include_hidden.value;
+  m_cached_max_depth = inputs.max_depth.value;
+  m_cached_valid = true;
+
+  auto& rows = outputs.rows.value;
+  auto& readable = outputs.readable.value;
+  rows.clear();
+  readable.clear();
+
+  // Zero the counts so the empty-scene early return publishes zeros.
+  outputs.node_count.value = 0;
+  outputs.mesh_count.value = 0;
+  outputs.light_count.value = 0;
+  outputs.camera_count.value = 0;
+  outputs.material_count.value = 0;
+  outputs.total_vertices.value = 0;
+  outputs.total_triangles.value = 0;
+
+  if(!in_state)
+  {
+    rows.push_back("(empty scene)");
+    readable = "(empty scene)\n";
+    return;
+  }
+
+  // Material count comes straight from the state.
+  outputs.material_count.value
+      = in_state->materials ? int(in_state->materials->size()) : 0;
+
+  State s{
+      Mode(inputs.mode.value),
+      inputs.show_components.value,
+      inputs.show_stats.value,
+      inputs.include_hidden.value,
+      inputs.max_depth.value,
+      &rows,
+      &readable,
+      0, 0, 0, 0, 0, 0};
+
+  if(inputs.mode.value == Summary)
+  {
+    // Summary: walk every root for the totals only (emitRow adds no rows
+    // in this mode), then put the totals block first in both outputs.
+    if(in_state->roots)
+    {
+      readable
+          += fmt::format("Scene: {} root(s)\n", in_state->roots->size());
+      for(const auto& r : *in_state->roots)
+      {
+        if(!r)
+          continue;
+        // Accumulate straight into `s`. A per-root copy of `s` would
+        // start from the totals of the previous roots, and adding it
+        // back would count those roots twice.
+        walk(s, r, "/" + r->name, std::string{}, true, 0);
+      }
+    }
+    std::string hdr = fmt::format(
+        "== Scene Summary ==\n"
+        "  nodes:     {}\n"
+        "  meshes:    {}\n"
+        "  lights:    {}\n"
+        "  cameras:   {}\n"
+        "  materials: {}\n"
+        "  vertices:  {}\n"
+        "  triangles: {}\n",
+        s.node_count, s.mesh_count, s.light_count, s.camera_count,
+        outputs.material_count.value, s.total_vertices, s.total_triangles);
+    readable.insert(0, hdr);
+    rows.insert(rows.begin(), std::move(hdr));
+  }
+  else
+  {
+    if(in_state->roots)
+    {
+      for(const auto& r : *in_state->roots)
+      {
+        if(!r)
+          continue;
+        const std::string rootPath = "/" + r->name;
+        walk(s, r, rootPath, std::string{}, true, 0);
+      }
+    }
+  }
+
+  // Publish the totals gathered by the walk.
+  outputs.node_count.value = s.node_count;
+  outputs.mesh_count.value = s.mesh_count;
+  outputs.light_count.value = s.light_count;
+  outputs.camera_count.value = s.camera_count;
+  outputs.total_vertices.value = s.total_vertices;
+  outputs.total_triangles.value = s.total_triangles;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneInspector.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.hpp
new file mode 100644
index 0000000000..98a5e883c4
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.hpp
@@ -0,0 +1,105 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace Threedim
+{
+
+// Read-only introspection node for scene_spec. Walks the incoming
+// scene tree and emits:
+//   - `Rows`: a list of strings, one per node. In Paths mode each row
+//     is a canonical slash-path (`/Root/Body/Wheels`) you can copy
+//     directly into SceneGraphFilter(paths=...) / ConfigurePrimitive /
+//     SceneSelector. In Tree mode each row is indented with
+//     box-drawing glyphs for visual hierarchy. In Names mode each row
+//     is a bare node name.
+//   - `Readable`: a formatted multi-line dump of the same information,
+//     suitable to pipe into Ui::TextBox for a wider-view inspector.
+//   - Scalar stats: node / mesh / light / camera / material counts
+//     plus totalled triangle and vertex counts.
+//
+// Bridges the "what paths exist in this scene?" question that was
+// previously unanswerable from the user's side — filter/selector
+// nodes need string patterns, and without a way to enumerate the
+// tree the user has to guess. Drop this node between a loader and a
+// filter, read the Rows list, paste the path you want into the
+// downstream node.
+class SceneInspector
+{
+public:
+  halp_meta(name, "Scene Inspector")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_inspector")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-inspector.html")
+  halp_meta(uuid, "b5f2c8a3-4d1e-4b7f-9e6c-3a8d5f0b2c9e")
+
+  enum Mode
+  {
+    Paths,     // canonical slash-paths, directly copy-pasteable
+    Names,     // bare node names (may have duplicates)
+    Tree,      // indented with ├──/└── glyphs
+    Summary    // scene-wide totals block only (no per-node rows)
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[4]{"Paths", "Names", "Tree", "Summary"};
+        int init{0};
+      };
+    } mode;
+
+    halp::toggle<"Show components"> show_components;
+    halp::toggle<"Show stats"> show_stats;
+    halp::toggle<"Include hidden"> include_hidden;
+    halp::spinbox_i32<"Max depth", halp::irange{-1, 64, -1}> max_depth;
+  } inputs;
+
+  struct outs
+  {
+    halp::val_port<"Rows", std::vector<std::string>> rows;
+    halp::val_port<"Readable", std::string> readable;
+
+    halp::val_port<"Node count", int> node_count;
+    halp::val_port<"Mesh count", int> mesh_count;
+    halp::val_port<"Light count", int> light_count;
+    halp::val_port<"Camera count", int> camera_count;
+    halp::val_port<"Material count", int> material_count;
+    halp::val_port<"Total triangles", int> total_triangles; // per-topology primitive count
+    halp::val_port<"Total vertices", int> total_vertices;
+  } outputs;
+
+  void operator()();
+
+  // Identity + version cache: operator()() returns early, leaving the
+  // outputs untouched, while the input state and every control match.
+  const ossia::scene_state* m_cached_in_state{};
+  int64_t m_cached_in_version{-1};       // -1 = no input state
+  int m_cached_mode{-1};                 // -1 is outside the Mode range
+  bool m_cached_show_components{false};
+  bool m_cached_show_stats{false};
+  bool m_cached_include_hidden{false};
+  int m_cached_max_depth{-2};            // -2 is outside the control's -1..64 range
+  bool m_cached_valid{false};            // false until the first computation
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.cpp
new file mode 100644
index 0000000000..9acf7a5c23
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.cpp
@@ -0,0 +1,65 @@
+#include "Executor.hpp"
+
+#include <Gfx/GfxApplicationPlugin.hpp>
+#include <Gfx/GfxContext.hpp>
+#include <Gfx/GfxExecNode.hpp>
+#include <Gfx/Graph/ScenePreprocessorNode.hpp>
+#include <Process/Dataflow/Port.hpp>
+#include <Process/ExecutionContext.hpp>
+#include <Threedim/ScenePreprocessor/Process.hpp>
+
+#include <ossia/dataflow/port.hpp>
+
+#include <score/document/DocumentContext.hpp>
+
+namespace Gfx::ScenePreprocessor
+{
+class scene_preprocessor_exec_node final : public gfx_exec_node
+{
+public:
+  scene_preprocessor_exec_node(GfxExecutionAction& ctx)
+      : gfx_exec_node{ctx}
+  {
+  }
+
+  void init() // creates the ScenePreprocessorNode, registers it, keeps the id
+  {
+    auto node = std::make_unique<score::gfx::ScenePreprocessorNode>();
+    id = exec_context->ui->register_node(std::move(node));
+  }
+
+  ~scene_preprocessor_exec_node() { exec_context->ui->unregister_node(id); } // undoes init()
+
+  std::string label() const noexcept override { return "Gfx::ScenePreprocessor_node"; }
+};
+
+ProcessExecutorComponent::ProcessExecutorComponent(
+    Gfx::ScenePreprocessor::Model& element,
+    const Execution::Context& ctx,
+    QObject* parent)
+    : ProcessComponent_T{element, ctx, "scenePreprocessorComponent", parent}
+{
+  // Execution-side node; it receives the document plugin's `exec` object.
+  auto exec_node = ossia::make_node<scene_preprocessor_exec_node>(
+      *ctx.execState, ctx.doc.plugin<DocumentPlugin>().exec);
+
+  // Inlet 0: the scene to flatten.
+  exec_node->add_geometry();
+  // Outlet 0: the single Geometry output. Material-texture arrays
+  // (base_color, metal_rough, normal, emissive) and the skybox ride along
+  // as auxiliary_texture entries; scene-wide UBOs/SSBOs (camera, env,
+  // scene_lights/materials, per_draws, indirect, scene_counts) ride along
+  // as auxiliary_buffer entries. Consumer shaders bind them by name.
+  exec_node->add_geometry_out();
+
+  // Register the gfx node, then hand the exec node to the component.
+  exec_node->init();
+  this->node = exec_node;
+  m_ossia_process = std::make_shared<ossia::node_process>(exec_node);
+}
+
+void ProcessExecutorComponent::cleanup()
+{
+  ProcessComponent_T::cleanup(); // no extra teardown here; defers to the base class
+}
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.hpp
new file mode 100644
index 0000000000..449087e5b1
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.hpp
@@ -0,0 +1,24 @@
+#pragma once
+#include <Process/Execution/ProcessComponent.hpp>
+
+#include <ossia/dataflow/node_process.hpp>
+
+namespace Gfx::ScenePreprocessor
+{
+class Model;
+class ProcessExecutorComponent final // execution component for ScenePreprocessor::Model
+    : public Execution::
+          ProcessComponent_T<Gfx::ScenePreprocessor::Model, ossia::node_process>
+{
+  COMPONENT_METADATA("d7e2f8b4-9a3c-4e1b-8f6d-0c5a2b7e9f1d")
+public:
+  ProcessExecutorComponent(
+      Model& element,
+      const Execution::Context& ctx,
+      QObject* parent);
+  void cleanup() override; // forwards to ProcessComponent_T::cleanup()
+};
+
+using ProcessExecutorComponentFactory
+    = Execution::ProcessComponentFactory_T<ProcessExecutorComponent>;
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Metadata.hpp
new file mode 100644
index 0000000000..422142fe8f
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Metadata.hpp
@@ -0,0 +1,22 @@
+#pragma once
+#include <Process/ProcessMetadata.hpp>
+
+namespace Gfx::ScenePreprocessor
+{
+class Model;
+}
+
+PROCESS_METADATA(
+    , Gfx::ScenePreprocessor::Model, "a8f2c6d0-1b4e-4c7a-9d3f-5e8b2c1a7f0d",
+    "scenepreprocessor",                             // Internal name
+    "Scene Preprocessor",                            // Pretty name
+    Process::ProcessCategory::Visual,             // Category (enum)
+    "Visuals/3D/Scene",                           // Category (path string)
+    "Flattens a scene_spec hierarchy into a GPU-resident geometry_spec", // Description
+    "ossia team",                                 // Author
+    (QStringList{"gfx", "scene", "geometry", "3d"}), // Tags
+    {},                                           // Inputs
+    {},                                           // Outputs
+    QUrl{},                                       // Doc url
+    Process::ProcessFlags::SupportsAll            // Flags
+)
diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.cpp
new file mode 100644
index 0000000000..804b887a5a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.cpp
@@ -0,0 +1,83 @@
+#include "Process.hpp"
+
+#include <score/application/ApplicationComponents.hpp>
+
+#include <Process/Dataflow/Port.hpp>
+#include <Process/Dataflow/WidgetInlets.hpp>
+
+#include <Gfx/Graph/Node.hpp>
+#include <Gfx/TexturePort.hpp>
+
+#include <wobjectimpl.h>
+
+W_OBJECT_IMPL(Gfx::ScenePreprocessor::Model)
+namespace Gfx::ScenePreprocessor
+{
+
+Model::Model(
+    const TimeVal& duration, const Id<Process::ProcessModel>& id, QObject* parent)
+    : Process::ProcessModel{duration, id, "gfxProcess", parent}
+{
+  metadata().setInstanceName(*this);
+  init();
+}
+
+Model::~Model() = default;
+
+void Model::init()
+{
+  if(m_inlets.empty() && m_outlets.empty())
+  {
+    m_inlets.push_back(new GeometryInlet{"Scene In", Id<Process::Port>(0), this});
+    // Single Geometry Out — all material-texture arrays (base_color,
+    // metal_rough, normal, emissive), camera / env / scene UBOs and the
+    // environment skybox ride along as auxiliary_buffer / auxiliary_texture
+    // entries on the emitted geometry. Consumer shaders auto-resolve them
+    // by name via try_bind_from_geometry / try_bind_texture_from_geometry;
+    // no manual cable needed.
+    m_outlets.push_back(new GeometryOutlet{"Geometry Out", Id<Process::Port>(0), this});
+  }
+}
+
+QString Model::prettyName() const noexcept
+{
+  return tr("Scene Preprocessor");
+}
+
+}
+
+template <>
+void DataStreamReader::read(const Gfx::ScenePreprocessor::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets);
+  insertDelimiter();
+}
+
+template <>
+void DataStreamWriter::write(Gfx::ScenePreprocessor::Model& proc)
+{
+  writePorts(
+      *this,
+      components.interfaces<Process::PortFactoryList>(),
+      proc.m_inlets,
+      proc.m_outlets,
+      &proc);
+  checkDelimiter();
+}
+
+template <>
+void JSONReader::read(const Gfx::ScenePreprocessor::Model& proc)
+{
+  readPorts(*this, proc.m_inlets, proc.m_outlets);
+}
+
+template <>
+void JSONWriter::write(Gfx::ScenePreprocessor::Model& proc)
+{
+  writePorts(
+      *this,
+      components.interfaces<Process::PortFactoryList>(),
+      proc.m_inlets,
+      proc.m_outlets,
+      &proc);
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.hpp
new file mode 100644
index 0000000000..0cf96bf394
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.hpp
@@ -0,0 +1,37 @@
+#pragma once
+#include <Gfx/CommandFactory.hpp>
+#include <Threedim/ScenePreprocessor/Metadata.hpp>
+#include <Process/GenericProcessFactory.hpp>
+#include <Process/Process.hpp>
+
+namespace Gfx::ScenePreprocessor
+{
+class Model final : public Process::ProcessModel
+{
+  SCORE_SERIALIZE_FRIENDS
+  PROCESS_METADATA_IMPL(Gfx::ScenePreprocessor::Model)
+  W_OBJECT(Model)
+
+public:
+  Model(
+      const TimeVal& duration,
+      const Id<Process::ProcessModel>& id,
+      QObject* parent);
+
+  template <typename Impl>
+  Model(Impl& vis, QObject* parent)
+      : Process::ProcessModel{vis, parent}
+  {
+    vis.writeTo(*this);
+    init();
+  }
+
+  ~Model() override;
+
+private:
+  void init();
+  QString prettyName() const noexcept override;
+};
+
+using ProcessFactory = Process::ProcessFactory_T<Gfx::ScenePreprocessor::Model>;
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.cpp
new file mode 100644
index 0000000000..44894ff71b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.cpp
@@ -0,0 +1,68 @@
+#include "SceneResourceRoute.hpp"
+
+namespace Threedim
+{
+
+void SceneResourceRoute::rebuild()
+{
+  if(!m_state)
+    m_state = std::make_shared<ossia::scene_state>();
+
+  // Reset the environment / shadow bits we own. Partial-producer
+  // contract: this node contributes exactly one field; everything else
+  // stays at defaults and gets filtered out by merge_scenes' per-field
+  // overlay (it only picks up texture handles with non-null
+  // native_handle, and params_set bits we don't light — we don't).
+  m_state->environment = {};
+  m_state->shadow_cascades = {};
+
+  void* handle = inputs.texture.texture.handle;
+  if(handle)
+  {
+    switch(inputs.target.value)
+    {
+      case SceneResourceTarget::Skybox:
+        m_state->environment.skybox_texture.native_handle = handle;
+        break;
+      case SceneResourceTarget::IrradianceMap:
+        m_state->environment.irradiance_map.native_handle = handle;
+        break;
+      case SceneResourceTarget::PrefilteredMap:
+        m_state->environment.prefiltered_map.native_handle = handle;
+        break;
+      case SceneResourceTarget::BRDFLut:
+        m_state->environment.brdf_lut.native_handle = handle;
+        break;
+      case SceneResourceTarget::ShadowMapArray:
+        m_state->shadow_cascades.shadow_map_array.native_handle = handle;
+        break;
+    }
+  }
+
+  m_state->version = ++m_version;
+  m_state->dirty_index = m_version;
+
+  m_cached_handle = handle;
+  m_cached_target = inputs.target.value;
+  m_pending_dirty = 0xFF;
+}
+
+void SceneResourceRoute::operator()()
+{
+  // The halp GPU-texture input doesn't fire a port-update event on
+  // native-handle swap (only on port re-wiring), so we poll here and
+  // rebuild when either the handle or the target changed. Stable
+  // scene_state identity means the no-change case is a cheap
+  // shared_ptr forward without re-allocating.
+  void* handle = inputs.texture.texture.handle;
+  const bool changed = !m_state || handle != m_cached_handle
+                       || inputs.target.value != m_cached_target;
+  if(changed)
+    rebuild();
+
+  outputs.scene_out.scene.state = m_state;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.hpp
new file mode 100644
index 0000000000..a247397199
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.hpp
@@ -0,0 +1,93 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+
+namespace Threedim
+{
+
+// Level 1 of the "resource → scene field" routing design. Takes a GPU
+// texture handle from any upstream producer (CSF shader output, video,
+// ISF post-pass, asset loader, …) and stamps it onto a named field of
+// `scene_spec`. The emitted scene_spec is a partial contribution with
+// only that one field populated — `merge_scenes` overlays it onto the
+// rest of the scene_state the ScenePreprocessor receives from other
+// producers, so this node composes freely with EnvironmentLoader /
+// CubemapLoader / further SceneResourceRoute instances.
+//
+// Core use case is IBL wiring: an IrradianceConvolve / PrefilterGGX /
+// BrdfLut shader's output plugs in here and lands on
+// `scene_environment.{irradiance_map, prefiltered_map, brdf_lut}` with
+// zero bespoke glue code per target. Shadow-map generation passes will
+// target `scene_state.shadow_cascades.shadow_map_array` the same way.
+//
+// Pattern mirrors CubemapComposer / InjectTexture: CPU-side producer,
+// port-driven rebuild + handle-change detection in operator()().
+enum class SceneResourceTarget : int
+{
+  Skybox,           // scene_environment.skybox_texture
+  IrradianceMap,    // scene_environment.irradiance_map
+  PrefilteredMap,   // scene_environment.prefiltered_map
+  BRDFLut,          // scene_environment.brdf_lut
+  ShadowMapArray,   // scene_state.shadow_cascades.shadow_map_array
+};
+
+class SceneResourceRoute
+{
+public:
+  halp_meta(name, "Scene Resource Route")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_resource_route")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-resource-route.html")
+  halp_meta(uuid, "c2f7a341-8e69-4b0d-b3f8-2d7e4c5a9f1b")
+
+  struct ins
+  {
+    // Accepts any GPU texture kind — 2D, cubemap, array. Downstream
+    // consumer shaders (classic_pbr_ibl, classic_pbr_shadowed) declare
+    // their own sampler shape (samplerCube / sampler2DArray / sampler2D)
+    // and it's the authoring's responsibility to match the two.
+    halp::gpu_texture_input<"Texture"> texture;
+
+    // Port-driven rebuild: target changes fire rebuild(); upstream
+    // handle flips are caught by operator()() since the halp GPU-texture
+    // input doesn't emit a port-update event when only the native
+    // handle swaps.
+    struct : halp::enum_t<SceneResourceTarget, "Target Field">
+    {
+      void update(SceneResourceRoute& n) { n.rebuild(); }
+    } target;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  // Cached output scene_state — stable identity across frames (so
+  // downstream scene-identity caches stay hot) and mutated in place on
+  // target / handle changes.
+  std::shared_ptr<ossia::scene_state> m_state;
+  int64_t m_version{0};
+  void* m_cached_handle{};
+  SceneResourceTarget m_cached_target{SceneResourceTarget::Skybox};
+  uint8_t m_pending_dirty{0xFF};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneSelector.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.cpp
new file mode 100644
index 0000000000..b300f71063
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.cpp
@@ -0,0 +1,206 @@
+#include "SceneSelector.hpp"
+
+#include <algorithm>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Duplicated glob matcher ('?': non-'/' char, '*': no '/', '**': anything).
+bool selector_glob_match(std::string_view pattern, std::string_view text) noexcept
+{
+  std::size_t pi = 0, ti = 0;
+  std::size_t star_pi = std::string_view::npos, star_ti = 0; // latest star
+  std::size_t deep_pi = std::string_view::npos, deep_ti = 0; // last '**'
+  bool star_double = false;
+  while(ti < text.size())
+  {
+    if(pi < pattern.size())
+    {
+      const char pc = pattern[pi];
+      if(pc == '*')
+      {
+        if(star_double) // a '**' is being superseded: keep it as fallback
+        {
+          deep_pi = star_pi;
+          deep_ti = star_ti;
+        }
+        star_double = (pi + 1 < pattern.size() && pattern[pi + 1] == '*');
+        pi += star_double ? 2 : 1;
+        star_pi = pi;
+        star_ti = ti;
+        continue;
+      }
+      if((pc == '?' && text[ti] != '/') || pc == text[ti])
+      {
+        ++pi;
+        ++ti;
+        continue;
+      }
+    }
+    if(star_pi != std::string_view::npos && (star_double || text[star_ti] != '/'))
+    {
+      pi = star_pi; // the latest star swallows one more character
+      ti = ++star_ti;
+      continue;
+    }
+    if(deep_pi == std::string_view::npos) // nothing left to backtrack to
+      return false;
+    star_pi = pi = deep_pi; // resume from the '**', one character further
+    star_ti = ti = ++deep_ti;
+    star_double = true;
+  }
+  while(pi < pattern.size() && pattern[pi] == '*')
+    ++pi;
+  return pi == pattern.size();
+}
+
+// Depth-first search returning the first node whose accumulated path
+// matches `pat`. `path` is the '/'-joined path of `n`; child paths are
+// formed as path + '/' + child name. Returns nullptr when nothing matches.
+ossia::scene_node_ptr selector_findByPath(
+    const ossia::scene_node_ptr& n, std::string_view pat, const std::string& path)
+{
+  if(!n)
+    return nullptr;
+  if(selector_glob_match(pat, path))
+    return n;
+  if(!n->has_children())
+    return nullptr;
+  for(const auto& p : *n->children)
+  {
+    if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&p))
+    {
+      if(!*sub)
+        continue;
+      std::string childPath = path + '/' + (*sub)->name;
+      if(auto r = selector_findByPath(*sub, pat, childPath))
+        return r;
+    }
+  }
+  return nullptr;
+}
+
+ossia::scene_node_ptr
+findByName(const ossia::scene_node_ptr& n, std::string_view wanted)
+{
+  if(!n)
+    return nullptr;
+  if(n->name == wanted)
+    return n;
+  if(!n->has_children())
+    return nullptr;
+  for(const auto& p : *n->children)
+  {
+    if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&p))
+      if(auto r = findByName(*sub, wanted))
+        return r;
+  }
+  return nullptr;
+}
+
+// Strip the first scene_transform payload from a scene_node's children
+// list — used for the ZeroOut rebase mode. The node layout convention
+// (GltfParser / FbxParser / ConfigurePrimitive / etc.) puts the TRS
+// as the first child payload; dropping it leaves the subtree at the
+// world origin.
+ossia::scene_node_ptr stripLeadingTransform(const ossia::scene_node_ptr& n)
+{
+  if(!n || !n->has_children())
+    return n;
+  if(n->children->empty())
+    return n;
+  if(!ossia::get_if<ossia::scene_transform>(&(*n->children)[0]))
+    return n;
+
+  auto clone_children
+      = std::make_shared<std::vector<ossia::scene_payload>>(
+          n->children->begin() + 1, n->children->end());
+  auto copy = std::make_shared<ossia::scene_node>(*n);
+  copy->children = std::move(clone_children);
+  copy->dirty_index = n->dirty_index + 1;
+  return copy;
+}
+
+} // namespace
+
+void SceneSelector::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  if(!in.state)
+  {
+    m_cached_out.reset();
+    m_pending_dirty = 0;
+    return;
+  }
+
+  const auto* s = in.state.get();
+  const int64_t v = in.state->version;
+
+  ossia::scene_node_ptr found;
+  const auto mode = Mode(inputs.mode.value);
+  if(in.state->roots)
+  {
+    switch(mode)
+    {
+      case ByIndex: {
+        const auto idx = std::size_t(std::max(0, inputs.index.value));
+        if(idx < in.state->roots->size())
+          found = (*in.state->roots)[idx];
+        break;
+      }
+      case ByName: {
+        for(const auto& r : *in.state->roots)
+        {
+          if((found = findByName(r, inputs.path.value)))
+            break;
+        }
+        break;
+      }
+      default: {
+        for(const auto& r : *in.state->roots)
+        {
+          const std::string base = r ? ("/" + r->name) : std::string{};
+          if((found = selector_findByPath(r, inputs.path.value, base)))
+            break;
+        }
+        break;
+      }
+    }
+  }
+
+  if(found && inputs.rebase.value == ZeroOut)
+    found = stripLeadingTransform(found);
+
+  auto new_roots
+      = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  if(found)
+    new_roots->push_back(std::move(found));
+
+  auto new_state = std::make_shared<ossia::scene_state>(*in.state);
+  new_state->roots = std::move(new_roots);
+  new_state->version = ++m_version_counter;
+  new_state->dirty_index = in.state->dirty_index + 1;
+
+  m_cached_out = std::move(new_state);
+  m_cached_in_state = s;
+  m_cached_in_version = v;
+  m_pending_dirty = 0xFF;
+}
+
+void SceneSelector::operator()()
+{
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  const bool upstream_changed
+      = m_cached_in_state != in_state || m_cached_in_version != in_version;
+  if(upstream_changed || (!m_cached_out && in_state))
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneSelector.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.hpp
new file mode 100644
index 0000000000..785ae9d3a6
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.hpp
@@ -0,0 +1,114 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+namespace Threedim
+{
+
+// Extracts a subtree from an incoming scene and emits it as a fresh
+// scene_spec. The Solaris "Extract" pattern: if SceneGraphFilter is
+// Prune (keeps the tree shape, drops non-matches), SceneSelector is
+// Extract (gathers the matches and forgets the ancestors).
+//
+// Use case: pull out the camera, a light rig, or a character subtree
+// so it can be re-transformed / re-materialized and then merged back
+// in via SceneGroup.
+//
+// Rebase modes:
+//   Preserve       : emit the subtree root as-is, so its transform
+//                    remains in its original parent frame (the
+//                    ancestors are gone but the transform still
+//                    matches where it was).
+//   ZeroOut        : drop the subtree's own TRS so it renders at the
+//                    world origin. Useful when you want to re-place
+//                    the extracted subtree via an upstream
+//                    Transform3D / SceneGroup.
+class SceneSelector
+{
+public:
+  halp_meta(name, "Scene Selector")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_selector")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-selector.html")
+  halp_meta(uuid, "6c4d8b3f-5e2a-4d1f-9c7b-8a3e5f0d7b4c")
+
+  enum Mode
+  {
+    ByPath,
+    ByName,
+    ByIndex       // index into the root list (0 = first root)
+  };
+
+  enum Rebase
+  {
+    Preserve,
+    ZeroOut
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Port-driven rebuild: controls trigger rebuild(); upstream
+    // scene_in changes detected in operator()().
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[3]{"By path", "By name", "By index"};
+        int init{0};
+      };
+      void update(SceneSelector& n) { n.rebuild(); }
+    } mode;
+
+    struct : halp::lineedit<"Path / Name", "">
+    { void update(SceneSelector& n) { n.rebuild(); } } path;
+    struct : halp::spinbox_i32<"Index", halp::irange{0, 1024, 0}>
+    { void update(SceneSelector& n) { n.rebuild(); } } index;
+
+    struct : halp::combobox_t<"Rebase", Rebase>
+    {
+      struct range
+      {
+        std::string_view values[2]{"Preserve transform", "Zero out"};
+        int init{0};
+      };
+      void update(SceneSelector& n) { n.rebuild(); }
+    } rebase;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  std::shared_ptr<const ossia::scene_state> m_cached_out;
+  uint8_t m_pending_dirty{0xFF};
+  const ossia::scene_state* m_cached_in_state{};
+  int64_t m_cached_in_version{-1};
+  int64_t m_version_counter{0};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneSwitch.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneSwitch.hpp
new file mode 100644
index 0000000000..c56c25710b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneSwitch.hpp
@@ -0,0 +1,110 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+
+namespace Threedim
+{
+
+// N-way scene_spec switch. Pick one of up to 4 scene inputs to pass
+// through by index; live VJ-style A/B/C/D scene cutting.
+//
+// Unwired inputs are skipped — if `index` points at an empty slot, the
+// node emits an empty scene, which downstream treats as "nothing to
+// render" (no error). This makes it safe to leave slots open during
+// authoring and fill them in incrementally.
+//
+// For blending between scenes: don't do it at the scene-graph level.
+// Render each scene to its own texture (ScenePreprocessor → classic_pbr
+// → BackgroundNode with a texture output) and ISF-crossfade the
+// textures. Scene-level blending has no meaningful semantics for
+// arbitrarily-different scene trees.
+class SceneSwitch
+{
+public:
+  halp_meta(name, "Scene Switch")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "scene_switch")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/scene-switch.html")
+  halp_meta(uuid, "7d5c3f8a-2e9b-4a1c-8f6d-5b3e0d9a7c4f")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene 0");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene0;
+    struct
+    {
+      halp_meta(name, "Scene 1");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene1;
+    struct
+    {
+      halp_meta(name, "Scene 2");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene2;
+    struct
+    {
+      halp_meta(name, "Scene 3");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene3;
+
+    halp::spinbox_i32<"Index", halp::irange{0, 3, 0}> index;
+  } inputs;
+
+  // Cache for upstream-change detection (mirrors CameraSwitch.Select).
+  const ossia::scene_state* m_cached_state{};
+  int64_t m_cached_version{-1};
+  int m_cached_index{-1};
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void operator()()
+  {
+    const int idx = inputs.index.value;
+    const ossia::scene_spec* picked = nullptr;
+    switch(idx)
+    {
+      case 0: picked = &inputs.scene0.scene; break;
+      case 1: picked = &inputs.scene1.scene; break;
+      case 2: picked = &inputs.scene2.scene; break;
+      case 3: picked = &inputs.scene3.scene; break;
+      default: picked = &inputs.scene0.scene; break;
+    }
+    outputs.scene_out.scene = *picked;
+
+    // Dirty flag drives downstream re-evaluation. Raise it only on
+    // real change: index switch, picked-input pointer change, or
+    // picked-input version bump. Empty slots stay quiet.
+    const auto* s = picked->state.get();
+    const int64_t v = s ? s->version : -1;
+    const bool changed = (idx != m_cached_index) || (s != m_cached_state)
+                         || (v != m_cached_version);
+    outputs.scene_out.dirty = (s && changed) ? 0xFF : 0;
+    m_cached_index = idx;
+    m_cached_state = s;
+    m_cached_version = v;
+  }
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.cpp b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.cpp
new file mode 100644
index 0000000000..8dcf1403d9
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.cpp
@@ -0,0 +1,337 @@
+#include "ShadowCascadeSetup.hpp"
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+#include <QVector3D>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <limits>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Compute one cascade's orthographic light view_projection matrix such
+// that every corner of the camera-frustum slice between `near` and
+// `far` maps inside the unit cube [-1, 1]³ after the light transform.
+//
+// Steps:
+//   1. Build 8 frustum-slice world-space corners from the camera
+//      view_proj inverse + near/far clip-space Zs.
+//   2. Transform them into light view-space (camera facing -Z along
+//      `lightDir`, up arbitrary-but-orthogonal).
+//   3. Axis-aligned-bounding-box → light-space ortho projection.
+QMatrix4x4 cascadeLightVP(
+    const QMatrix4x4& cameraVPInv, float nearZ, float farZ,
+    const QVector3D& lightDir)
+{
+  // Frustum corner coords in NDC. Using OpenGL-ish [-1, 1] — the host
+  // clip-space correction matrix handles the Vulkan flip downstream.
+  QVector3D corners[8] = {
+      QVector3D(-1.f, -1.f, nearZ), QVector3D( 1.f, -1.f, nearZ),
+      QVector3D(-1.f,  1.f, nearZ), QVector3D( 1.f,  1.f, nearZ),
+      QVector3D(-1.f, -1.f, farZ),  QVector3D( 1.f, -1.f, farZ),
+      QVector3D(-1.f,  1.f, farZ),  QVector3D( 1.f,  1.f, farZ)};
+
+  QVector3D world_corners[8];
+  QVector3D centroid(0, 0, 0);
+  for(int i = 0; i < 8; ++i)
+  {
+    // Unproject to world.
+    QVector4D clip(corners[i], 1.f);
+    QVector4D w = cameraVPInv * clip;
+    world_corners[i] = w.toVector3D() / w.w();
+    centroid += world_corners[i];
+  }
+  centroid /= 8.f;
+
+  // Light view: looking along lightDir, centered at the slice centroid.
+  const QVector3D dir = lightDir.normalized();
+  QVector3D up(0, 1, 0);
+  if(std::abs(QVector3D::dotProduct(dir, up)) > 0.95f)
+    up = QVector3D(1, 0, 0); // light nearly parallel to Y: pick another up
+  QMatrix4x4 lightView;
+  lightView.lookAt(centroid - dir, centroid, up);
+
+  // Compute AABB of slice corners in light-view space.
+  constexpr float kMax = std::numeric_limits<float>::max();
+  QVector3D minLS(kMax, kMax, kMax);
+  QVector3D maxLS = -minLS;
+  for(int i = 0; i < 8; ++i)
+  {
+    QVector3D ls = lightView.map(world_corners[i]);
+    minLS.setX(std::min(minLS.x(), ls.x()));
+    minLS.setY(std::min(minLS.y(), ls.y()));
+    minLS.setZ(std::min(minLS.z(), ls.z()));
+    maxLS.setX(std::max(maxLS.x(), ls.x()));
+    maxLS.setY(std::max(maxLS.y(), ls.y()));
+    maxLS.setZ(std::max(maxLS.z(), ls.z()));
+  }
+  // Pull the near plane back toward the light (+Z in light-view space) so
+  // occluders between the light and the slice can still cast shadows into it.
+  const float zPad = (maxLS.z() - minLS.z()) * 0.25f + 1.f;
+  maxLS.setZ(maxLS.z() + zPad);
+
+  QMatrix4x4 lightProj;
+  lightProj.ortho(
+      minLS.x(), maxLS.x(), minLS.y(), maxLS.y(),
+      -maxLS.z(), -minLS.z()); // view looks down -Z: near = -maxZ, far = -minZ
+
+  return lightProj * lightView;
+}
+
+// Resolve the first directional light's world direction from the scene
+// tree. Recurses through scene_nodes, accumulating parent TRS, and
+// matches any light_component whose type == directional — regardless of
+// which source node emitted it. Returns false when no directional light
+// is found.
+bool findDirectionalLight(
+    const ossia::scene_node& n, const QMatrix4x4& parentWorld,
+    QVector3D& outDir) noexcept
+{
+  QMatrix4x4 local;
+  if(n.children)
+  {
+    for(const auto& p : *n.children)
+    {
+      if(auto* xf = ossia::get_if<ossia::scene_transform>(&p))
+      {
+        local.translate(xf->translation[0], xf->translation[1], xf->translation[2]);
+        local.rotate(QQuaternion(
+            xf->rotation[3], xf->rotation[0], xf->rotation[1], xf->rotation[2]));
+        local.scale(xf->scale[0], xf->scale[1], xf->scale[2]);
+        break;
+      }
+    }
+  }
+  const QMatrix4x4 world = parentWorld * local;
+  if(n.children)
+  {
+    for(const auto& p : *n.children)
+    {
+      if(auto* lc = ossia::get_if<ossia::light_component_ptr>(&p))
+      {
+        if(*lc && (*lc)->type == ossia::light_type::directional)
+        {
+          // Directional light convention (the Light node encodes the
+          // user's direction as a rotation of canonical local -Z via
+          // QQuaternion::rotationTo, so local -Z points along the
+          // configured direction). World direction is therefore the
+          // -Z column of the world matrix.
+          QVector3D nZ = world.mapVector(QVector3D(0, 0, -1));
+          if(nZ.lengthSquared() > 1e-5f)
+          {
+            outDir = nZ.normalized();
+            return true;
+          }
+        }
+      }
+      if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&p))
+        if(*sub && findDirectionalLight(**sub, world, outDir))
+          return true;
+    }
+  }
+  return false;
+}
+
+// Resolve the active camera's view + projection matrices from the scene
+// tree. Walks the same way as findDirectionalLight: per-node TRS
+// accumulation into a world matrix, then on hitting a camera_component
+// we invert the world matrix to obtain the view. Matching policy:
+//   - if `state.active_camera_id` is non-zero, only the scene_node whose
+//     id equals it is accepted;
+//   - otherwise the first camera encountered wins (matches the "single
+//     Camera node is auto-picked" convention from Camera.hpp).
+bool findActiveCamera(
+    const ossia::scene_node& n, const QMatrix4x4& parentWorld,
+    const ossia::scene_state& state, float aspect,
+    QMatrix4x4& outView, QMatrix4x4& outProj) noexcept
+{
+  QMatrix4x4 local;
+  if(n.children)
+  {
+    for(const auto& p : *n.children)
+    {
+      if(auto* xf = ossia::get_if<ossia::scene_transform>(&p))
+      {
+        local.translate(xf->translation[0], xf->translation[1], xf->translation[2]);
+        local.rotate(QQuaternion(
+            xf->rotation[3], xf->rotation[0], xf->rotation[1], xf->rotation[2]));
+        local.scale(xf->scale[0], xf->scale[1], xf->scale[2]);
+        break;
+      }
+    }
+  }
+  const QMatrix4x4 world = parentWorld * local;
+  const bool id_filter = state.active_camera_id.value != 0;
+  const bool id_matches = !id_filter || n.id == state.active_camera_id;
+  if(n.children)
+  {
+    for(const auto& p : *n.children)
+    {
+      if(id_matches)
+      {
+        if(auto* cc = ossia::get_if<ossia::camera_component_ptr>(&p))
+        {
+          if(*cc)
+          {
+            const auto& cam = **cc;
+            outView = world.inverted();
+            outProj = QMatrix4x4{};
+            outProj.perspective(
+                cam.yfov * 180.f / float(M_PI), aspect, cam.znear, cam.zfar);
+            return true;
+          }
+        }
+      }
+      if(auto* sub = ossia::get_if<ossia::scene_node_ptr>(&p))
+        if(*sub && findActiveCamera(**sub, world, state, aspect, outView, outProj))
+          return true;
+    }
+  }
+  return false;
+}
+
+} // namespace
+
+void ShadowCascadeSetup::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  const ossia::scene_state* in_state = in.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+
+  const int count = std::clamp(inputs.cascade_count.value, 1, 8);
+  const float cur_dir[3]{
+      inputs.light_direction.value.x, inputs.light_direction.value.y,
+      inputs.light_direction.value.z};
+
+  m_cached_in_state = in_state;
+  m_cached_in_version = in_version;
+  m_cached_count = count;
+  m_cached_distance = inputs.shadow_distance.value;
+  m_cached_lambda = inputs.lambda.value;
+  m_cached_near = inputs.camera_near.value;
+  m_cached_far = inputs.camera_far.value;
+  std::copy(cur_dir, cur_dir + 3, m_cached_dir);
+
+  if(!in_state)
+  {
+    m_cached_out = in.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  // Gather inputs for cascade computation.
+  const float nearZ = inputs.camera_near.value;
+  const float farZ = std::min(inputs.camera_far.value, inputs.shadow_distance.value);
+  const float lambda = std::clamp(inputs.lambda.value, 0.f, 1.f);
+
+  // Scene-derived light direction if the control is left at (0,0,0).
+  QVector3D lightDir(cur_dir[0], cur_dir[1], cur_dir[2]);
+  if(lightDir.lengthSquared() < 1e-6f)
+  {
+    lightDir = QVector3D(-0.4f, -0.8f, -0.6f);
+    if(in_state->roots)
+    {
+      for(const auto& r : *in_state->roots)
+      {
+        QVector3D found;
+        if(r && findDirectionalLight(*r, QMatrix4x4{}, found))
+        {
+          lightDir = found;
+          break;
+        }
+      }
+    }
+  }
+  lightDir.normalize();
+
+  // Find the active camera's view_projection by walking the scene tree
+  // the same way findDirectionalLight does. The camera's placement lives
+  // on its owning scene_node's scene_transform, so view = inverse(world).
+  // Fall back to identity when the scene has no camera (the cascades
+  // will be approximate but the node stays safe to wire in early).
+  //
+  // Aspect is unknown at this stage (ScenePreprocessor is the canonical
+  // source of the render-target aspect); 16:9 is a reasonable default
+  // and the cascade fit is approximate anyway.
+  QMatrix4x4 cameraVP;
+  const float aspect = 16.f / 9.f;
+  if(in_state->roots)
+  {
+    QMatrix4x4 view, proj;
+    for(const auto& r : *in_state->roots)
+    {
+      if(r && findActiveCamera(*r, QMatrix4x4{}, *in_state, aspect, view, proj))
+      {
+        cameraVP = proj * view;
+        break;
+      }
+    }
+  }
+
+  const QMatrix4x4 cameraVPInv = cameraVP.inverted();
+
+  // Practical split scheme (Engel/Tabellion).
+  ossia::shadow_cascades_info info{};
+  info.cascade_count = uint32_t(count);
+  info.shadow_distance = inputs.shadow_distance.value;
+  info.light_direction[0] = lightDir.x();
+  info.light_direction[1] = lightDir.y();
+  info.light_direction[2] = lightDir.z();
+
+  info.split_view_depths[0] = nearZ;
+  for(int i = 1; i < count; ++i)
+  {
+    const float p = float(i) / float(count);
+    const float logSplit = nearZ * std::pow(farZ / nearZ, p);
+    const float uniSplit = nearZ + (farZ - nearZ) * p;
+    info.split_view_depths[i] = lambda * logSplit + (1.f - lambda) * uniSplit;
+  }
+  info.split_view_depths[count] = farZ;
+
+  // NDC-Z range for each cascade slice. glClipSpace uses [-1, 1]; Vulkan
+  // uses [0, 1] after clipSpaceCorr — here we work in camera clip-space
+  // pre-correction, so [-1, 1] is correct.
+  for(int i = 0; i < count; ++i)
+  {
+    // Convert view-space Z to NDC Z via the projection we computed above.
+    // Re-derive via the projection: ndcZ = (proj.z * view.z + proj.w.z) /
+    // (-view.z). Easier: just probe two world-space points at known view
+    // depths through cameraVP and read their .z.
+    QVector4D p0 = cameraVP * QVector4D(0, 0, -info.split_view_depths[i], 1);
+    QVector4D p1 = cameraVP * QVector4D(0, 0, -info.split_view_depths[i + 1], 1);
+    const float ndc0 = p0.w() != 0.f ? p0.z() / p0.w() : -1.f;
+    const float ndc1 = p1.w() != 0.f ? p1.z() / p1.w() : 1.f;
+    QMatrix4x4 m = cascadeLightVP(cameraVPInv, ndc0, ndc1, lightDir);
+    std::memcpy(info.light_view_proj[i], m.constData(), sizeof(float) * 16);
+  }
+
+  // Clone scene_state with the new cascades info.
+  auto state = std::make_shared<ossia::scene_state>(*in_state);
+  state->shadow_cascades = info;
+  state->version = ++m_version_counter;
+  state->dirty_index = m_version_counter;
+
+  m_cached_out = state;
+  m_pending_dirty = 0xFF;
+}
+
+void ShadowCascadeSetup::operator()()
+{
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  const bool upstream_changed // identity or version of the input moved
+      = m_cached_in_state != in_state || m_cached_in_version != in_version;
+  if(upstream_changed || (!m_cached_out && in_state)) // idle on null input
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0; // the dirty mask is reported for one tick only
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.hpp b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.hpp
new file mode 100644
index 0000000000..398fb30d98
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.hpp
@@ -0,0 +1,98 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+
+namespace Threedim
+{
+
+// Authors a `shadow_cascades_info` for the scene from the active camera
+// frustum and a directional-light direction. Consumed by:
+//   - a depth-only shadow_cascades pass (one draw per cascade)
+//   - classic_pbr_full's PCF sampling at final shading
+//
+// Practical-split strategy: blend uniform and logarithmic splits with a
+// λ parameter (Engel / Tabellion). λ=0 → pure uniform (equal depth
+// intervals, wastes near-plane resolution), λ=1 → pure log (near-plane
+// heavy, far cascades get almost no area). λ≈0.5 is a good default for
+// interactive scenes.
+//
+// Each cascade's light view_projection fits the camera frustum slice to
+// a square orthographic light-space box centered at the slice's world-
+// space center, oriented along the light direction.
+class ShadowCascadeSetup
+{
+public:
+  halp_meta(name, "Shadow Cascade Setup")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "shadow_cascade_setup")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/shadow-cascade-setup.html")
+  halp_meta(uuid, "7f4d8c2a-9e5b-4f6a-a3d2-1e8c6b9d7f4a")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene; // upstream scene; operator()() watches its state
+      uint8_t dirty{0}; // NOTE(review): not read by operator()() — confirm intent
+    } scene_in;
+
+    // Port-driven rebuild: each control's update() calls rebuild(); upstream
+    // scene_in changes are detected in operator()().
+    struct : halp::spinbox_i32<"Cascade count", halp::irange{1, 8, 4}>
+    { void update(ShadowCascadeSetup& n) { n.rebuild(); } } cascade_count;
+    struct : halp::hslider_f32<"Shadow distance", halp::range{1., 10000., 100.}>
+    { void update(ShadowCascadeSetup& n) { n.rebuild(); } } shadow_distance;
+    struct : halp::hslider_f32<"Split lambda", halp::range{0., 1., 0.5}>
+    { void update(ShadowCascadeSetup& n) { n.rebuild(); } } lambda;
+    // Manual near/far override for the camera (the scene_state doesn't
+    // currently expose the active camera's near/far on an accessible
+    // path — these let the user match them). Typical defaults work for
+    // the Camera node's default near=0.1 / far=1000.
+    struct : halp::hslider_f32<"Camera near", halp::range{0.001, 10., 0.1}>
+    { void update(ShadowCascadeSetup& n) { n.rebuild(); } } camera_near;
+    struct : halp::hslider_f32<"Camera far", halp::range{1., 100000., 1000.}>
+    { void update(ShadowCascadeSetup& n) { n.rebuild(); } } camera_far;
+    // Directional-light override. Normally inherited from the first
+    // directional light in the scene, but some pipelines (e.g. a single
+    // orbiting light without a Light node) benefit from setting this
+    // directly.
+    struct : halp::xyz_spinboxes_f32<"Light direction", halp::range{-1., 1., 0.}>
+    { void update(ShadowCascadeSetup& n) { n.rebuild(); } } light_direction;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene; // operator()() assigns m_cached_out to its state
+      uint8_t dirty{0}; // operator()() copies m_pending_dirty here
+    } scene_out;
+  } outputs;
+
+  void rebuild(); // builds a new scene_state with cascades into m_cached_out
+  void operator()(); // rebuilds if upstream changed, then publishes the cache
+
+  std::shared_ptr<const ossia::scene_state> m_cached_out; // last rebuild() result
+  uint8_t m_pending_dirty{0xFF}; // set to 0xFF by rebuild(), zeroed when published
+  const ossia::scene_state* m_cached_in_state{}; // compared to detect new upstream
+  int64_t m_cached_in_version{-1}; // compared against the upstream state's version
+  int m_cached_count{-1}; // these six: presumably last-applied controls — TODO confirm
+  float m_cached_distance{-1.f};
+  float m_cached_lambda{-1.f};
+  float m_cached_near{-1.f};
+  float m_cached_far{-1.f};
+  float m_cached_dir[3]{};
+  int64_t m_version_counter{0}; // bumped by rebuild(); stamps version / dirty_index
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.cpp
deleted file mode 100644
index 6f8cee8eda..0000000000
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-#include "Executor.hpp"
-
-#include <Process/Dataflow/Port.hpp>
-#include <Process/ExecutionContext.hpp>
-
-#include <Gfx/GfxApplicationPlugin.hpp>
-#include <Gfx/GfxContext.hpp>
-#include <Gfx/GfxExecNode.hpp>
-#include <Gfx/TexturePort.hpp>
-
-#include <score/document/DocumentContext.hpp>
-
-#include <ossia/dataflow/port.hpp>
-
-#include <Threedim/Splat/GaussianSplatNode.hpp>
-#include <Threedim/Splat/Process.hpp>
-
-namespace Gfx::Splat
-{
-class model_display_node final : public gfx_exec_node
-{
-public:
-  model_display_node(GfxExecutionAction& ctx)
-      : gfx_exec_node{ctx}
-  {
-  }
-
-  void init()
-  {
-    auto node = std::make_unique<score::gfx::GaussianSplatNode>();
-    id = exec_context->ui->register_node(std::move(node));
-  }
-
-  ~model_display_node() { exec_context->ui->unregister_node(id); }
-
-  std::string label() const noexcept override { return "Gfx::Splat_node"; }
-};
-
-ProcessExecutorComponent::ProcessExecutorComponent(
-    Gfx::Splat::Model& element, const Execution::Context& ctx, QObject* parent)
-    : ProcessComponent_T{element, ctx, "modelComponent", parent}
-{
-  auto n = ossia::make_node<model_display_node>(
-      *ctx.execState, ctx.doc.plugin<DocumentPlugin>().exec);
-
-  for(auto* outlet : element.outlets())
-  {
-    if(auto out = qobject_cast<Gfx::TextureOutlet*>(outlet))
-    {
-      out->nodeId = n->id;
-    }
-  }
-  // Buffer input (port 0)
-  element.inlets()[0]->setupExecution(*n->add_texture(), this);
-
-  // Camera controls: Position(1), Center(2), FOV(3), Near(4), Far(5)
-  for(std::size_t i = 1; i <= 9; i++)
-  {
-    auto ctrl = qobject_cast<Process::ControlInlet*>(element.inlets()[i]);
-    auto& p = n->add_control();
-    ctrl->setupExecution(*n->root_inputs().back(), this);
-    p->value = ctrl->value();
-
-    QObject::connect(
-        ctrl, &Process::ControlInlet::valueChanged, this,
-        con_unvalidated{ctx, i - 1, 0, n});
-  }
-
-  n->add_texture_out();
-
-  n->init();
-  this->node = n;
-  m_ossia_process = std::make_shared<ossia::node_process>(n);
-}
-
-void ProcessExecutorComponent::cleanup()
-{
-  for(auto* outlet : this->process().outlets())
-  {
-    if(auto out = qobject_cast<TextureOutlet*>(outlet))
-    {
-      out->nodeId = score::gfx::invalid_node_index;
-    }
-  }
-  ProcessComponent_T::cleanup();
-}
-}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.cpp b/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.cpp
deleted file mode 100644
index c701c1037d..0000000000
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.cpp
+++ /dev/null
@@ -1,1064 +0,0 @@
-#include "GaussianSplatNode.hpp"
-
-#include "Gfx/Graph/RhiComputeBarrier.hpp"
-
-#include <Gfx/Graph/RenderList.hpp>
-
-#include <ossia/network/value/value_conversion.hpp>
-
-#include <QDebug>
-
-#if defined(near)
-#undef near
-#undef far
-#endif
-
-namespace score::gfx
-{
-
-GaussianSplatNode::GaussianSplatNode()
-{
-  qDebug() << "[GaussianSplat] Node created";
-
-  // Input port: Raw splat buffer (256 bytes per splat)
-  auto splatBuffer = new Port{this, {}, Types::Buffer, {}};
-
-  // Output port: Rendered image
-  auto out = new Port{this, {}, Types::Image, {}};
-
-  input.push_back(splatBuffer);
-  output.push_back(out);
-
-  this->requiresDepth = false;
-}
-
-GaussianSplatNode::~GaussianSplatNode() = default;
-
-void GaussianSplatNode::process(Message&& msg)
-{
-  ProcessNode::process(msg.token);
-
-  int32_t p = 0;
-  for(const gfx_input& m : msg.input)
-  {
-    if(auto val = ossia::get_if<ossia::value>(&m))
-    {
-      switch(p)
-      {
-        case 1:
-          this->modelPosition = ossia::convert<ossia::vec3f>(*val);
-          break;
-        case 2:
-          this->modelRotation = ossia::convert<ossia::vec3f>(*val);
-          break;
-        case 3:
-          this->modelScale = ossia::convert<ossia::vec3f>(*val);
-          break;
-        case 4:
-          this->position = ossia::convert<ossia::vec3f>(*val);
-          break;
-        case 5:
-          this->center = ossia::convert<ossia::vec3f>(*val);
-          break;
-        case 6:
-          this->fov = ossia::convert<float>(*val);
-          break;
-        case 7:
-          this->near = ossia::convert<float>(*val);
-          break;
-        case 8:
-          this->far = ossia::convert<float>(*val);
-          break;
-      }
-    }
-    p++;
-  }
-  this->materialChange();
-}
-
-score::gfx::NodeRenderer*
-GaussianSplatNode::createRenderer(RenderList& r) const noexcept
-{
-  qDebug() << "[GaussianSplat] createRenderer called, splatCount=" << splatCount;
-  return new GaussianSplatRenderer{*this};
-}
-
-GaussianSplatRenderer::GaussianSplatRenderer(const GaussianSplatNode& node)
-    : GenericNodeRenderer{node}
-    , m_node{node}
-{
-  qDebug() << "[GaussianSplat] Renderer constructed";
-}
-
-GaussianSplatRenderer::~GaussianSplatRenderer() = default;
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Preprocess pipeline: raw 256B splats → compact 64B rendering splats
-// ─────────────────────────────────────────────────────────────────────────────
-
-void GaussianSplatRenderer::createPreprocessPipeline(RenderList& renderer)
-{
-  qDebug() << "[GaussianSplat] createPreprocessPipeline: splatCount="
-           << m_node.splatCount
-           << "rawBuf=" << (void*)m_rawSplatBuffer;
-
-  if(!renderer.state.rhi->isFeatureSupported(QRhi::Compute))
-  {
-    qWarning() << "[GaussianSplat] Compute shaders NOT supported!";
-    return;
-  }
-
-  auto& rhi = *renderer.state.rhi;
-  const int64_t splatCount = m_node.splatCount;
-  if(splatCount <= 0)
-  {
-    qWarning() << "[GaussianSplat] splatCount <= 0, skipping preprocess pipeline";
-    return;
-  }
-
-  // Create compact output buffer (64 bytes per splat)
-  const int64_t renderBufSize = splatCount * 64;
-  delete m_renderSplatBuffer;
-  m_renderSplatBuffer
-      = rhi.newBuffer(QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, renderBufSize);
-  if(!m_renderSplatBuffer->create())
-  {
-    qWarning() << "[GaussianSplat] Failed to create renderSplatBuffer size=" << renderBufSize;
-    delete m_renderSplatBuffer;
-    m_renderSplatBuffer = nullptr;
-    return;
-  }
-  qDebug() << "[GaussianSplat] renderSplatBuffer created, size=" << renderBufSize;
-
-  // Preprocess uniform buffer
-  if(!m_preprocessUniformBuffer)
-  {
-    m_preprocessUniformBuffer
-        = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 96);
-    if(!m_preprocessUniformBuffer->create())
-    {
-      qWarning() << "[GaussianSplat] Failed to create preprocessUniformBuffer";
-      delete m_preprocessUniformBuffer;
-      m_preprocessUniformBuffer = nullptr;
-      return;
-    }
-  }
-
-  // Compile preprocess shader
-  QShader preprocessShader = score::gfx::makeCompute(
-      renderer.state, GaussianSplatShaders::preprocess_shader);
-  if(!preprocessShader.isValid())
-  {
-    qWarning() << "[GaussianSplat] preprocess_shader compilation FAILED";
-    return;
-  }
-  qDebug() << "[GaussianSplat] preprocess_shader compiled OK";
-
-  // Cleanup old pipeline
-  delete m_preprocessSrb;
-  delete m_preprocessPipeline;
-
-  m_preprocessSrb = rhi.newShaderResourceBindings();
-  m_preprocessSrb->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::ComputeStage, m_rawSplatBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          1, QRhiShaderResourceBinding::ComputeStage, m_renderSplatBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          2, QRhiShaderResourceBinding::ComputeStage, m_preprocessUniformBuffer),
-  });
-  if(!m_preprocessSrb->create())
-  {
-    qWarning() << "[GaussianSplat] preprocess SRB creation FAILED";
-    return;
-  }
-
-  m_preprocessPipeline = rhi.newComputePipeline();
-  m_preprocessPipeline->setShaderResourceBindings(m_preprocessSrb);
-  m_preprocessPipeline->setShaderStage(
-      {QRhiShaderStage::Compute, preprocessShader});
-  if(!m_preprocessPipeline->create())
-  {
-    qWarning() << "[GaussianSplat] preprocess pipeline creation FAILED";
-    delete m_preprocessPipeline;
-    m_preprocessPipeline = nullptr;
-    return;
-  }
-
-  qDebug() << "[GaussianSplat] preprocess pipeline created OK";
-  m_preprocessResourcesCreated = true;
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Sort pipelines
-// ─────────────────────────────────────────────────────────────────────────────
-
-void GaussianSplatRenderer::createSortPipelines(RenderList& renderer)
-{
-  qDebug() << "[GaussianSplat] createSortPipelines";
-
-  if(!renderer.state.rhi->isFeatureSupported(QRhi::Compute))
-  {
-    qWarning() << "[GaussianSplat] Compute not supported, no sorting";
-    return;
-  }
-  if(!m_renderSplatBuffer)
-  {
-    qWarning() << "[GaussianSplat] No renderSplatBuffer, cannot create sort pipelines";
-    return;
-  }
-
-  auto& rhi = *renderer.state.rhi;
-  const int64_t splatCount = m_node.splatCount;
-  if(splatCount <= 0)
-    return;
-
-  const int64_t numWorkgroups
-      = (splatCount + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE;
-  const int64_t keyBufferSize = splatCount * sizeof(uint32_t);
-  const int64_t indexBufferSize = splatCount * sizeof(uint32_t);
-  const int64_t histogramSize = numWorkgroups * NUM_BUCKETS * sizeof(uint32_t);
-
-  auto createOrResizeBuffer
-      = [&](QRhiBuffer*& buf, int64_t size, QRhiBuffer::UsageFlags usage) {
-    if(buf && buf->size() >= size)
-      return;
-    delete buf;
-    buf = rhi.newBuffer(QRhiBuffer::Immutable, usage, size);
-    buf->create();
-  };
-
-  createOrResizeBuffer(
-      m_sortKeysBuffer, keyBufferSize, QRhiBuffer::StorageBuffer);
-  createOrResizeBuffer(
-      m_sortKeysAltBuffer, keyBufferSize, QRhiBuffer::StorageBuffer);
-  createOrResizeBuffer(
-      m_sortIndicesBuffer, indexBufferSize, QRhiBuffer::StorageBuffer);
-  createOrResizeBuffer(
-      m_sortIndicesAltBuffer, indexBufferSize, QRhiBuffer::StorageBuffer);
-  createOrResizeBuffer(
-      m_histogramBuffer, histogramSize, QRhiBuffer::StorageBuffer);
-
-  // Depth key pass uses its own uniform layout: {mat4 view, uint splatCount, float near, float far, uint pad}
-  if(!m_sortUniformBuffer)
-  {
-    m_sortUniformBuffer
-        = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 256);
-    m_sortUniformBuffer->create();
-  }
-
-  // Histogram/scatter passes use: {uint splatCount, uint bitOffset, uint numWorkgroups, uint pad}
-  if(!m_sortPassUniformBuffer)
-  {
-    m_sortPassUniformBuffer
-        = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 16);
-    m_sortPassUniformBuffer->create();
-  }
-
-  // Prefix sum pass uses: {uint numWorkgroups, uint pad0, uint pad1, uint pad2}
-  if(!m_prefixSumUniformBuffer)
-  {
-    m_prefixSumUniformBuffer
-        = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 16);
-    m_prefixSumUniformBuffer->create();
-  }
-
-  // Compile compute shaders
-  QShader depthKeyShader = score::gfx::makeCompute(
-      renderer.state, GaussianSplatShaders::depth_key_shader);
-  QShader histogramShader = score::gfx::makeCompute(
-      renderer.state, GaussianSplatShaders::histogram_shader);
-  QShader prefixSumShader = score::gfx::makeCompute(
-      renderer.state, GaussianSplatShaders::prefix_sum_shader);
-  QShader sortScatterShader = score::gfx::makeCompute(
-      renderer.state, GaussianSplatShaders::sort_scatter_shader);
-
-  if(!depthKeyShader.isValid())
-    qWarning() << "[GaussianSplat] depth_key_shader compilation FAILED";
-  if(!histogramShader.isValid())
-    qWarning() << "[GaussianSplat] histogram_shader compilation FAILED";
-  if(!prefixSumShader.isValid())
-    qWarning() << "[GaussianSplat] prefix_sum_shader compilation FAILED";
-  if(!sortScatterShader.isValid())
-    qWarning() << "[GaussianSplat] sort_scatter_shader compilation FAILED";
-
-  // Depth key pipeline — reads from compact m_renderSplatBuffer
-  delete m_depthKeySrb;
-  delete m_depthKeyPipeline;
-
-  m_depthKeySrb = rhi.newShaderResourceBindings();
-  m_depthKeySrb->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::ComputeStage, m_renderSplatBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          1, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          2, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          3, QRhiShaderResourceBinding::ComputeStage, m_sortUniformBuffer),
-  });
-  m_depthKeySrb->create();
-
-  m_depthKeyPipeline = rhi.newComputePipeline();
-  m_depthKeyPipeline->setShaderResourceBindings(m_depthKeySrb);
-  m_depthKeyPipeline->setShaderStage(
-      {QRhiShaderStage::Compute, depthKeyShader});
-  if(!m_depthKeyPipeline->create())
-    qWarning() << "[GaussianSplat] depthKey pipeline creation FAILED";
-
-  // Histogram pipeline (two SRBs for ping-pong: even reads keysBuffer, odd reads keysAltBuffer)
-  delete m_histogramSrb;
-  delete m_histogramSrbAlt;
-  delete m_histogramPipeline;
-
-  m_histogramSrb = rhi.newShaderResourceBindings();
-  m_histogramSrb->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          1, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          2, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer),
-  });
-  m_histogramSrb->create();
-
-  m_histogramSrbAlt = rhi.newShaderResourceBindings();
-  m_histogramSrbAlt->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysAltBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          1, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          2, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer),
-  });
-  m_histogramSrbAlt->create();
-
-  m_histogramPipeline = rhi.newComputePipeline();
-  m_histogramPipeline->setShaderResourceBindings(m_histogramSrb);
-  m_histogramPipeline->setShaderStage(
-      {QRhiShaderStage::Compute, histogramShader});
-  if(!m_histogramPipeline->create())
-    qWarning() << "[GaussianSplat] histogram pipeline creation FAILED";
-
-  // Prefix sum pipeline
-  delete m_prefixSumSrb;
-  delete m_prefixSumPipeline;
-
-  m_prefixSumSrb = rhi.newShaderResourceBindings();
-  m_prefixSumSrb->setBindings({
-      QRhiShaderResourceBinding::bufferLoadStore(
-          0, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          1, QRhiShaderResourceBinding::ComputeStage, m_prefixSumUniformBuffer),
-  });
-  m_prefixSumSrb->create();
-
-  m_prefixSumPipeline = rhi.newComputePipeline();
-  m_prefixSumPipeline->setShaderResourceBindings(m_prefixSumSrb);
-  m_prefixSumPipeline->setShaderStage(
-      {QRhiShaderStage::Compute, prefixSumShader});
-  if(!m_prefixSumPipeline->create())
-    qWarning() << "[GaussianSplat] prefixSum pipeline creation FAILED";
-
-  // Sort scatter pipeline (ping-pong: separate read/write buffers)
-  delete m_sortSrb;
-  delete m_sortSrbAlt;
-  delete m_sortPipeline;
-
-  // Even passes: read keys/indices → write keysAlt/indicesAlt
-  m_sortSrb = rhi.newShaderResourceBindings();
-  m_sortSrb->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer),
-      QRhiShaderResourceBinding::bufferLoad(
-          1, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          2, QRhiShaderResourceBinding::ComputeStage, m_sortKeysAltBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          3, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesAltBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          4, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          5, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer),
-  });
-  m_sortSrb->create();
-
-  // Odd passes: read keysAlt/indicesAlt → write keys/indices
-  m_sortSrbAlt = rhi.newShaderResourceBindings();
-  m_sortSrbAlt->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysAltBuffer),
-      QRhiShaderResourceBinding::bufferLoad(
-          1, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesAltBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          2, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          3, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesBuffer),
-      QRhiShaderResourceBinding::bufferLoadStore(
-          4, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer),
-      QRhiShaderResourceBinding::uniformBuffer(
-          5, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer),
-  });
-  m_sortSrbAlt->create();
-
-  m_sortPipeline = rhi.newComputePipeline();
-  m_sortPipeline->setShaderResourceBindings(m_sortSrb);
-  m_sortPipeline->setShaderStage(
-      {QRhiShaderStage::Compute, sortScatterShader});
-  if(!m_sortPipeline->create())
-    qWarning() << "[GaussianSplat] sort pipeline creation FAILED";
-
-  m_sortResourcesCreated = true;
-  m_lastSplatCount = splatCount;
-  qDebug() << "[GaussianSplat] Sort pipelines created OK, workgroups=" << numWorkgroups;
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Render pipeline
-// ─────────────────────────────────────────────────────────────────────────────
-
-void GaussianSplatRenderer::createRenderPipeline(RenderList& renderer)
-{
-  qDebug() << "[GaussianSplat] createRenderPipeline: renderSplatBuf="
-           << (void*)m_renderSplatBuffer
-           << "sortIndicesBuf=" << (void*)m_sortIndicesBuffer
-           << "enableSorting=" << m_node.enableSorting;
-
-  if(!m_renderSplatBuffer)
-  {
-    qWarning() << "[GaussianSplat] No renderSplatBuffer, cannot create render pipeline";
-    return;
-  }
-
-  delete m_bindings;
-  delete m_pipeline;
-  m_bindings = nullptr;
-  m_pipeline = nullptr;
-
-  auto& rhi = *renderer.state.rhi;
-
-  auto [vertex, fragment] = score::gfx::makeShaders(
-      renderer.state, GaussianSplatShaders::vertex_shader,
-      GaussianSplatShaders::fragment_shader);
-
-  if(!vertex.isValid())
-    qWarning() << "[GaussianSplat] vertex_shader compilation FAILED";
-  if(!fragment.isValid())
-    qWarning() << "[GaussianSplat] fragment_shader compilation FAILED";
-
-  // All 3 bindings must always be present (the shader declares them all).
-  QRhiBuffer* indicesBuf = (m_sortIndicesBuffer && m_node.enableSorting)
-                               ? m_sortIndicesBuffer
-                               : m_dummyStorageBuffer;
-
-  qDebug() << "[GaussianSplat] Render bindings: b0=renderSplat("
-           << m_renderSplatBuffer->size() << ") b1=indices("
-           << indicesBuf->size() << ") b2=uniform("
-           << m_uniformBuffer->size() << ")";
-
-  m_bindings = rhi.newShaderResourceBindings();
-  m_bindings->setBindings({
-      QRhiShaderResourceBinding::bufferLoad(
-          0, QRhiShaderResourceBinding::VertexStage, m_renderSplatBuffer),
-      QRhiShaderResourceBinding::bufferLoad(
-          1, QRhiShaderResourceBinding::VertexStage, indicesBuf),
-      QRhiShaderResourceBinding::uniformBuffer(
-          2, QRhiShaderResourceBinding::VertexStage, m_uniformBuffer),
-  });
-  if(!m_bindings->create())
-  {
-    qWarning() << "[GaussianSplat] Render SRB creation FAILED";
-    return;
-  }
-
-  m_pipeline = rhi.newGraphicsPipeline();
-  m_pipeline->setName("GaussianSplat::pipeline");
-
-  m_pipeline->setShaderStages(
-      {{QRhiShaderStage::Vertex, vertex},
-       {QRhiShaderStage::Fragment, fragment}});
-
-  // No vertex input — quad vertices generated in shader
-  QRhiVertexInputLayout inputLayout;
-  m_pipeline->setVertexInputLayout(inputLayout);
-
-  m_pipeline->setTopology(QRhiGraphicsPipeline::Triangles);
-  m_pipeline->setCullMode(QRhiGraphicsPipeline::None);
-  // Depth test + write: provides correct occlusion as a safety net.
-  // Framework clears depth to 1.0 (far), so all valid splats pass initially.
-  // With back-to-front sorting, depth test always passes (each splat is closer).
-  // Without sorting, depth write ensures near splats occlude far ones.
-  m_pipeline->setDepthTest(true);
-  m_pipeline->setDepthWrite(true);
-
-  // Front-to-back "under" compositing (premultiplied alpha).
-  // Mathematically equivalent to back-to-front "over", but much more stable:
-  // sort-order errors among back splats are hidden by accumulated front alpha.
-  // Under: result = src * (1 - dst.alpha) + dst
-  QRhiGraphicsPipeline::TargetBlend blend;
-  blend.enable = true;
-  blend.srcColor = QRhiGraphicsPipeline::OneMinusDstAlpha;
-  blend.dstColor = QRhiGraphicsPipeline::One;
-  blend.srcAlpha = QRhiGraphicsPipeline::OneMinusDstAlpha;
-  blend.dstAlpha = QRhiGraphicsPipeline::One;
-  m_pipeline->setTargetBlends({blend});
-
-  m_pipeline->setShaderResourceBindings(m_bindings);
-
-  // Find the destination render target so we can match its sample count
-  // (must agree exactly with renderTarget->sampleCount() — Vulkan rejects
-  //  pipelines whose sampleCount differs from the render pass).
-  bool foundRenderPass = false;
-  int rtSamples = renderer.samples();
-  for(auto* edge : node.output[0]->edges)
-  {
-    auto rt = renderer.renderTargetForOutput(*edge);
-    if(rt.renderTarget)
-    {
-      m_pipeline->setRenderPassDescriptor(rt.renderPass);
-      const int s = rt.sampleCount();
-      if(s > 0)
-        rtSamples = s;
-      foundRenderPass = true;
-      break;
-    }
-  }
-  if(!foundRenderPass)
-    qWarning() << "[GaussianSplat] No render pass descriptor found from output edges!";
-
-  m_pipeline->setSampleCount(rtSamples);
-
-  if(!m_pipeline->create())
-  {
-    qWarning() << "[GaussianSplat] Render pipeline creation FAILED";
-    delete m_pipeline;
-    m_pipeline = nullptr;
-    return;
-  }
-
-  qDebug() << "[GaussianSplat] Render pipeline created OK";
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Init / Update
-// ─────────────────────────────────────────────────────────────────────────────
-
-void GaussianSplatRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res)
-{
-  qDebug() << "[GaussianSplat] init: splatCount=" << m_node.splatCount
-           << "enableSorting=" << m_node.enableSorting
-           << "shDegree=" << m_node.shDegree;
-
-  auto& rhi = *renderer.state.rhi;
-
-  qDebug() << "[GaussianSplat] RHI backend:"
-           << rhi.backendName()
-           << "compute=" << rhi.isFeatureSupported(QRhi::Compute);
-
-  // Look up the pre-created input render target from the RenderList
-  auto rt_spec = m_node.resolveRenderTargetSpecs(0, renderer);
-  auto sampler = rhi.newSampler(
-      rt_spec.min_filter, rt_spec.mag_filter, QRhiSampler::Linear,
-      rt_spec.address_u, rt_spec.address_v, rt_spec.address_w);
-  sampler->setName("GaussianSplat::sampler");
-  sampler->create();
-
-  auto inputRT = renderer.renderTargetForInputPort(*m_node.input[0]);
-  auto* texture = inputRT.texture ? inputRT.texture : &renderer.emptyTexture();
-  m_samplers.push_back({sampler, texture});
-
-  // Render uniform buffer
-  const int64_t uniformSize = 3 * 64 + 16;
-  m_uniformBuffer
-      = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, uniformSize);
-  m_uniformBuffer->create();
-
-  // Dummy storage buffer
-  m_dummyStorageBuffer
-      = rhi.newBuffer(QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, 16);
-  m_dummyStorageBuffer->create();
-
-  // Default mesh (required by base class)
-  const auto& mesh = renderer.defaultQuad();
-  defaultMeshInit(renderer, mesh, res);
-
-  qDebug() << "[GaussianSplat] init complete";
-}
-
-void GaussianSplatRenderer::update(
-    RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge)
-{
-  const int64_t splatCount = m_node.splatCount;
-
-  // Check for raw splat buffer input
-  bool bufferChanged = false;
-  if(!m_node.input.empty() && m_node.input[0])
-  {
-    auto* inputPort = m_node.input[0];
-    if(!inputPort->edges.empty())
-    {
-      auto* inputEdge = inputPort->edges[0];
-      if(inputEdge && inputEdge->source)
-      {
-        QRhiBuffer* newBuffer{};
-        score::gfx::NodeRenderer* src_renderer
-            = inputEdge->source->node->renderedNodes.at(&renderer);
-        if(src_renderer)
-        {
-          auto bv = src_renderer->bufferForOutput(*inputEdge->source);
-          newBuffer = bv.handle;
-        }
-        if(newBuffer != m_rawSplatBuffer)
-        {
-          qDebug() << "[GaussianSplat] update: raw buffer changed,"
-                   << "old=" << (void*)m_rawSplatBuffer
-                   << "new=" << (void*)newBuffer
-                   << "size=" << newBuffer->size();
-          m_rawSplatBuffer = newBuffer;
-          ((GaussianSplatNode&)this->node).splatCount
-              = newBuffer ? newBuffer->size() / 256 : 0;
-          bufferChanged = true;
-          qDebug() << "[GaussianSplat] Loaded splats:"
-                   << ((GaussianSplatNode&)this->node).splatCount;
-        }
-      }
-      else
-      {
-        // Log only once
-        static bool logged = false;
-        if(!logged)
-        {
-          qDebug() << "[GaussianSplat] update: input edge exists but no value."
-                   << "source=" << (void*)(inputEdge ? inputEdge->source : nullptr);
-          logged = true;
-        }
-      }
-    }
-    else
-    {
-      static bool logged = false;
-      if(!logged)
-      {
-        qDebug() << "[GaussianSplat] update: input port has no edges";
-        logged = true;
-      }
-    }
-  }
-  else
-  {
-    static bool logged = false;
-    if(!logged)
-    {
-      qDebug() << "[GaussianSplat] update: no input ports";
-      logged = true;
-    }
-  }
-
-  // Recreate compute/render pipelines when buffer or count changes
-  if(bufferChanged || splatCount != m_lastSplatCount)
-  {
-    qDebug() << "[GaussianSplat] update: rebuilding pipelines,"
-             << "bufferChanged=" << bufferChanged
-             << "splatCount=" << splatCount
-             << "lastSplatCount=" << m_lastSplatCount
-             << "rawBuf=" << (void*)m_rawSplatBuffer;
-
-    if(m_rawSplatBuffer && splatCount > 0)
-    {
-      createPreprocessPipeline(renderer);
-      if(m_node.enableSorting)
-        createSortPipelines(renderer);
-      createRenderPipeline(renderer);
-    }
-    else
-    {
-      qDebug() << "[GaussianSplat] update: cannot build pipelines (no buffer or count=0)";
-    }
-    m_lastSplatCount = splatCount;
-  }
-
-  // Compute view and projection matrices from camera parameters
-  auto& state = renderer.state;
-
-  // Build model matrix from position/rotation/scale
-  QMatrix4x4 model;
-  model.translate(
-      m_node.modelPosition[0], m_node.modelPosition[1], m_node.modelPosition[2]);
-  model.rotate(m_node.modelRotation[0], 1, 0, 0); // pitch
-  model.rotate(m_node.modelRotation[1], 0, 1, 0); // yaw
-  model.rotate(m_node.modelRotation[2], 0, 0, 1); // roll
-  model.scale(m_node.modelScale[0], m_node.modelScale[1], m_node.modelScale[2]);
-
-  QMatrix4x4 view;
-  view.lookAt(
-      QVector3D{m_node.position[0], m_node.position[1], m_node.position[2]},
-      QVector3D{m_node.center[0], m_node.center[1], m_node.center[2]},
-      QVector3D{0, 1, 0});
-
-  // modelView bakes the model transform so shaders don't need a separate model matrix
-  QMatrix4x4 modelView = view * model;
-
-  QMatrix4x4 proj;
-  const float aspect
-      = float(state.renderSize.width()) / float(state.renderSize.height());
-  proj.perspective(m_node.fov, aspect, m_node.near, m_node.far);
-
-  QMatrix4x4 clip = renderer.state.rhi->clipSpaceCorrMatrix();
-
-  struct
-  {
-    float viewport[2];
-    float _pad0;
-    uint32_t useSorting;
-  } tail;
-
-  tail.viewport[0] = float(state.renderSize.width());
-  tail.viewport[1] = float(state.renderSize.height());
-  tail._pad0 = 0.f;
-  tail.useSorting = m_node.enableSorting && m_sortResourcesCreated ? 1u : 0u;
-
-  char buf[3 * 64 + 16];
-  memcpy(buf, modelView.constData(), 64);
-  memcpy(buf + 64, proj.constData(), 64);
-  memcpy(buf + 128, clip.constData(), 64);
-  memcpy(buf + 192, &tail, 16);
-
-  res.updateDynamicBuffer(m_uniformBuffer, 0, sizeof(buf), buf);
-
-  // Update preprocess uniforms
-  if(m_preprocessUniformBuffer && m_rawSplatBuffer)
-  {
-    struct
-    {
-      float viewMatrix[16];
-      float camPos[3];
-      uint32_t splatCount;
-      uint32_t shDegree;
-      float scaleMod;
-      uint32_t _pad0;
-      uint32_t _pad1;
-    } ppUniforms;
-
-    memcpy(ppUniforms.viewMatrix, modelView.constData(), 64);
-
-    // Camera position in model space for SH evaluation
-    QVector3D worldCamPos{m_node.position[0], m_node.position[1], m_node.position[2]};
-    QVector3D modelCamPos = model.inverted().map(worldCamPos);
-    ppUniforms.camPos[0] = modelCamPos.x();
-    ppUniforms.camPos[1] = modelCamPos.y();
-    ppUniforms.camPos[2] = modelCamPos.z();
-    ppUniforms.splatCount = splatCount;
-    ppUniforms.shDegree = m_node.shDegree;
-    ppUniforms.scaleMod = m_node.scaleFactor;
-
-    res.updateDynamicBuffer(
-        m_preprocessUniformBuffer, 0, sizeof(ppUniforms), &ppUniforms);
-  }
-
-  // Update sort uniforms
-  if(m_sortUniformBuffer && m_node.enableSorting)
-  {
-    struct
-    {
-      float viewMatrix[16];
-      uint32_t splatCount;
-      float nearPlane;
-      float farPlane;
-      uint32_t _pad;
-    } sortUniforms;
-
-    memcpy(sortUniforms.viewMatrix, modelView.constData(), 64);
-    sortUniforms.splatCount = splatCount;
-    sortUniforms.nearPlane = m_node.near;
-    sortUniforms.farPlane = m_node.far;
-
-    res.updateDynamicBuffer(
-        m_sortUniformBuffer, 0, sizeof(sortUniforms), &sortUniforms);
-  }
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Compute passes: preprocess → sort
-// ─────────────────────────────────────────────────────────────────────────────
-
-/**
- * Records the compute work that has to run before the render pass:
- *   1. SH preprocess (raw splats -> compact render splats),
- *   2. depth key generation,
- *   3. two 8-bit radix passes (histogram, prefix sum, scatter) over key
- *      bits 16-31.
- *
- * A resource update batch handed to beginComputePass() is committed and
- * released by QRhi, so `res` is reset to nullptr right after every hand-off
- * and a fresh batch is requested only when further uploads are needed.
- * `res` may therefore be nullptr when this function returns.
- */
-void GaussianSplatRenderer::runInitialPasses(
-    RenderList& renderer, QRhiCommandBuffer& cb,
-    QRhiResourceUpdateBatch*& res, Edge& edge)
-{
-  const int64_t splatCount = m_node.splatCount;
-  if(splatCount <= 0 || !m_rawSplatBuffer)
-  {
-    static bool logged = false;
-    if(!logged)
-    {
-      qDebug() << "[GaussianSplat] runInitialPasses: SKIPPED (splatCount="
-               << splatCount << "rawBuf=" << (void*)m_rawSplatBuffer << ")";
-      logged = true;
-    }
-    return;
-  }
-
-  auto& rhi = *renderer.state.rhi;
-
-  // One invocation per splat, rounded up to whole workgroups
-  const int64_t numWorkgroups
-      = (splatCount + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE;
-
-  // ── Pass 1: SH preprocess (raw → compact) ────────────────────────────
-  if(m_preprocessResourcesCreated && m_preprocessPipeline)
-  {
-    cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent);
-    // The batch now belongs to QRhi: never submit the same pointer twice
-    res = nullptr;
-
-    cb.setComputePipeline(m_preprocessPipeline);
-    cb.setShaderResources(m_preprocessSrb);
-    cb.dispatch(numWorkgroups, 1, 1);
-
-    cb.beginExternal();
-    insertComputeBarrier(rhi, cb);
-    cb.endExternal();
-    cb.endComputePass();
-  }
-  else
-  {
-    static bool logged = false;
-    if(!logged)
-    {
-      qDebug() << "[GaussianSplat] runInitialPasses: preprocess SKIPPED"
-               << "(created=" << m_preprocessResourcesCreated
-               << "pipeline=" << (void*)m_preprocessPipeline << ")";
-      logged = true;
-    }
-  }
-
-  // ── Pass 2..N: Depth sort ─────────────────────────────────────────────
-  if(!m_node.enableSorting || !m_sortResourcesCreated || !m_depthKeyPipeline
-     || !m_prefixSumPipeline)
-  {
-    static bool loggedSkip = false;
-    if(!loggedSkip)
-    {
-      qDebug() << "[GaussianSplat] SORT SKIPPED:"
-               << "enableSorting=" << m_node.enableSorting
-               << "sortResourcesCreated=" << m_sortResourcesCreated
-               << "depthKeyPipeline=" << (void*)m_depthKeyPipeline
-               << "prefixSumPipeline=" << (void*)m_prefixSumPipeline;
-      loggedSkip = true;
-    }
-    return;
-  }
-
-  // Generate depth keys from compact buffer.
-  // `res` is still the caller's batch only if the preprocess pass was skipped;
-  // otherwise it is nullptr, which beginComputePass() accepts.
-  cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent);
-  res = nullptr;
-
-  cb.setComputePipeline(m_depthKeyPipeline);
-  cb.setShaderResources(m_depthKeySrb);
-  cb.dispatch(numWorkgroups, 1, 1);
-
-  cb.beginExternal();
-  insertComputeBarrier(rhi, cb);
-  cb.endExternal();
-  cb.endComputePass();
-
-  // Upload prefix sum uniforms (constant across all passes)
-  {
-    if(!res)
-      res = rhi.nextResourceUpdateBatch();
-    struct
-    {
-      uint32_t numWorkgroups;
-      uint32_t _pad0;
-      uint32_t _pad1;
-      uint32_t _pad2;
-    } prefixUniforms;
-    prefixUniforms.numWorkgroups = numWorkgroups;
-    prefixUniforms._pad0 = 0;
-    prefixUniforms._pad1 = 0;
-    prefixUniforms._pad2 = 0;
-    res->updateDynamicBuffer(
-        m_prefixSumUniformBuffer, 0, sizeof(prefixUniforms), &prefixUniforms);
-    // Will be consumed by the first histogram pass below
-  }
-
-  // Radix sort: 2 passes over the top 16 bits (depth key).
-  // Bottom 16 bits (splat index) are already in order from the depth key shader,
-  // and the radix sort is stable, so equal-depth splats keep their index order.
-  for(int pass = 0; pass < 2; ++pass)
-  {
-    const uint32_t bitOffset = 16 + pass * RADIX_BITS; // bits 16-23, then 24-31
-
-    // Upload per-pass uniforms for histogram + scatter
-    {
-      struct
-      {
-        uint32_t splatCount;
-        uint32_t bitOffset;
-        uint32_t numWorkgroups;
-        uint32_t _pad;
-      } sortPassUniforms;
-      sortPassUniforms.splatCount = splatCount;
-      sortPassUniforms.bitOffset = bitOffset;
-      sortPassUniforms.numWorkgroups = numWorkgroups;
-      sortPassUniforms._pad = 0;
-
-      if(!res)
-        res = rhi.nextResourceUpdateBatch();
-      res->updateDynamicBuffer(
-          m_sortPassUniformBuffer, 0, sizeof(sortPassUniforms),
-          &sortPassUniforms);
-    }
-
-    // Histogram: count digits per workgroup
-    // Even passes read from keysBuffer, odd from keysAltBuffer
-    cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent);
-    res = nullptr;
-    cb.setComputePipeline(m_histogramPipeline);
-    cb.setShaderResources(pass % 2 == 0 ? m_histogramSrb : m_histogramSrbAlt);
-    cb.dispatch(numWorkgroups, 1, 1);
-    cb.beginExternal();
-    insertComputeBarrier(rhi, cb);
-    cb.endExternal();
-    cb.endComputePass();
-
-    // Prefix sum: convert per-workgroup histograms to global prefix sums
-    // Single workgroup of 256 threads (one per digit)
-    cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent);
-    res = nullptr;
-    cb.setComputePipeline(m_prefixSumPipeline);
-    cb.setShaderResources(m_prefixSumSrb);
-    cb.dispatch(1, 1, 1);
-    cb.beginExternal();
-    insertComputeBarrier(rhi, cb);
-    cb.endExternal();
-    cb.endComputePass();
-
-    // Scatter: reorder keys+indices using prefix sums (ping-pong)
-    cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent);
-    res = nullptr;
-    cb.setComputePipeline(m_sortPipeline);
-    cb.setShaderResources(pass % 2 == 0 ? m_sortSrb : m_sortSrbAlt);
-    cb.dispatch(numWorkgroups, 1, 1);
-    cb.endComputePass();
-  }
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Render pass
-// ─────────────────────────────────────────────────────────────────────────────
-
-/**
- * Issues the instanced draw for the splats: one 6-vertex quad per splat.
- * Returns without drawing while the graphics pipeline or the compact splat
- * buffer is missing (logged once), or when the node reports no splats.
- */
-void GaussianSplatRenderer::runRenderPass(
-    RenderList& renderer, QRhiCommandBuffer& cb, Edge& edge)
-{
-  const bool ready = m_pipeline && m_renderSplatBuffer;
-  if(!ready)
-  {
-    static bool warnedOnce = false;
-    if(!warnedOnce)
-    {
-      warnedOnce = true;
-      qDebug() << "[GaussianSplat] runRenderPass: SKIPPED (pipeline="
-               << (void*)m_pipeline
-               << "renderBuf=" << (void*)m_renderSplatBuffer << ")";
-    }
-    return;
-  }
-
-  const int64_t instanceCount = m_node.splatCount;
-  if(instanceCount <= 0)
-    return;
-
-  const auto& targetSize = renderer.state.renderSize;
-
-  // Status line on the 1st, 301st, 601st, ... call; the counter is
-  // incremented before it is printed.
-  static int drawCalls = 0;
-  const bool logThisCall = (drawCalls % 300) == 0;
-  ++drawCalls;
-  if(logThisCall)
-  {
-    const bool sortActive = m_node.enableSorting && m_sortResourcesCreated;
-    qDebug() << "[GaussianSplat] runRenderPass: drawing"
-             << instanceCount << "splats (frame" << drawCalls << ")"
-             << "sorting=" << sortActive
-             << "preprocessOK=" << m_preprocessResourcesCreated
-             << "sortOK=" << m_sortResourcesCreated
-             << "viewport=" << targetSize;
-  }
-
-  // Viewport covers the whole render target
-  const QRhiViewport fullTarget{
-      0, 0, static_cast<float>(targetSize.width()),
-      static_cast<float>(targetSize.height())};
-
-  cb.setGraphicsPipeline(m_pipeline);
-  cb.setShaderResources(m_bindings);
-  cb.setViewport(fullTarget);
-
-  // Two triangles (6 vertices) per instance, one instance per splat
-  cb.draw(6, instanceCount, 0, 0);
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Cleanup
-// ─────────────────────────────────────────────────────────────────────────────
-
-/**
- * Destroys every QRhi object this renderer created and resets the
- * bookkeeping flags. m_rawSplatBuffer is only reset, never deleted here.
- */
-void GaussianSplatRenderer::release(RenderList& r)
-{
-  qDebug() << "[GaussianSplat] release";
-
-  // Delete the pointee and clear the member in one step
-  const auto destroy = [](auto*& resource) {
-    delete resource;
-    resource = nullptr;
-  };
-
-  for(auto& entry : m_samplers)
-    delete entry.sampler;
-  m_samplers.clear();
-
-  // Render
-  destroy(m_uniformBuffer);
-  destroy(m_dummyStorageBuffer);
-  destroy(m_pipeline);
-  destroy(m_bindings);
-
-  // Preprocess
-  destroy(m_renderSplatBuffer);
-  destroy(m_preprocessUniformBuffer);
-  destroy(m_preprocessPipeline);
-  destroy(m_preprocessSrb);
-  m_preprocessResourcesCreated = false;
-
-  // Sort: buffers, then pipelines, then shader resource bindings
-  destroy(m_sortKeysBuffer);
-  destroy(m_sortKeysAltBuffer);
-  destroy(m_sortIndicesBuffer);
-  destroy(m_sortIndicesAltBuffer);
-  destroy(m_histogramBuffer);
-  destroy(m_sortUniformBuffer);
-  destroy(m_sortPassUniformBuffer);
-  destroy(m_prefixSumUniformBuffer);
-  destroy(m_depthKeyPipeline);
-  destroy(m_histogramPipeline);
-  destroy(m_prefixSumPipeline);
-  destroy(m_sortPipeline);
-  destroy(m_depthKeySrb);
-  destroy(m_histogramSrb);
-  destroy(m_histogramSrbAlt);
-  destroy(m_prefixSumSrb);
-  destroy(m_sortSrb);
-  destroy(m_sortSrbAlt);
-  m_sortResourcesCreated = false;
-
-  // Dropped without delete, exactly as before
-  m_rawSplatBuffer = nullptr;
-}
-
-} // namespace score::gfx
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.hpp b/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.hpp
deleted file mode 100644
index 1770d2d3b4..0000000000
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.hpp
+++ /dev/null
@@ -1,830 +0,0 @@
-#pragma once
-
-#include <Gfx/Graph/Node.hpp>
-#include <Gfx/Graph/NodeRenderer.hpp>
-#include <Gfx/Graph/RenderList.hpp>
-#include <Gfx/Graph/Utils.hpp>
-
-#include <ossia/detail/pod_vector.hpp>
-
-// clang-format off
-#if defined(near)
-#undef near
-#undef far
-#endif
-// clang-format on
-
-namespace score::gfx
-{
-
-/**
- * @brief Gaussian Splat rendering node
- *
- * A full rendering node for 3D Gaussian Splatting.
- * Uses instanced quad rendering with EWA (Elliptical Weighted Average) projection.
- *
- * Pipeline (per frame):
- *   1. SH preprocess (compute): raw 256-byte splats → compact 64-byte splats
- *      Evaluates spherical harmonics, applies exp(scale), sigmoid(opacity)
- *   2. Depth key generation (compute): writes sortable uint keys
- *   3. Radix sort (compute): sorts indices by ascending depth key, i.e.
- *      front-to-back; splats at or behind the near plane get the largest
- *      key and end up last
- *   4. Render pass: instanced alpha-blended quads using sorted indices
- *
- * Input ports:
- *   - Raw Splat Buffer: GPU storage buffer, 256 bytes per splat
- *     (layout matches GaussianSplatData from Ply.hpp)
- *
- * Output ports:
- *   - Rendered image
- *
- * The public fields below are the parameters the renderer reads every frame.
- */
-struct GaussianSplatNode : public NodeModel
-{
-public:
-  GaussianSplatNode();
-  virtual ~GaussianSplatNode();
-
-  score::gfx::NodeRenderer* createRenderer(RenderList&) const noexcept override;
-  void process(Message&& msg) override;
-
-  int splatCount{};         // Number of splats dispatched and drawn; <= 0 disables both
-  float scaleFactor{1.0f};  // Uploaded as scaleMod: multiplies every splat scale in the preprocess shader
-  bool enableSorting{true}; // Gates creation and execution of the depth-sort passes
-  uint32_t shDegree{3}; // 0, 1, 2, or 3
-
-  // Model transform
-  ossia::vec3f modelPosition{0.f, 0.f, 0.f};
-  ossia::vec3f modelRotation{0.f, 0.f, 0.f}; // Euler angles in degrees (pitch, yaw, roll)
-  ossia::vec3f modelScale{1.f, 1.f, 1.f};
-
-  // Camera parameters
-  ossia::vec3f position{-1.f, -1.f, -1.f}; // Eye position given to QMatrix4x4::lookAt
-  ossia::vec3f center{0.f, 0.f, 0.f};      // Look-at target; the up vector is fixed to (0, 1, 0)
-  float fov{90.f};                         // Passed as the angle argument of QMatrix4x4::perspective
-  float near{0.001f};                      // Near plane; also the lower bound of the log-depth sort key
-  float far{10000.f};                      // Far plane; also the upper bound of the log-depth sort key
-};
-
-/**
- * @brief Renderer for GaussianSplatNode
- *
- * Rendering pipeline:
- * 1. runInitialPasses: SH preprocess, then (when sorting is enabled and its
- *    resources exist) compute depth keys and perform GPU radix sort
- * 2. runRenderPass: Draw sorted splats with alpha blending
- *    (a single instanced draw, 6 vertices per splat)
- *
- * release() deletes every QRhi object declared below except
- * m_rawSplatBuffer, which it only resets to nullptr.
- */
-class GaussianSplatRenderer final : public score::gfx::GenericNodeRenderer
-{
-public:
-  explicit GaussianSplatRenderer(const GaussianSplatNode& node);
-  ~GaussianSplatRenderer();
-
-  void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override;
-  void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override;
-  void runInitialPasses(
-      RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res,
-      Edge& edge) override;
-  void runRenderPass(RenderList&, QRhiCommandBuffer& cb, Edge& edge) override;
-  void release(RenderList&) override;
-
-private:
-  void createPreprocessPipeline(RenderList& renderer);
-  void createRenderPipeline(RenderList& renderer);
-  void createSortPipelines(RenderList& renderer);
-
-  const GaussianSplatNode& m_node;
-
-  // Render pipeline resources
-  QRhiBuffer* m_uniformBuffer{};
-  QRhiBuffer* m_dummyStorageBuffer{}; // Small buffer for unused bindings
-  QRhiGraphicsPipeline* m_pipeline{};
-  QRhiShaderResourceBindings* m_bindings{};
-
-  // SH preprocessing compute resources
-  // Converts raw 256-byte splats → compact 64-byte rendering splats
-  QRhiBuffer* m_rawSplatBuffer{};    // Input: raw PLY data (256 bytes/splat)
-  QRhiBuffer* m_renderSplatBuffer{}; // Output: compact (64 bytes/splat)
-  QRhiBuffer* m_preprocessUniformBuffer{};
-  QRhiComputePipeline* m_preprocessPipeline{};
-  QRhiShaderResourceBindings* m_preprocessSrb{};
-
-  // Sorting compute resources
-  QRhiBuffer* m_sortKeysBuffer{};         // Depth keys (float -> uint for sorting)
-  QRhiBuffer* m_sortKeysAltBuffer{};      // Double buffer for key ping-pong
-  QRhiBuffer* m_sortIndicesBuffer{};      // Sorted indices
-  QRhiBuffer* m_sortIndicesAltBuffer{};   // Double buffer for index ping-pong
-  QRhiBuffer* m_histogramBuffer{};        // Histogram for radix sort
-  QRhiBuffer* m_sortUniformBuffer{};      // Depth key pass uniforms
-  QRhiBuffer* m_sortPassUniformBuffer{};  // Per-pass uniforms (splatCount, bitOffset, numWorkgroups)
-  QRhiBuffer* m_prefixSumUniformBuffer{}; // Prefix sum uniforms
-
-  QRhiComputePipeline* m_depthKeyPipeline{};
-  QRhiComputePipeline* m_histogramPipeline{};
-  QRhiComputePipeline* m_prefixSumPipeline{};
-  QRhiComputePipeline* m_sortPipeline{};
-
-  QRhiShaderResourceBindings* m_depthKeySrb{};
-  QRhiShaderResourceBindings* m_histogramSrb{};
-  QRhiShaderResourceBindings* m_histogramSrbAlt{}; // For odd passes
-  QRhiShaderResourceBindings* m_prefixSumSrb{};
-  QRhiShaderResourceBindings* m_sortSrb{};
-  QRhiShaderResourceBindings* m_sortSrbAlt{}; // For ping-pong
-
-  ossia::small_vector<Sampler, 8> m_samplers;
-
-  int64_t m_lastSplatCount{0}; // Splat count seen by the last pipeline rebuild check in update()
-  bool m_preprocessResourcesCreated{false};
-  bool m_sortResourcesCreated{false};
-
-  static constexpr int64_t MAX_SPLATS = 50000000;
-  static constexpr int SORT_WORKGROUP_SIZE = 256; // Same value as local_size_x in the compute shaders
-  static constexpr int RADIX_BITS = 8;            // Digit width of one radix pass
-  static constexpr int NUM_BUCKETS = 256; // 2^RADIX_BITS
-};
-
-// Shader sources
-namespace GaussianSplatShaders
-{
-
-//=============================================================================
-// COMPUTE SHADER: SH PREPROCESSING (raw 256B → compact 64B per splat)
-//=============================================================================
-
-/**
- * Compute shader: Preprocess raw Gaussian Splat data
- *
- * Reads raw 256-byte PLY splats and writes compact 64-byte rendering splats:
- *   - Evaluates spherical harmonics for view-dependent color, adds 0.5 and
- *     clamps the result to [0, 1]
- *   - Applies exp() to log-space scale, then multiplies by scaleMod
- *   - Applies sigmoid() to raw opacity
- *   - Normalizes quaternion
- *   - Reorders rotation from (w,x,y,z) to (x,y,z,w) for the vertex shader
- *
- * One invocation per splat (local_size_x = 256); invocations whose index is
- * >= splatCount return immediately.
- *
- * SH "rest" coefficients are read channel-planar: for coefficient k the
- * shader fetches f_rest[k] (R), f_rest[15 + k] (G) and f_rest[30 + k] (B).
- *
- * NOTE(review): the in-shader comments describe camPos as a world-space
- * position, while GaussianSplatRenderer::update() fills the camPos field of
- * its preprocess uniform struct with the camera position mapped through the
- * inverse model matrix. Presumably that struct is what is bound here, which
- * makes the view direction model-space, like the stored splat positions.
- */
-static constexpr auto preprocess_shader = R"_(#version 450
-layout(local_size_x = 256) in;
-
-// Raw splat: 64 floats = 256 bytes (matches PLY loader output)
-//   [0..2]   position (x,y,z)
-//   [3..5]   normal (nx,ny,nz) — unused
-//   [6..8]   SH DC (f_dc_0, f_dc_1, f_dc_2)
-//   [9..53]  SH rest (f_rest_0 .. f_rest_44)
-//   [54]     opacity (pre-sigmoid)
-//   [55..57] scale (log-space)
-//   [58..61] rotation (w,x,y,z)
-//   [62..63] padding
-
-layout(std430, binding = 0) readonly buffer RawSplatBuffer {
-    float rawData[];  // 64 floats per splat
-};
-
-// Compact rendering splat: 16 floats = 64 bytes
-//   vec4 position (xyz, 0)
-//   vec4 scale    (xyz, 0)    — already exp'd
-//   vec4 rotation (x,y,z,w)  — normalized
-//   vec4 color    (r,g,b,a)  — SH evaluated, alpha = sigmoid(opacity)
-
-struct RenderSplat {
-    vec4 position;
-    vec4 scale;
-    vec4 rotation;
-    vec4 color;
-};
-
-layout(std430, binding = 1) writeonly buffer RenderSplatBuffer {
-    RenderSplat renderSplats[];
-};
-
-layout(std140, binding = 2) uniform Params {
-    mat4 view;
-    vec3 camPos;      // Camera position in world space
-    uint splatCount;
-    uint shDegree;    // 0, 1, 2, or 3
-    float scaleMod;
-    uint _pad0;
-    uint _pad1;
-};
-
-// Spherical harmonics constants
-const float SH_C0 = 0.28209479177387814;
-
-const float SH_C1 = 0.4886025119029199;
-
-const float SH_C2[5] = float[5](
-    1.0925484305920792,
-    -1.0925484305920792,
-    0.31539156525252005,
-    -1.0925484305920792,
-    0.5462742152960396
-);
-
-const float SH_C3[7] = float[7](
-    -0.5900435899266435,
-    2.890611442640554,
-    -0.4570457994644658,
-    0.3731763325901154,
-    -0.4570457994644658,
-    1.445305721320277,
-    -0.5900435899266435
-);
-
-vec3 evaluateSH(uint base, vec3 dir) {
-    // Degree 0
-    vec3 result = SH_C0 * vec3(
-        rawData[base + 6],
-        rawData[base + 7],
-        rawData[base + 8]
-    );
-
-    if (shDegree < 1) {
-        return result + 0.5;
-    }
-
-    // Degree 1
-    float x = dir.x, y = dir.y, z = dir.z;
-
-    // f_rest layout: [0..14] = R channel rest, [15..29] = G, [30..44] = B
-    // But the INRIA convention interleaves: [0..2] = degree1 for R,G,B etc.
-    // Actually the standard layout is:
-    //   f_rest[0..14]:  coeffs 1..15 for channel 0 (R)
-    //   f_rest[15..29]: coeffs 1..15 for channel 1 (G)
-    //   f_rest[30..44]: coeffs 1..15 for channel 2 (B)
-
-    uint r = base + 9;   // f_rest_0 start
-    // Degree 1: 3 coefficients per channel, interleaved as RGB triplets
-    // Coeff indices in f_rest: R=[0,1,2], G=[15,16,17], B=[30,31,32]
-    result += SH_C1 * (
-        - y * vec3(rawData[r+0],  rawData[r+15], rawData[r+30])
-        + z * vec3(rawData[r+1],  rawData[r+16], rawData[r+31])
-        - x * vec3(rawData[r+2],  rawData[r+17], rawData[r+32])
-    );
-
-    if (shDegree < 2) {
-        return result + 0.5;
-    }
-
-    // Degree 2: 5 coefficients per channel
-    // R=[3..7], G=[18..22], B=[33..37]
-    float xx = x*x, yy = y*y, zz = z*z, xy = x*y, yz = y*z, xz = x*z;
-
-    result += SH_C2[0] * xy       * vec3(rawData[r+3],  rawData[r+18], rawData[r+33]);
-    result += SH_C2[1] * yz       * vec3(rawData[r+4],  rawData[r+19], rawData[r+34]);
-    result += SH_C2[2] * (2.*zz - xx - yy)
-                                   * vec3(rawData[r+5],  rawData[r+20], rawData[r+35]);
-    result += SH_C2[3] * xz       * vec3(rawData[r+6],  rawData[r+21], rawData[r+36]);
-    result += SH_C2[4] * (xx - yy)* vec3(rawData[r+7],  rawData[r+22], rawData[r+37]);
-
-    if (shDegree < 3) {
-        return result + 0.5;
-    }
-
-    // Degree 3: 7 coefficients per channel
-    // R=[8..14], G=[23..29], B=[38..44]
-    result += SH_C3[0] * y*(3.*xx - yy)
-                                   * vec3(rawData[r+8],  rawData[r+23], rawData[r+38]);
-    result += SH_C3[1] * xy*z     * vec3(rawData[r+9],  rawData[r+24], rawData[r+39]);
-    result += SH_C3[2] * y*(4.*zz - xx - yy)
-                                   * vec3(rawData[r+10], rawData[r+25], rawData[r+40]);
-    result += SH_C3[3] * z*(2.*zz - 3.*xx - 3.*yy)
-                                   * vec3(rawData[r+11], rawData[r+26], rawData[r+41]);
-    result += SH_C3[4] * x*(4.*zz - xx - yy)
-                                   * vec3(rawData[r+12], rawData[r+27], rawData[r+42]);
-    result += SH_C3[5] * z*(xx - yy)
-                                   * vec3(rawData[r+13], rawData[r+28], rawData[r+43]);
-    result += SH_C3[6] * x*(xx - 3.*yy)
-                                   * vec3(rawData[r+14], rawData[r+29], rawData[r+44]);
-
-    return result + 0.5;
-}
-
-void main() {
-    uint idx = gl_GlobalInvocationID.x;
-    if (idx >= splatCount) return;
-
-    uint base = idx * 64; // 64 floats per raw splat
-
-    // Position
-    vec3 pos = vec3(rawData[base], rawData[base+1], rawData[base+2]);
-
-    // View direction for SH evaluation (world space, from camera towards splat)
-    // Must match the INRIA training convention: dir = pos - campos
-    vec3 dir = normalize(pos - camPos);
-
-    // Evaluate SH for view-dependent color
-    vec3 color = evaluateSH(base, dir);
-    color = clamp(color, 0.0, 1.0);
-
-    // Opacity: sigmoid(raw_opacity)
-    float rawOpacity = rawData[base + 54];
-    float alpha = 1.0 / (1.0 + exp(-rawOpacity));
-
-    // Scale: exp(log_scale) * scaleMod
-    vec3 scale = vec3(
-        exp(rawData[base + 55]),
-        exp(rawData[base + 56]),
-        exp(rawData[base + 57])
-    ) * scaleMod;
-
-    // Rotation: PLY stores (w,x,y,z), shader expects (x,y,z,w)
-    // Normalize quaternion
-    vec4 rawRot = vec4(
-        rawData[base + 58], // w
-        rawData[base + 59], // x
-        rawData[base + 60], // y
-        rawData[base + 61]  // z
-    );
-    rawRot = normalize(rawRot);
-    vec4 rot = vec4(rawRot.y, rawRot.z, rawRot.w, rawRot.x); // xyzw
-
-    // Write compact rendering splat
-    renderSplats[idx].position = vec4(pos, 0.0);
-    renderSplats[idx].scale    = vec4(scale, 0.0);
-    renderSplats[idx].rotation = rot;
-    renderSplats[idx].color    = vec4(color, alpha);
-}
-)_";
-
-//=============================================================================
-// COMPUTE SHADERS FOR DEPTH SORTING
-//=============================================================================
-
-/**
- * Compute shader: Generate depth keys from compact rendering splats
- * Transforms view-space Z to a sortable unsigned integer key
- *
- * Per splat (one invocation each, local_size_x = 256):
- *   - depth = -(view * position).z
- *   - depth <= nearPlane: key = 0xFFFF in the upper 16 bits, so these
- *     splats sort after everything else
- *   - otherwise: t = clamp(log2(depth / nearPlane) / log2(farPlane / nearPlane), 0, 1)
- *     and the upper 16 bits hold uint(t * 65535)
- *   - the lower 16 bits always hold (idx & 0xFFFF)
- *   - indices[idx] is initialised to idx
- *
- * Smaller keys mean closer splats, so an ascending sort yields front-to-back order.
- */
-static constexpr auto depth_key_shader = R"_(#version 450
-layout(local_size_x = 256) in;
-
-struct RenderSplat {
-    vec4 position;
-    vec4 scale;
-    vec4 rotation;
-    vec4 color;
-};
-
-layout(std430, binding = 0) readonly buffer SplatBuffer {
-    RenderSplat splats[];
-};
-
-layout(std430, binding = 1) writeonly buffer KeyBuffer {
-    uint keys[];
-};
-
-layout(std430, binding = 2) writeonly buffer IndexBuffer {
-    uint indices[];
-};
-
-layout(std140, binding = 3) uniform Params {
-    mat4 view;
-    uint splatCount;
-    float nearPlane;
-    float farPlane;
-    uint _pad;
-};
-
-void main() {
-    uint idx = gl_GlobalInvocationID.x;
-    if (idx >= splatCount) return;
-
-    // Transform to view space
-    vec4 viewPos = view * vec4(splats[idx].position.xyz, 1.0);
-    float depth = -viewPos.z; // Negate because view space Z is negative
-
-    // Front-to-back sort key: top 16 bits = depth, bottom 16 bits = splat index.
-    // The depth gives correct rendering order; the index provides stable
-    // tie-breaking for splats at similar depths (same buffer order every frame).
-    // This eliminates the "wave" artifact from coherent sort-order swaps.
-    // Combined with "under" blending for correct front-to-back compositing.
-    const uint keyMax = 0xFFFFFFFFu;
-    uint key;
-    if (depth <= nearPlane) {
-        // Behind camera: draw last, but keep stable index-based sub-order
-        key = (0xFFFFu << 16u) | (idx & 0xFFFFu);
-    } else {
-        float t = log2(depth / nearPlane) / log2(farPlane / nearPlane);
-        t = clamp(t, 0.0, 1.0);
-        uint depthKey = uint(t * 65535.0);
-        key = (depthKey << 16u) | (idx & 0xFFFFu);
-    }
-
-    keys[idx] = key;
-    indices[idx] = idx;
-}
-)_";
-
-/**
- * Compute shader: Histogram counting for radix sort
- * Counts occurrences of each digit value
- *
- * Each workgroup (256 invocations, one key each) clears a 256-entry shared
- * histogram, counts digit = (key >> bitOffset) & 0xFF with atomicAdd for
- * every invocation whose global index is below splatCount, then writes its
- * counts to histogram[workgroupId * 256 + digit].
- */
-static constexpr auto histogram_shader = R"_(#version 450
-layout(local_size_x = 256) in;
-
-layout(std430, binding = 0) readonly buffer KeyBuffer {
-    uint keys[];
-};
-
-layout(std430, binding = 1) buffer HistogramBuffer {
-    uint histogram[]; // 256 buckets * num_workgroups
-};
-
-layout(std140, binding = 2) uniform Params {
-    uint splatCount;
-    uint bitOffset;   // Which 8 bits to sort (0, 8, 16, 24)
-    uint numWorkgroups;
-    uint _pad;
-};
-
-shared uint localHistogram[256];
-
-void main() {
-    uint localId = gl_LocalInvocationID.x;
-    uint globalId = gl_GlobalInvocationID.x;
-    uint workgroupId = gl_WorkGroupID.x;
-
-    // Clear local histogram
-    localHistogram[localId] = 0;
-    barrier();
-
-    // Count digits in this workgroup
-    if (globalId < splatCount) {
-        uint key = keys[globalId];
-        uint digit = (key >> bitOffset) & 0xFFu;
-        atomicAdd(localHistogram[digit], 1);
-    }
-    barrier();
-
-    // Write local histogram to global memory
-    histogram[workgroupId * 256 + localId] = localHistogram[localId];
-}
-)_";
-
-/**
- * Compute shader: Scatter step of the radix sort
- *
- * Does not compute a prefix sum itself: it reads histogram[workgroup * 256 + digit]
- * as the global start offset for that workgroup and digit, so the buffer is
- * expected to already hold the offsets written by prefix_sum_shader.
- *
- * Each valid invocation (global index < splatCount) computes its rank as the
- * number of lower-numbered invocations in the same workgroup that share its
- * digit, then copies its key and index to offset + rank in the output
- * buffers. Writes are skipped when that position is >= splatCount.
- */
-static constexpr auto sort_scatter_shader = R"_(#version 450
-layout(local_size_x = 256) in;
-
-layout(std430, binding = 0) readonly buffer KeyBufferIn {
-    uint keysIn[];
-};
-
-layout(std430, binding = 1) readonly buffer IndexBufferIn {
-    uint indicesIn[];
-};
-
-layout(std430, binding = 2) writeonly buffer KeyBufferOut {
-    uint keysOut[];
-};
-
-layout(std430, binding = 3) writeonly buffer IndexBufferOut {
-    uint indicesOut[];
-};
-
-layout(std430, binding = 4) buffer HistogramBuffer {
-    uint histogram[]; // Global prefix sums
-};
-
-layout(std140, binding = 5) uniform Params {
-    uint splatCount;
-    uint bitOffset;
-    uint numWorkgroups;
-    uint _pad;
-};
-
-shared uint localDigits[256];
-shared uint localOffset[256];
-
-void main() {
-    uint localId = gl_LocalInvocationID.x;
-    uint globalId = gl_GlobalInvocationID.x;
-    uint workgroupId = gl_WorkGroupID.x;
-
-    // Load global prefix sum for this workgroup's digit
-    localOffset[localId] = histogram[workgroupId * 256 + localId];
-
-    // Load this thread's element
-    uint key = 0u;
-    uint idx = 0u;
-    uint digit = 256u; // invalid sentinel (> any real digit)
-    bool valid = globalId < splatCount;
-    if (valid) {
-        key = keysIn[globalId];
-        idx = indicesIn[globalId];
-        digit = (key >> bitOffset) & 0xFFu;
-    }
-    localDigits[localId] = digit;
-    barrier();
-
-    if (valid) {
-        // Stable rank: count threads with LOWER ID that share the same digit.
-        // This is deterministic (no atomicAdd race), so the sort is stable
-        // and identical across frames — eliminates flickering.
-        uint rank = 0u;
-        for (uint i = 0u; i < localId; i++) {
-            if (localDigits[i] == digit)
-                rank++;
-        }
-
-        uint globalPos = localOffset[digit] + rank;
-        if (globalPos < splatCount) {
-            keysOut[globalPos] = key;
-            indicesOut[globalPos] = idx;
-        }
-    }
-}
-)_";
-
-/**
- * Compute shader: Global prefix sum on histogram
- * Converts per-workgroup histograms to global exclusive prefix sums.
- *
- * Histogram layout: histogram[workgroup * 256 + digit]
- *
- * The output for each (workgroup, digit) pair must be the global position
- * where that workgroup should start placing elements with that digit.
- * This requires accounting for:
- *   1. All elements with smaller digits (across ALL workgroups)
- *   2. Same-digit elements from earlier workgroups
- *
- * Dispatch: (1, 1, 1) — single workgroup of 256 threads, one per digit.
- *
- * The conversion is done in place: every thread reads and then overwrites
- * only the histogram entries of its own digit, looping over numWorkgroups.
- * The exclusive prefix over the 256 digit totals is computed serially by
- * thread 0 in shared memory between two barrier() calls.
- */
-static constexpr auto prefix_sum_shader = R"_(#version 450
-layout(local_size_x = 256) in;
-
-layout(std430, binding = 0) buffer HistogramBuffer {
-    uint histogram[]; // Layout: histogram[workgroup * 256 + digit]
-};
-
-layout(std140, binding = 1) uniform Params {
-    uint numWorkgroups;
-    uint _pad0;
-    uint _pad1;
-    uint _pad2;
-};
-
-shared uint digitTotal[256];
-shared uint digitPrefix[256];
-
-void main() {
-    uint digit = gl_LocalInvocationID.x; // 0-255, one thread per digit
-
-    // Step 1: Sum all workgroup counts for this digit
-    uint total = 0;
-    for (uint wg = 0; wg < numWorkgroups; wg++) {
-        total += histogram[wg * 256 + digit];
-    }
-    digitTotal[digit] = total;
-    barrier();
-
-    // Step 2: Thread 0 computes exclusive prefix sum across all digits
-    // This determines the global starting offset for each digit bucket
-    if (digit == 0) {
-        digitPrefix[0] = 0;
-        for (uint d = 1; d < 256; d++) {
-            digitPrefix[d] = digitPrefix[d-1] + digitTotal[d-1];
-        }
-    }
-    barrier();
-
-    // Step 3: Convert per-workgroup counts to global offsets
-    // For each workgroup: offset = digitPrefix[digit] + sum of same-digit counts in earlier workgroups
-    uint running = digitPrefix[digit];
-    for (uint wg = 0; wg < numWorkgroups; wg++) {
-        uint idx = wg * 256 + digit;
-        uint val = histogram[idx];
-        histogram[idx] = running;
-        running += val;
-    }
-}
-)_";
-
-//=============================================================================
-// RENDER SHADERS
-//=============================================================================
-
-static constexpr auto vertex_shader = R"_(#version 450
-
-// Quad vertex positions
-const vec2 positions[6] = vec2[6](
-    vec2(-1.0, -1.0),
-    vec2( 1.0, -1.0),
-    vec2( 1.0,  1.0),
-    vec2(-1.0, -1.0),
-    vec2( 1.0,  1.0),
-    vec2(-1.0,  1.0)
-);
-
-// Compact rendering splat (output of preprocess compute shader)
-struct RenderSplat {
-    vec4 position;  // xyz = position
-    vec4 scale;     // xyz = scale (already exp'd)
-    vec4 rotation;  // quaternion xyzw (already normalized)
-    vec4 color;     // RGBA (SH evaluated, sigmoid applied)
-};
-
-layout(std430, binding = 0) readonly buffer SplatBuffer {
-    RenderSplat splats[];
-};
-
-// Sorted indices from depth sort pass
-layout(std430, binding = 1) readonly buffer SortedIndices {
-    uint sortedIndices[];
-};
-
-layout(std140, binding = 2) uniform Uniforms {
-    mat4 view;
-    mat4 projection;
-    mat4 clipSpaceCorr;
-    vec2 viewport;
-    float _pad0;
-    uint useSorting; // 0 = no sorting, 1 = use sorted indices
-};
-
-layout(location = 0) out vec2 f_center;  // screen-space splat center (pixels)
-layout(location = 1) out vec4 f_color;
-layout(location = 2) out vec3 f_conic;
-
-mat3 quatToMat(vec4 q) {
-    float x = q.x, y = q.y, z = q.z, w = q.w;
-    // GLSL mat3 is column-major: mat3(col0, col1, col2)
-    return mat3(
-        1.0 - 2.0*(y*y + z*z), 2.0*(x*y + w*z), 2.0*(x*z - w*y),   // col 0
-        2.0*(x*y - w*z), 1.0 - 2.0*(x*x + z*z), 2.0*(y*z + w*x),   // col 1
-        2.0*(x*z + w*y), 2.0*(y*z - w*x), 1.0 - 2.0*(x*x + y*y)    // col 2
-    );
-}
-
-void main() {
-    // Get splat index (sorted or unsorted)
-    uint splatIdx = useSorting != 0 ? sortedIndices[gl_InstanceIndex] : gl_InstanceIndex;
-    RenderSplat splat = splats[splatIdx];
-    vec2 quadPos = positions[gl_VertexIndex];
-
-    // Early opacity cull: skip splats that are nearly invisible
-    if (splat.color.a < 1.0 / 255.0) {
-        gl_Position = vec4(0.0, 0.0, 2.0, 1.0);
-        return;
-    }
-
-    // View space position
-    vec4 viewPos = view * vec4(splat.position.xyz, 1.0);
-
-    // Focal lengths in pixels
-    float focal = projection[0][0] * viewport.x * 0.5;
-    float focal_y = projection[1][1] * viewport.y * 0.5;
-    float tanFovX = 0.5 * viewport.x / focal;
-    float tanFovY = 0.5 * viewport.y / focal_y;
-
-    // Frustum culling: project to clip space and check NDC bounds
-    // (matches INRIA reference: cull behind camera + outside 1.3x viewport)
-    vec4 clipPos = projection * viewPos;
-    if (clipPos.w <= 0.2) {
-        gl_Position = vec4(0.0, 0.0, 2.0, 1.0);
-        return;
-    }
-    vec3 ndc = clipPos.xyz / clipPos.w;
-    if (abs(ndc.x) > 1.3 || abs(ndc.y) > 1.3) {
-        gl_Position = vec4(0.0, 0.0, 2.0, 1.0);
-        return;
-    }
-
-    // Clamp view-space position to prevent numerical issues at screen edges
-    // (matches INRIA CUDA reference: 1.3x FOV tangent)
-    float limX = 1.3 * tanFovX;
-    float limY = 1.3 * tanFovY;
-    float txtz = viewPos.x / viewPos.z;
-    float tytz = viewPos.y / viewPos.z;
-    viewPos.x = clamp(txtz, -limX, limX) * viewPos.z;
-    viewPos.y = clamp(tytz, -limY, limY) * viewPos.z;
-
-    // Build 3D covariance from scale and rotation (already preprocessed)
-    // INRIA convention: Sigma = R * S * S^T * R^T = R * S² * R^T
-    // The principal axes are the COLUMNS of R.
-    vec3 scale = splat.scale.xyz;
-    mat3 R = quatToMat(splat.rotation);
-    mat3 S = mat3(scale.x, 0, 0, 0, scale.y, 0, 0, 0, scale.z);
-    mat3 M = R * S;
-    mat3 Sigma = M * transpose(M);
-
-    // 2D covariance via EWA projection
-    mat3 W = mat3(view);
-    float z2 = viewPos.z * viewPos.z;
-
-    // Jacobian of projection (column-major: mat3(col0, col1, col2))
-    mat3 J = mat3(
-        focal / viewPos.z, 0.0, 0.0,                                // col 0
-        0.0, focal_y / viewPos.z, 0.0,                              // col 1
-        -focal * viewPos.x / z2, -focal_y * viewPos.y / z2, 0.0     // col 2
-    );
-
-    mat3 T = J * W;
-    mat3 cov = T * Sigma * transpose(T);
-
-    float cov_xx = cov[0][0], cov_xy = cov[0][1], cov_yy = cov[1][1];
-
-    // Mip-Splatting 2D filter (Yu et al. 2024): approximate the pixel box filter
-    // as a Gaussian and convolve with the projected 2D covariance.
-    // Opacity is compensated to preserve each splat's total contribution:
-    //   alpha' = alpha * sqrt(det(Sigma) / det(Sigma + kernel_size * I))
-    float kernel_size = 0.3;
-    float det_0 = max(1e-6, cov_xx * cov_yy - cov_xy * cov_xy);
-    cov_xx += kernel_size;
-    cov_yy += kernel_size;
-    float det_1 = max(1e-6, cov_xx * cov_yy - cov_xy * cov_xy);
-    float mipCoef = sqrt(det_0 / det_1);
-
-    float det = cov_xx * cov_yy - cov_xy * cov_xy;
-    float mid = 0.5 * (cov_xx + cov_yy);
-    float disc = max(0.0, mid * mid - det);
-    float lambda1 = mid + sqrt(disc);
-    float lambda2 = mid - sqrt(disc);
-
-    // Eigenvectors of 2D covariance for ellipse-aligned quad
-    vec2 eigVec1;
-    if (abs(cov_xy) > 1e-6) {
-        eigVec1 = normalize(vec2(cov_xy, lambda1 - cov_xx));
-    } else {
-        eigVec1 = (cov_xx >= cov_yy) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);
-    }
-    vec2 eigVec2 = vec2(-eigVec1.y, eigVec1.x);
-
-    float maxExtent = 2048.0;
-    float r1 = min(ceil(3.0 * sqrt(max(lambda1, 0.0))), maxExtent);
-    float r2 = min(ceil(3.0 * sqrt(max(lambda2, 0.0))), maxExtent);
-
-    // Cull degenerate or invisible splats
-    if (det < 1e-3 || max(r1, r2) < 0.1) {
-        gl_Position = vec4(0.0, 0.0, 2.0, 1.0);
-        return;
-    }
-
-    // Inverse covariance (conic) for fragment Gaussian evaluation.
-    // The cross-term sign must match the screen-space convention of gl_FragCoord:
-    //   Vulkan/Metal/D3D (clipSpaceCorr[1][1] < 0): both screen axes flip
-    //     relative to J-space, preserving the cross-product sign.
-    //   OpenGL (clipSpaceCorr[1][1] > 0): only X flips, requiring correction.
-    float inv_det = 1.0 / det;
-    float crossSign = sign(clipSpaceCorr[1][1]);
-    f_conic = vec3(cov_yy * inv_det, crossSign * cov_xy * inv_det, cov_xx * inv_det);
-
-    // Oriented quad: major axis along eigVec1, minor along eigVec2
-    vec2 pixelOffset = quadPos.x * r1 * eigVec1 + quadPos.y * r2 * eigVec2;
-    vec2 center = ndc.xy;
-    vec2 ndcOffset = pixelOffset * 2.0 / viewport;
-
-    gl_Position = clipSpaceCorr * vec4(center + ndcOffset, ndc.z, 1.0);
-
-    // Score's texture compositing pipeline flips Y when sampling for Vulkan/HLSL/Metal.
-    // To match this convention (same as ISF shaders), we undo clipSpaceCorr's Y-flip here
-    // so the compositing re-flip produces a correctly oriented final image.
-    gl_Position.y = -gl_Position.y;
-
-    // Screen-space center in pixels (matches gl_FragCoord coordinate system)
-    vec4 centerClip = clipSpaceCorr * vec4(ndc.xy, ndc.z, 1.0);
-    centerClip.y = -centerClip.y;
-    f_center = (centerClip.xy / centerClip.w * 0.5 + 0.5) * viewport;
-
-    // Fade out excessively large projected splats.
-    float alpha = splat.color.a * mipCoef;
-    float maxR = max(r1, r2);
-    float fadeRadius = 512.0;
-    if (maxR > fadeRadius) {
-        float fade = fadeRadius / maxR;
-        alpha *= fade;
-        if (alpha < 1.0 / 255.0) {
-            gl_Position = vec4(0.0, 0.0, 2.0, 1.0);
-            return;
-        }
-    }
-    f_color = vec4(splat.color.rgb, alpha);
-}
-)_";
-
-static constexpr auto fragment_shader = R"_(#version 450
-
-layout(location = 0) in vec2 f_center;  // screen-space splat center (pixels)
-layout(location = 1) in vec4 f_color;
-layout(location = 2) in vec3 f_conic;
-
-layout(location = 0) out vec4 fragColor;
-
-void main() {
-    // Pixel offset from splat center, computed per-fragment for precision.
-    // Unlike interpolated UVs, this is exact regardless of quad orientation.
-    vec2 d = gl_FragCoord.xy - f_center;
-
-    float power = -0.5 * (f_conic.x * d.x * d.x +
-                          2.0 * f_conic.y * d.x * d.y +
-                          f_conic.z * d.y * d.y);
-
-    if (power > 0.0) discard;
-
-    float gaussian = exp(power);
-    float alpha = min(0.99, gaussian * f_color.a);
-    if (alpha < 1.0/255.0) discard;
-
-    fragColor = vec4(f_color.rgb * alpha, alpha);
-}
-)_";
-
-} // namespace GaussianSplatShaders
-
-} // namespace score::gfx
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/Splat/Metadata.hpp
deleted file mode 100644
index 2433522d07..0000000000
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/Metadata.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-#include <Process/ProcessMetadata.hpp>
-
-namespace Gfx::Splat
-{
-class Model;
-}
-
-PROCESS_METADATA(
-    , Gfx::Splat::Model, "cdc15a16-e856-4e02-9339-7d9e48da10ce",
-    "Splat",                             // Internal name
-    "Splat",                             // Pretty name
-    Process::ProcessCategory::Visual,    // Category
-    "Visuals/Render",                    // Category
-    "Display gaussian splats",           // Description
-    "ossia team",                        // Author
-    (QStringList{"gfx", "model", "3d"}), // Tags
-    {},                                  // Inputs
-    {},                                  // Outputs
-    QUrl{},                              // Doc url
-    Process::ProcessFlags::SupportsAll | Process::ProcessFlags::ControlSurface // Flags
-)
diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/Splat/Process.cpp
deleted file mode 100644
index 33d87fbeba..0000000000
--- a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "Process.hpp"
-
-#include <Process/Dataflow/Port.hpp>
-#include <Process/Dataflow/WidgetInlets.hpp>
-
-#include <Gfx/Graph/Node.hpp>
-#include <Gfx/TexturePort.hpp>
-
-#include <QFileInfo>
-#include <QImageReader>
-
-#include <wobjectimpl.h>
-
-W_OBJECT_IMPL(Gfx::Splat::Model)
-namespace Gfx::Splat
-{
-
-Model::Model(
-    const TimeVal& duration, const Id<Process::ProcessModel>& id, QObject* parent)
-    : Process::ProcessModel{duration, id, "gfxProcess", parent}
-{
-  metadata().setInstanceName(*this);
-
-  init();
-}
-
-Model::~Model() = default;
-
-void Model::init()
-{
-  if(m_inlets.empty() && m_outlets.empty())
-  {
-    m_outlets.push_back(new TextureOutlet{"Texture Out", Id<Process::Port>(0), this});
-    m_inlets.push_back(new TextureInlet{"Buffer In", Id<Process::Port>(0), this});
-
-    m_inlets.push_back(new Process::XYZSpinboxes{
-        ossia::vec3f{-10000., -10000., -10000.}, ossia::vec3f{10000., 10000., 10000.},
-        ossia::vec3f{0., 0., 0.}, false, "Position", Id<Process::Port>(1), this});
-    m_inlets.push_back(new Process::XYZSpinboxes{
-        ossia::vec3f{0., 0., 0.}, ossia::vec3f{359.9999999, 359.9999999, 359.9999999},
-        ossia::vec3f{}, false, "Rotation", Id<Process::Port>(2), this});
-    m_inlets.push_back(new Process::XYZSpinboxes{
-        ossia::vec3f{0.00001, 0.00001, 0.00001}, ossia::vec3f{1000., 1000., 1000.},
-        ossia::vec3f{1., 1., 1.}, false, "Scale", Id<Process::Port>(3), this});
-
-    m_inlets.push_back(new Process::XYZSpinboxes{
-        ossia::vec3f{-10000., -10000., -10000.}, ossia::vec3f{10000., 10000., 10000.},
-        ossia::vec3f{-1., -1., -1.}, false, "Camera position", Id<Process::Port>(4),
-        this});
-    m_inlets.push_back(new Process::XYZSpinboxes{
-        ossia::vec3f{-10000., -10000., -10000.}, ossia::vec3f{10000., 10000., 10000.},
-        ossia::vec3f{}, false, "Camera direction", Id<Process::Port>(5), this});
-
-    m_inlets.push_back(
-        new Process::FloatSlider{0.01, 359.999, 90., "FOV", Id<Process::Port>(6), this});
-    m_inlets.push_back(new Process::FloatSlider{
-        0.001, 1000., 0.001, "Near", Id<Process::Port>(7), this});
-    m_inlets.push_back(new Process::FloatSlider{
-        0.001, 10000., 100000., "Far", Id<Process::Port>(8), this});
-  }
-
-  std::vector<std::pair<QString, ossia::value>> projmodes{
-      {"Perspective", 0},
-      {"Fulldome (1-pass)", 1},
-  };
-
-  m_inlets.push_back(
-      new Process::ComboBox{projmodes, 0, "Camera", Id<Process::Port>(9), this});
-}
-
-QString Model::prettyName() const noexcept
-{
-  return tr("Model Display");
-}
-
-}
-template <>
-void DataStreamReader::read(const Gfx::Splat::Model& proc)
-{
-  readPorts(*this, proc.m_inlets, proc.m_outlets);
-
-  insertDelimiter();
-}
-
-template <>
-void DataStreamWriter::write(Gfx::Splat::Model& proc)
-{
-  writePorts(
-      *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
-      proc.m_outlets, &proc);
-
-  checkDelimiter();
-}
-
-template <>
-void JSONReader::read(const Gfx::Splat::Model& proc)
-{
-  readPorts(*this, proc.m_inlets, proc.m_outlets);
-}
-
-template <>
-void JSONWriter::write(Gfx::Splat::Model& proc)
-{
-  writePorts(
-      *this, components.interfaces<Process::PortFactoryList>(), proc.m_inlets,
-      proc.m_outlets, &proc);
-}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TagAs.cpp b/src/plugins/score-plugin-threedim/Threedim/TagAs.cpp
new file mode 100644
index 0000000000..ad9bcd15c8
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TagAs.cpp
@@ -0,0 +1,52 @@
+#include "TagAs.hpp"
+
+#include "PrimitiveCloud/FormatOverride.hpp"
+
+namespace Threedim
+{
+
+void TagAs::rebuild()
+{
+  // Snapshot the upstream identity (pointer + version) and the requested
+  // format id; operator()() compares pointer and version on later ticks.
+  const auto& upstream = inputs.scene_in.scene.state;
+  const auto& format = inputs.format_id.value;
+  m_cached_in_state = upstream.get();
+  m_cached_in_version = upstream ? upstream->version : -1;
+  m_cached_format_id = format;
+
+  if(upstream)
+  {
+    // Same helper AssetLoader uses; per its contract noted here, an empty
+    // format id is a passthrough that returns the input verbatim.
+    m_cached_out
+        = Threedim::PrimitiveCloud::applyFormatOverride(upstream, format);
+  }
+  else
+  {
+    // No upstream scene: forward the empty pointer unchanged.
+    m_cached_out = upstream;
+  }
+  // Every rebuild republishes, so raise all dirty bits for the next tick.
+  m_pending_dirty = 0xFF;
+}
+
+void TagAs::operator()()
+{
+  // Upstream can change without a port-update event (e.g. a producer
+  // republishing the same shared_ptr after mutating it), so compare the
+  // pointer and version against the snapshot taken by rebuild().
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  const bool upstream_changed
+      = m_cached_in_state != in_state || m_cached_in_version != in_version;
+  // A null cache is the correct result for a null upstream: do not rebuild
+  // (and re-raise every dirty bit) on each tick while disconnected.
+  if(upstream_changed || (in_state && !m_cached_out))
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/TagAs.hpp b/src/plugins/score-plugin-threedim/Threedim/TagAs.hpp
new file mode 100644
index 0000000000..483b3bb459
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TagAs.hpp
@@ -0,0 +1,78 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+namespace Threedim
+{
+
+// Mid-pipeline format-id stamp. Walks every primitive_cloud_component
+// reachable from the upstream scene_state and shallow-clones it with
+// `format_id = inputs.format_id.value`. Heavy fields (raw_data,
+// extra_buffers, bounds) are shared via shared_ptr — no GPU upload
+// duplicates.
+//
+// Wiring:
+//   ThirdPartyProducer → TagAs(format_id="my-custom-format")
+//                       → ScenePreprocessor
+//                       → FlattenedSceneFilter(mode=12, match="my-custom-format")
+//                       → CustomDecode → CustomDraw → Window
+//
+// Use this when the upstream producer can't be modified (third-party
+// node, legacy plugin) but the cloud needs to flow through a
+// FlattenedSceneFilter in mode 12 (format_id == match_str). Empty
+// `format_id` is passthrough — no rewrite, original scene_state
+// forwarded as-is.
+class TagAs
+{
+public:
+  halp_meta(name, "Tag As Format")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "tag_as_format")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/tag-as-format.html")
+  halp_meta(uuid, "8e3d7c2a-5f91-4b6c-a8e2-1d9f4c7b3e5a")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    struct : halp::lineedit<"Format ID", "">
+    { void update(TagAs& n) { n.rebuild(); } } format_id;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild(); // recomputes m_cached_out and the snapshots below from `inputs`
+  void operator()(); // rebuilds when upstream changed, then publishes the cache
+
+  // Cached output kept stable while inputs are unchanged — preserves
+  // ScenePreprocessor's fingerprint fast-path.
+  std::shared_ptr<const ossia::scene_state> m_cached_out;
+  uint8_t m_pending_dirty{0xFF}; // copied to scene_out.dirty by operator()(), then zeroed
+  const ossia::scene_state* m_cached_in_state{}; // upstream pointer seen by rebuild()
+  int64_t m_cached_in_version{-1}; // upstream version seen by rebuild(); -1 = no upstream
+  std::string m_cached_format_id; // format id recorded by the last rebuild()
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TangentUtils.hpp b/src/plugins/score-plugin-threedim/Threedim/TangentUtils.hpp
new file mode 100644
index 0000000000..e584ec3a35
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TangentUtils.hpp
@@ -0,0 +1,139 @@
+#pragma once
+#include <mikktspace.h>
+
+#include <cmath>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace Threedim
+{
+
+// Generate glTF-compatible float4 tangents (xyz = unit tangent, w =
+// handedness ±1) using mikktspace from a mesh's position / normal /
+// texcoord_0 streams and an optional uint32 index buffer. Returns a
+// shared buffer of `vertex_count * 4` floats, or nullptr on failure
+// (missing streams, degenerate mesh, etc).
+//
+// For indexed meshes: mikktspace's contract is unindexed ("DO NOT use
+// an already existing index list"), but we're constrained to keep
+// indexed data. We call the mikktspace callbacks against the EXPANDED
+// (unindexed) triangle list via the index buffer, and write the
+// generated tangent back through the same index lookup. When two
+// triangles share a vertex with the same tangent (smooth surface),
+// successive writes produce the same value. At UV seams they disagree
+// and the last write wins — a known small artifact compared to
+// un-indexing the whole mesh. Vertex duplication on import is a
+// future enhancement tracked in docs/3d-pipeline-tasks.md.
+inline std::shared_ptr<std::vector<float>> generate_tangents_mikktspace(
+    const std::shared_ptr<std::vector<float>>& positions,
+    const std::shared_ptr<std::vector<float>>& normals,
+    const std::shared_ptr<std::vector<float>>& texcoords,
+    const std::shared_ptr<std::vector<uint32_t>>& indices,
+    uint32_t vertex_count)
+{
+  if(!positions || !normals || !texcoords || vertex_count == 0)
+    return {};
+  if(positions->size() < vertex_count * 3
+     || normals->size() < vertex_count * 3
+     || texcoords->size() < vertex_count * 2)
+    return {};
+
+  // Triangle count: indexed → indices/3, non-indexed → vertex_count/3.
+  const uint32_t num_faces
+      = indices ? uint32_t(indices->size() / 3)
+                : uint32_t(vertex_count / 3);
+  if(num_faces == 0)
+    return {};
+
+  // Out-of-range indices would make the callbacks below read and write
+  // past the ends of the attribute arrays, so reject them up front.
+  if(indices)
+    for(const uint32_t idx : *indices)
+      if(idx >= vertex_count)
+        return {};
+
+  auto tangents = std::make_shared<std::vector<float>>(vertex_count * 4, 0.f);
+
+  struct UserData
+  {
+    const float* positions;
+    const float* normals;
+    const float* texcoords;
+    const uint32_t* indices; // null when un-indexed
+    uint32_t num_faces;
+    std::vector<float>* tangents;
+  };
+  UserData ud{positions->data(),
+              normals->data(),
+              texcoords->data(),
+              indices ? indices->data() : nullptr,
+              num_faces,
+              tangents.get()};
+
+  SMikkTSpaceInterface iface{};
+  iface.m_getNumFaces = [](const SMikkTSpaceContext* ctx) {
+    return int(static_cast<const UserData*>(ctx->m_pUserData)->num_faces);
+  };
+  iface.m_getNumVerticesOfFace = [](const SMikkTSpaceContext*, int) {
+    return 3;
+  };
+  iface.m_getPosition = [](const SMikkTSpaceContext* ctx, float out[],
+                           int iFace, int iVert) {
+    auto& u = *static_cast<const UserData*>(ctx->m_pUserData);
+    auto vi = uint32_t(iFace) * 3u + uint32_t(iVert);
+    auto v = u.indices ? u.indices[vi] : vi;
+    out[0] = u.positions[v * 3 + 0];
+    out[1] = u.positions[v * 3 + 1];
+    out[2] = u.positions[v * 3 + 2];
+  };
+  iface.m_getNormal = [](const SMikkTSpaceContext* ctx, float out[],
+                         int iFace, int iVert) {
+    auto& u = *static_cast<const UserData*>(ctx->m_pUserData);
+    auto vi = uint32_t(iFace) * 3u + uint32_t(iVert);
+    auto v = u.indices ? u.indices[vi] : vi;
+    out[0] = u.normals[v * 3 + 0];
+    out[1] = u.normals[v * 3 + 1];
+    out[2] = u.normals[v * 3 + 2];
+  };
+  iface.m_getTexCoord = [](const SMikkTSpaceContext* ctx, float out[],
+                           int iFace, int iVert) {
+    auto& u = *static_cast<const UserData*>(ctx->m_pUserData);
+    auto vi = uint32_t(iFace) * 3u + uint32_t(iVert);
+    auto v = u.indices ? u.indices[vi] : vi;
+    out[0] = u.texcoords[v * 2 + 0];
+    out[1] = u.texcoords[v * 2 + 1];
+  };
+  iface.m_setTSpaceBasic = [](const SMikkTSpaceContext* ctx,
+                              const float tangent[], float sign,
+                              int iFace, int iVert) {
+    auto& u = *static_cast<const UserData*>(ctx->m_pUserData);
+    auto vi = uint32_t(iFace) * 3u + uint32_t(iVert);
+    auto v = u.indices ? u.indices[vi] : vi;
+    auto& t = *u.tangents;
+    t[v * 4 + 0] = tangent[0];
+    t[v * 4 + 1] = tangent[1];
+    t[v * 4 + 2] = tangent[2];
+    t[v * 4 + 3] = sign;
+  };
+
+  SMikkTSpaceContext ctx{&iface, &ud};
+  if(!genTangSpaceDefault(&ctx))
+    return {};
+
+  // Fallback for vertices never touched (rare; mostly for non-manifold
+  // meshes): orient any zero tangent along X so shader doesn't divide
+  // by zero when reconstructing the TBN.
+  for(uint32_t v = 0; v < vertex_count; ++v)
+  {
+    float* t = tangents->data() + v * 4;
+    const float len2 = t[0] * t[0] + t[1] * t[1] + t[2] * t[2];
+    if(len2 < 1e-10f)
+    {
+      t[0] = 1.f; t[1] = 0.f; t[2] = 0.f; t[3] = 1.f;
+    }
+  }
+  return tangents;
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TextToMesh.cpp b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.cpp
new file mode 100644
index 0000000000..41f6674dac
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.cpp
@@ -0,0 +1,454 @@
+#include "TextToMesh.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <QFont>
+#include <QPainterPath>
+#include <QPointF>
+#include <QPolygonF>
+#include <QQuaternion>
+#include <QRawFont>
+#include <QString>
+#include <QTransform>
+#include <QVector>
+
+#include <cmath>
+#include <cstring>
+#include <vector>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// ─── Ear-clipping triangulator ────────────────────────────────────────
+//
+// Handles simple (non-self-intersecting, no holes) polygons in CCW
+// winding order. For each emitted triangle, the resulting indices
+// reference positions in the input polygon's order.
+//
+// Complexity: O(n²). Glyphs flatten to dozens of verts at most, so
+// acceptable. For large pts-per-glyph or paragraph text later, swap
+// for earcut.hpp.
+
+struct Vec2 { float x, y; };
+
+// Z component of the cross product (b - a) x (c - a): positive when the turn
+// a -> b -> c is counter-clockwise in a Y-up frame, zero when collinear.
+inline float triSign(Vec2 a, Vec2 b, Vec2 c) noexcept
+{
+  return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x);
+}
+
+// True when p is inside or on the boundary of triangle abc, either winding.
+inline bool pointInTri(Vec2 p, Vec2 a, Vec2 b, Vec2 c) noexcept
+{
+  const float s[3] = {triSign(p, a, b), triSign(p, b, c), triSign(p, c, a)};
+  const bool any_neg = s[0] < 0.f || s[1] < 0.f || s[2] < 0.f;
+  return !(any_neg && (s[0] > 0.f || s[1] > 0.f || s[2] > 0.f));
+}
+
+// Twice the signed area (shoelace sum). Positive = CCW in a Y-up frame.
+float polyArea(const std::vector<Vec2>& p) noexcept
+{
+  float twice_area = 0.f;
+  const std::size_t count = p.size();
+  for(std::size_t cur = 0; cur < count; ++cur)
+  {
+    // Wrap the last vertex around to the first to close the ring.
+    const std::size_t nxt = (cur + 1 == count) ? 0 : cur + 1;
+    twice_area += p[cur].x * p[nxt].y - p[nxt].x * p[cur].y;
+  }
+  return twice_area;
+}
+
+// Triangulate `poly` by ear clipping. Each emitted index is `base_offset`
+// plus the vertex's position in `poly`; triangles go to `out_indices`.
+void earClip(
+    const std::vector<Vec2>& poly, uint32_t base_offset,
+    std::vector<uint32_t>& out_indices)
+{
+  const std::size_t vert_count = poly.size();
+  if(vert_count < 3)
+    return;
+
+  // `ring` holds the vertices not yet clipped, as indices into `poly`.
+  // A clockwise input (negative area) is walked backwards so that the
+  // clipping loop below always sees a counter-clockwise ring.
+  const bool reversed = polyArea(poly) < 0.f;
+  std::vector<int> ring(vert_count);
+  for(std::size_t k = 0; k < vert_count; ++k)
+    ring[k] = reversed ? int(vert_count - 1 - k) : int(k);
+
+  // Append one triangle given three slots of `ring`.
+  const auto emit = [&](int a, int b, int c) {
+    out_indices.push_back(base_offset + uint32_t(ring[a]));
+    out_indices.push_back(base_offset + uint32_t(ring[b]));
+    out_indices.push_back(base_offset + uint32_t(ring[c]));
+  };
+
+  // A corner is an ear when it is strictly convex and no other remaining
+  // vertex lies inside (or on the boundary of) the triangle it spans.
+  // `count` is the number of ring slots still in play.
+  const auto isEar = [&](int prev, int cur, int next, int count) {
+    const Vec2 a = poly[ring[prev]];
+    const Vec2 b = poly[ring[cur]];
+    const Vec2 c = poly[ring[next]];
+    if(triSign(a, b, c) <= 0.f)
+      return false; // reflex or collinear
+    for(int other = 0; other < count; ++other)
+    {
+      if(other == prev || other == cur || other == next)
+        continue;
+      if(pointInTri(poly[ring[other]], a, b, c))
+        return false;
+    }
+    return true;
+  };
+
+  int live = int(ring.size());
+  // Iteration cap (three passes per initial vertex) so that degenerate
+  // input can never keep the loop spinning.
+  for(int budget = live * 3; live > 3 && budget > 0; --budget)
+  {
+    // Take the first ear in ring order.
+    int ear = -1;
+    for(int cur = 0; cur < live && ear < 0; ++cur)
+    {
+      if(isEar((cur + live - 1) % live, cur, (cur + 1) % live, live))
+        ear = cur;
+    }
+    if(ear < 0)
+      break; // no ear left: give up on this polygon
+
+    emit((ear + live - 1) % live, ear, (ear + 1) % live);
+    ring.erase(ring.begin() + ear);
+    --live;
+  }
+
+  // When exactly three vertices remain they form the last triangle;
+  // after an early give-up (more than three left) nothing more is emitted.
+  if(live == 3)
+    emit(0, 1, 2);
+}
+
+// Convert a QPainterPath's filled polygons into (positions, indices),
+// appending to out_positions / out_indices. Positions are emitted as
+// (x * scale + x_origin, -y * scale, 0); x_origin is added after scaling.
+void tessellatePath(
+    const QPainterPath& path, float scale, float x_origin,
+    std::vector<float>& out_positions, std::vector<uint32_t>& out_indices)
+{
+  // toFillPolygons flattens curves; each polygon is treated as a solid fill.
+  // NOTE(review): Qt documents that it joins overlapping subpaths by
+  // "rewinding", so holes may not arrive as separate polygons — confirm.
+  const QList<QPolygonF> polys = path.toFillPolygons();
+  for(const auto& qpoly : polys)
+  {
+    // Drop the closing duplicate vertex Qt tends to append, then skip rings
+    // too short to triangulate so no unreferenced vertices are emitted.
+    int count = int(qpoly.size());
+    if(count > 1 && qpoly[0] == qpoly[count - 1])
+      count--;
+    if(count < 3)
+      continue;
+    std::vector<Vec2> poly;
+    poly.reserve(std::size_t(count));
+    for(int i = 0; i < count; ++i)
+    {
+      const auto& p = qpoly[i];
+      // Y flip so the mesh uses a right-handed Y-up frame (Qt is Y-down).
+      poly.push_back({float(p.x() * scale + x_origin),
+                      float(-p.y() * scale)});
+    }
+    const uint32_t base = uint32_t(out_positions.size() / 3);
+    for(const auto& v : poly)
+    {
+      out_positions.push_back(v.x);
+      out_positions.push_back(v.y);
+      out_positions.push_back(0.f);
+    }
+    earClip(poly, base, out_indices);
+  }
+}
+
+} // namespace
+
+void TextToMesh::rebuild()
+{
+  const bool text_inputs_changed
+      = m_cached_text != inputs.text.value
+        || m_cached_family != inputs.font_family.value
+        || m_cached_size != inputs.font_size.value
+        || m_cached_bold != inputs.bold.value
+        || m_cached_italic != inputs.italic.value
+        || m_cached_height != inputs.height.value
+        || m_cached_center != inputs.center_x.value;
+
+  float scratch[16];
+  CachedTRS xformCache = m_cachedTRS;
+  computeTRSMatrix(inputs, scratch, xformCache);
+  m_cachedTRS = xformCache;
+
+  // Rebuild the mesh only when the text / font parameters changed.
+  // Pure TRS edits keep the same mesh_component and just bump the
+  // enclosing scene_state version.
+  if(text_inputs_changed || !m_cached_mesh)
+  {
+    m_cached_text = inputs.text.value;
+    m_cached_family = inputs.font_family.value;
+    m_cached_size = inputs.font_size.value;
+    m_cached_bold = inputs.bold.value;
+    m_cached_italic = inputs.italic.value;
+    m_cached_height = inputs.height.value;
+    m_cached_center = inputs.center_x.value;
+
+    // Build a QRawFont from the requested family. QRawFont::fromFont
+    // resolves aliases (e.g. "Sans" → the system default).
+    QFont qf(QString::fromStdString(inputs.font_family.value));
+    qf.setPixelSize(inputs.font_size.value);
+    qf.setBold(inputs.bold.value);
+    qf.setItalic(inputs.italic.value);
+    QRawFont rf = QRawFont::fromFont(qf);
+    if(!rf.isValid())
+    {
+      // Fallback: default system font at the requested size.
+      QFont def;
+      def.setPixelSize(inputs.font_size.value);
+      rf = QRawFont::fromFont(def);
+    }
+
+    const QString str = QString::fromStdString(inputs.text.value);
+    const QVector<quint32> glyphs = rf.glyphIndexesForString(str);
+    const QVector<QPointF> advances = rf.advancesForGlyphIndexes(glyphs);
+
+    // Pixel → world scale: QRawFont::pixelSize() is the nominal pixel
+    // size. Height control sets the target cap height; we approximate
+    // cap height as pixelSize × 0.7 (typical for Latin fonts).
+    const float cap_ratio = 0.7f;
+    const float pixel_to_world
+        = inputs.height.value
+          / (float(rf.pixelSize()) * cap_ratio + 1e-6f);
+
+    std::vector<float> positions;
+    std::vector<uint32_t> indices;
+    positions.reserve(glyphs.size() * 32 * 3);
+    indices.reserve(glyphs.size() * 32);
+
+    float cursor_x_px = 0.f;
+    for(int gi = 0; gi < glyphs.size(); ++gi)
+    {
+      QPainterPath gp = rf.pathForGlyph(glyphs[gi]);
+      if(!gp.isEmpty())
+        tessellatePath(
+            gp, pixel_to_world, cursor_x_px * pixel_to_world,
+            positions, indices);
+      if(gi < advances.size())
+        cursor_x_px += float(advances[gi].x());
+    }
+
+    // Optionally center the text on X — total advance is where we
+    // ended up at cursor_x_px; shift all vertices by -half.
+    if(inputs.center_x.value && !positions.empty())
+    {
+      const float half = cursor_x_px * pixel_to_world * 0.5f;
+      for(std::size_t v = 0; v < positions.size(); v += 3)
+        positions[v] -= half;
+    }
+
+    if(positions.empty() || indices.empty())
+    {
+      // Empty string or unrenderable font — keep m_wrapped_state valid
+      // (reset mesh) but clear its content so republish emits empty.
+      m_cached_mesh.reset();
+      if(!m_wrapped_state)
+        m_wrapped_state = std::make_shared<ossia::scene_state>();
+      m_wrapped_state->roots.reset();
+      m_wrapped_state->materials.reset();
+      m_wrapped_state->version = ++m_version_counter;
+      m_wrapped_state->dirty_index = m_version_counter;
+      m_pending_dirty = 0xFF;
+      return;
+    }
+
+    // Build position / normal / texcoord buffers.
+    const std::size_t vcount = positions.size() / 3;
+    auto pos_buf = std::make_shared<std::vector<float>>(std::move(positions));
+    auto nrm_buf = std::make_shared<std::vector<float>>(vcount * 3, 0.f);
+    for(std::size_t i = 0; i < vcount; ++i)
+      (*nrm_buf)[i * 3 + 2] = 1.f; // +Z normal
+    auto uv_buf = std::make_shared<std::vector<float>>(vcount * 2, 0.f);
+    auto idx_buf = std::make_shared<std::vector<uint32_t>>(std::move(indices));
+
+    auto make_res = [](std::shared_ptr<std::vector<float>> b,
+                       ossia::buffer_data::usage u) {
+      auto r = std::make_shared<ossia::buffer_resource>();
+      ossia::buffer_data bd;
+      bd.data = std::shared_ptr<const void>(b, b->data());
+      bd.byte_size = int64_t(b->size() * sizeof(float));
+      bd.usage_hint = u;
+      r->resource = std::move(bd);
+      r->dirty_index = 1;
+      return r;
+    };
+
+    ossia::mesh_primitive mp;
+    // Stable id keyed on the position-buffer pointer (changes when the
+    // text or font changes, stable while neither does). Required by
+    // the registry's mesh-slab allocator: a 0 id makes the slab
+    // uncacheable and the mesh disappears from rendering.
+    mp.stable_id = (uint64_t)((uintptr_t)pos_buf.get());
+    mp.topology = ossia::primitive_topology::triangles;
+    mp.vertex_count = uint32_t(vcount);
+    mp.index_count = uint32_t(idx_buf->size());
+    // Local-space AABB over the tessellated glyph positions. Enables GPU
+    // frustum / occlusion culling in downstream scene filters.
+    mp.bounds = ossia::compute_aabb_from_positions(pos_buf->data(), vcount);
+    // No material_component — consumer applies default factors.
+
+    uint32_t bi = 0;
+    auto push_attr = [&](std::shared_ptr<std::vector<float>> b,
+                         int floats_per_vertex,
+                         ossia::attribute_semantic sem,
+                         ossia::vertex_format fmt) {
+      mp.vertex_buffers.push_back(
+          make_res(b, ossia::buffer_data::usage::vertex_buffer));
+      ossia::vertex_attribute a;
+      a.semantic = sem;
+      a.format = fmt;
+      a.buffer_index = bi++;
+      a.byte_offset = 0;
+      a.byte_stride = uint32_t(floats_per_vertex) * sizeof(float);
+      a.rate = ossia::vertex_attribute::input_rate::per_vertex;
+      mp.attributes.push_back(a);
+    };
+    push_attr(pos_buf, 3, ossia::attribute_semantic::position, ossia::vertex_format::float3);
+    push_attr(nrm_buf, 3, ossia::attribute_semantic::normal, ossia::vertex_format::float3);
+    push_attr(uv_buf, 2, ossia::attribute_semantic::texcoord0, ossia::vertex_format::float2);
+
+    {
+      auto ib = std::make_shared<ossia::buffer_resource>();
+      ossia::buffer_data bd;
+      bd.data = std::shared_ptr<const void>(idx_buf, idx_buf->data());
+      bd.byte_size = int64_t(idx_buf->size() * sizeof(uint32_t));
+      bd.usage_hint = ossia::buffer_data::usage::index_buffer;
+      ib->resource = std::move(bd);
+      ib->dirty_index = 1;
+      mp.index_buffer = std::move(ib);
+      mp.index_type = ossia::index_format::uint32;
+    }
+
+    auto mc = std::make_shared<ossia::mesh_component>();
+    mc->primitives.push_back(std::move(mp));
+    mc->dirty_index = 1;
+    m_cached_mesh = std::move(mc);
+  }
+
+  // Build scene_node tree: root { scene_transform, mesh_component }.
+  ossia::scene_transform xform;
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+  auto q = QQuaternion::fromEulerAngles(
+      inputs.rotation.value.x, inputs.rotation.value.y,
+      inputs.rotation.value.z);
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar();
+  xform.scale[0] = inputs.scale.value.x;
+  xform.scale[1] = inputs.scale.value.y;
+  xform.scale[2] = inputs.scale.value.z;
+  xform.raw_slot = m_xform_ref;
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->push_back(xform);
+  children->push_back(ossia::mesh_component_ptr(m_cached_mesh));
+
+  auto node = std::make_shared<ossia::scene_node>();
+  node->name = "Text";
+  node->children = std::move(children);
+  node->dirty_index = ++m_version_counter;
+
+  auto roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  roots->push_back(std::move(node));
+
+  // One default material so downstream PBR has something to bind.
+  auto mat = std::make_shared<ossia::material_component>();
+  mat->base_color_factor[0] = 1.f;
+  mat->base_color_factor[1] = 1.f;
+  mat->base_color_factor[2] = 1.f;
+  mat->base_color_factor[3] = 1.f;
+  auto mats = std::make_shared<std::vector<ossia::material_component_ptr>>();
+  mats->push_back(std::move(mat));
+
+  if(!m_wrapped_state)
+    m_wrapped_state = std::make_shared<ossia::scene_state>();
+  m_wrapped_state->roots = std::move(roots);
+  m_wrapped_state->materials = std::move(mats);
+  m_wrapped_state->version = m_version_counter;
+  m_wrapped_state->dirty_index = m_version_counter;
+  m_pending_dirty = 0xFF;
+}
+
+void TextToMesh::operator()()
+{
+  if(!m_wrapped_state) { rebuild(); } // nothing built yet: build lazily
+  auto& port = outputs.scene_out;
+  port.scene.state = m_wrapped_state;
+  port.dirty = m_pending_dirty; // hand the pending dirty flags downstream...
+  m_pending_dirty = 0;          // ...and report them only once
+}
+
+void TextToMesh::init(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  auto&& registry = r.registry();
+  // Claim a RawTransform slot only if this node does not already hold one.
+  if(!raw_transform_slot.valid())
+  {
+    raw_transform_slot = registry.allocate(
+        score::gfx::GpuResourceRegistry::Arena::RawTransform,
+        sizeof(score::gfx::RawLocalTransform));
+    m_xform_ref = registry.toOssiaRef(raw_transform_slot);
+  }
+  score::gfx::RawLocalTransform seed{}; // value-initialized first content
+  if(raw_transform_slot.valid()) // re-checked: allocate() is not assumed to succeed
+    registry.updateSlot(res, raw_transform_slot, &seed, sizeof(seed));
+}
+
+void TextToMesh::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!raw_transform_slot.valid()) // no slot held: nothing to upload
+    return;
+  const auto& pos = inputs.position.value;
+  const auto& rot = inputs.rotation.value;
+  const auto& scl = inputs.scale.value;
+  const QQuaternion quat = QQuaternion::fromEulerAngles(rot.x, rot.y, rot.z);
+  score::gfx::RawLocalTransform raw{};
+  raw.translation[0] = pos.x;
+  raw.translation[1] = pos.y;
+  raw.translation[2] = pos.z;
+  raw.rotation[0] = quat.x();
+  raw.rotation[1] = quat.y();
+  raw.rotation[2] = quat.z();
+  raw.rotation[3] = quat.scalar(); // quaternion stored as (x, y, z, scalar)
+  raw.scale[0] = scl.x;
+  raw.scale[1] = scl.y;
+  raw.scale[2] = scl.z;
+  r.registry().updateSlot(res, raw_transform_slot, &raw, sizeof(raw));
+}
+
+void TextToMesh::release(score::gfx::RenderList& r)
+{
+  // Return the transform slot to the registry, if one is held.
+  if(raw_transform_slot.valid()) { r.registry().free(raw_transform_slot); }
+  m_xform_ref = {}; // NOTE(review): the slot handle is not reset — confirm free() invalidates it
+  // Producer-state-drift Option A — see Light::release.
+  m_wrapped_state.reset(); // next operator()() will call rebuild() again
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/TextToMesh.hpp b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.hpp
new file mode 100644
index 0000000000..d3f874c6c7
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.hpp
@@ -0,0 +1,125 @@
+#pragma once
+#include "TransformHelper.hpp"
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <Threedim/TinyObj.hpp>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <cstdint>
+#include <memory>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Turn a text string into flat 3D geometry. Each glyph is converted to a
+// QPainterPath, flattened into polygons, and tessellated via simple
+// ear-clipping. Output is a scene_spec containing one scene_node with
+// one mesh_component whose vertices describe the text in the XY plane
+// (normal = +Z) around the origin.
+//
+// Limitations (v1):
+//   - Holes are NOT handled. Glyphs with interior holes ("O", "D", "o",
+//     "P" counter, etc.) render as solid shapes. Fix planned by adding
+//     earcut.hpp or hole-bridging to the tessellator.
+//   - Extrusion = 0 (flat). A later revision will extrude along -Z
+//     with properly-oriented side walls.
+//   - Tangents are synthesized as (1, 0, 0, 1) by ScenePreprocessor's
+//     fallback — no per-vertex tangent computed here.
+//
+// Designed for VJ / title-card use rather than typography; single-line
+// inputs only. For paragraph text, use TextToTexture on a quad.
+class TextToMesh
+{
+public:
+  halp_meta(name, "Text to Mesh")
+  halp_meta(category, "Visuals/3D/Text")
+  halp_meta(c_name, "text_to_mesh")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/text-to-mesh.html")
+  halp_meta(uuid, "c8f2a4d5-6e9b-4d3a-b7f1-5c4e2d8a9f6b")
+
+  struct ins
+  {
+    // Port-driven rebuild: every control calls TextToMesh::rebuild() from its
+    // update() callback; operator()() only republishes m_wrapped_state.
+    struct : halp::lineedit<"Text", "Hello">
+    { void update(TextToMesh& n) { n.rebuild(); } } text;
+    struct : halp::lineedit<"Font family", "Sans">
+    { void update(TextToMesh& n) { n.rebuild(); } } font_family;
+    struct : halp::spinbox_i32<"Font size", halp::irange{4, 512, 72}>
+    { void update(TextToMesh& n) { n.rebuild(); } } font_size;
+    struct : halp::toggle<"Bold">
+    { void update(TextToMesh& n) { n.rebuild(); } } bold;
+    struct : halp::toggle<"Italic">
+    { void update(TextToMesh& n) { n.rebuild(); } } italic;
+    // World-space height of a capital 'H'. Glyph paths come out in
+    // pixel units from Qt; we scale them to this target so the mesh
+    // lives at a sensible world scale regardless of font_size.
+    struct : halp::hslider_f32<"Height", halp::range{0.01, 100., 1.}>
+    { void update(TextToMesh& n) { n.rebuild(); } } height;
+    // Centers the text around the origin on the X axis (vs. left-align
+    // at X=0). Useful for title cards.
+    struct : halp::toggle<"Center X">
+    { void update(TextToMesh& n) { n.rebuild(); } } center_x;
+
+    struct : PositionControl
+    { void update(TextToMesh& n) { n.rebuild(); } } position;
+    struct : RotationControl
+    { void update(TextToMesh& n) { n.rebuild(); } } rotation;
+    struct : ScaleControl
+    { void update(TextToMesh& n) { n.rebuild(); } } scale;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0}; // receives m_pending_dirty in operator()()
+    } scene_out;
+  } outputs;
+
+  void rebuild();    // regenerates m_wrapped_state from the current controls
+  void operator()(); // publishes m_wrapped_state; calls rebuild() if it is null
+
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  std::shared_ptr<ossia::scene_state> m_wrapped_state; // reset in release()
+  CachedTRS m_cachedTRS{};
+  // Mesh-rebuild cache — expensive tessellation only re-runs when text
+  // or font parameters actually change.
+  std::string m_cached_text;
+  std::string m_cached_family;
+  int m_cached_size{-1};
+  bool m_cached_bold{false};
+  bool m_cached_italic{false};
+  float m_cached_height{-1.f};
+  bool m_cached_center{false};
+  std::shared_ptr<ossia::mesh_component> m_cached_mesh;
+  int64_t m_version_counter{0}; // incremented by rebuild(); source of version / dirty_index
+  uint8_t m_pending_dirty{0xFF}; // copied to scene_out.dirty on the next tick, then zeroed
+
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot; // allocated in init(), freed in release()
+  ossia::gpu_slot_ref m_xform_ref{}; // ossia-side ref to that slot, stored as scene_transform::raw_slot
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TextToTexture.hpp b/src/plugins/score-plugin-threedim/Threedim/TextToTexture.hpp
new file mode 100644
index 0000000000..4d396ef557
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TextToTexture.hpp
@@ -0,0 +1,146 @@
+#pragma once
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <QColor>
+#include <QFont>
+#include <QImage>
+#include <QPainter>
+
+#include <string>
+
+namespace Threedim
+{
+
+// Rasterize a text string into an RGBA texture via QPainter. Pipes into
+// any node that consumes halp::gpu_texture — most commonly
+// MaterialOverride (to show text on a mesh's base-color slot) or
+// Instancer / a billboard renderer (for text sprites).
+//
+// Re-renders only when a control (text / font / size / color / canvas
+// dimensions) changes — the update() hooks on each port fire recreate().
+class TextToTexture
+{
+public:
+  halp_meta(name, "Text to Texture")
+  halp_meta(category, "Visuals/3D/Text")
+  halp_meta(c_name, "text_to_texture")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/text-to-texture.html")
+  halp_meta(uuid, "5d3a9b2f-7e6c-4a8d-b1f4-9c2e3d5a7b8f")
+
+  struct ins
+  {
+    struct : halp::lineedit<"Text", "Hello, world">
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } text;
+    struct : halp::lineedit<"Font family", "Sans">
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } font_family;
+    struct : halp::spinbox_i32<"Font size", halp::irange{4, 512, 64}>
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } font_size;
+    struct : halp::toggle<"Bold">
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } bold;
+    struct : halp::toggle<"Italic">
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } italic;
+
+    struct : halp::spinbox_i32<"Canvas width", halp::irange{16, 4096, 1024}>
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } canvas_w;
+    struct : halp::spinbox_i32<"Canvas height", halp::irange{16, 4096, 256}>
+    {
+      void update(TextToTexture& self) { self.recreate(); }
+    } canvas_h;
+
+    // Foreground / background colors are exposed as four separate sliders
+    // (r, g, b, a), each in [0, 1]. The background alpha defaults to 0, so
+    // dropped on any mesh only the glyphs are visible.
+    struct : halp::hslider_f32<"Text R", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_r;
+    struct : halp::hslider_f32<"Text G", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_g;
+    struct : halp::hslider_f32<"Text B", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_b;
+    struct : halp::hslider_f32<"Text A", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_a;
+    struct : halp::hslider_f32<"BG R", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_r;
+    struct : halp::hslider_f32<"BG G", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_g;
+    struct : halp::hslider_f32<"BG B", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_b;
+    struct : halp::hslider_f32<"BG A", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_a;
+
+    // Text alignment inside the canvas: 0=left, 1=center, 2=right for h;
+    // 0=top, 1=center, 2=bottom for v.
+    struct : halp::spinbox_i32<"H align", halp::irange{0, 2, 1}>
+    { void update(TextToTexture& s) { s.recreate(); } } h_align;
+    struct : halp::spinbox_i32<"V align", halp::irange{0, 2, 1}>
+    { void update(TextToTexture& s) { s.recreate(); } } v_align;
+  } inputs;
+
+  struct
+  {
+    halp::texture_output<"Output", halp::rgba_texture> main;
+  } outputs;
+
+  void recreate()
+  {
+    const int w = inputs.canvas_w.value;
+    const int h = inputs.canvas_h.value;
+    if(w <= 0 || h <= 0)
+      return;
+
+    // Output is straight (non-premultiplied) RGBA8: Format_RGBA8888 matches
+    // what gpu_texture expects when upload-bound as RGBA8.
+    QImage img(w, h, QImage::Format_RGBA8888);
+    if(img.isNull()) // allocation failed: constBits() would be null below
+      return;
+    img.fill(QColor::fromRgbF(
+        inputs.bg_r.value, inputs.bg_g.value, inputs.bg_b.value,
+        inputs.bg_a.value));
+
+    QPainter p(&img);
+    p.setRenderHint(QPainter::Antialiasing, true);
+    p.setRenderHint(QPainter::TextAntialiasing, true);
+
+    QFont f(QString::fromStdString(inputs.font_family.value));
+    f.setPixelSize(inputs.font_size.value);
+    f.setBold(inputs.bold.value);
+    f.setItalic(inputs.italic.value);
+    p.setFont(f);
+    p.setPen(QColor::fromRgbF(
+        inputs.fg_r.value, inputs.fg_g.value, inputs.fg_b.value,
+        inputs.fg_a.value));
+
+    int flags = 0;
+    switch(inputs.h_align.value)
+    {
+      case 0: flags |= Qt::AlignLeft; break;
+      case 2: flags |= Qt::AlignRight; break;
+      default: flags |= Qt::AlignHCenter;
+    }
+    switch(inputs.v_align.value)
+    {
+      case 0: flags |= Qt::AlignTop; break;
+      case 2: flags |= Qt::AlignBottom; break;
+      default: flags |= Qt::AlignVCenter;
+    }
+    flags |= Qt::TextWordWrap;
+
+    p.drawText(
+        QRect(0, 0, w, h), flags, QString::fromStdString(inputs.text.value));
+    p.end();
+
+    outputs.main.create(w, h);
+    std::memcpy(outputs.main.texture.bytes, img.constBits(), std::size_t(w) * h * 4);
+    outputs.main.upload();
+  }
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TextureInfo.hpp b/src/plugins/score-plugin-threedim/Threedim/TextureInfo.hpp
new file mode 100644
index 0000000000..3a72043b03
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TextureInfo.hpp
@@ -0,0 +1,96 @@
+#pragma once
+#include <fmt/format.h>
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+#include <halp/texture.hpp>
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace Threedim
+{
+// Tiny inspector node: takes a halp::gpu_texture_input -- a zero-copy
+// reference to the upstream's GPU texture -- and exposes its metadata
+// (width, height, format, native handle) on regular value-output ports
+// plus a single human-readable summary string.
+//
+// Wiring: when an Image-typed edge is connected to our Texture port,
+// score's CpuAnalysisNode (the GfxRenderer specialization for nodes
+// with no texture/buffer/geometry outputs) allocates a render target
+// at init time via texture_inputs_storage::init(), points the upstream
+// at it through renderTargetForInput(), and -- thanks to the
+// gpu_texture_port branch in that storage -- writes the resulting
+// QRhiTexture pointer plus its pixel size into our gpu_texture struct
+// (handle / width / height). The format enum is mapped from the
+// negotiated QRhiTexture::Format via gpp::qrhi::toTextureFormat. None of
+// the per-frame readback machinery used for halp::texture_input fires
+// for us, so this is essentially free.
+class TextureInfo
+{
+public:
+  halp_meta(name, "Texture Info")
+  halp_meta(category, "Visuals/Utilities")
+  halp_meta(c_name, "texture_info")
+  halp_meta(manual_url, "https://ossia.io/score-docs/processes/texture-info.html")
+  halp_meta(uuid, "5bd9c8e2-7f1a-4e3b-9c0d-2a4b6f8e1d72")
+
+  struct
+  {
+    halp::gpu_texture_input<"Texture"> texture;
+  } inputs;
+
+  struct
+  {
+    halp::val_port<"Width", int> width;
+    halp::val_port<"Height", int> height;
+    halp::val_port<"Format", std::string> format;
+    // Raw native handle as an opaque integer (a QRhiTexture* on every
+    // backend score supports today). Useful only for visual identity
+    // ("did the upstream rebuild this texture?").
+    halp::val_port<"Handle", int64_t> handle;
+    halp::val_port<"Readable", std::string> readable;
+  } outputs;
+
+  // Maps a texture format enumerator to its display name; any value not
+  // listed below yields "unknown".
+  static std::string_view format_name(halp::gpu_texture::format_t f) noexcept
+  {
+    using F = halp::gpu_texture;
+    switch(f)
+    {
+      case F::RGBA8: return "RGBA8";
+      case F::RGBA16F: return "RGBA16F";
+      case F::RGBA32F: return "RGBA32F";
+      case F::R8: return "R8";
+      case F::R16: return "R16";
+      case F::R16F: return "R16F";
+      case F::R32F: return "R32F";
+      default: break;
+    }
+    return "unknown";
+  }
+
+  // Copies the input texture's metadata onto the value outputs and
+  // regenerates the one-line summary string.
+  void operator()()
+  {
+    const auto& tex = inputs.texture.texture;
+    const std::string_view label = format_name(tex.format);
+
+    outputs.width.value = tex.width;
+    outputs.height.value = tex.height;
+    outputs.format.value = std::string{label};
+    // The handle is published as a plain integer: signed for the port,
+    // unsigned for the hexadecimal rendering in the summary.
+    outputs.handle.value = reinterpret_cast<std::int64_t>(tex.handle);
+    const auto address = reinterpret_cast<std::uintptr_t>(tex.handle);
+
+    auto& summary = outputs.readable.value;
+    summary.clear(); // rewritten from scratch on every call
+    fmt::format_to(
+        std::back_inserter(summary), "{}x{} {} (handle=0x{:x})", tex.width,
+        tex.height, label, address);
+  }
+};
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp b/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp
index 69fc71eeed..86810fd861 100644
--- a/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp
@@ -59,16 +59,38 @@ static void toGL(auto& from, float (&to)[N])
 inline void rebuild_transform(auto& inputs, auto& outputs)
 {
   QMatrix4x4 model{};
-  auto& pos = inputs.position;
-  auto& rot = inputs.rotation;
-  auto& sc = inputs.scale;
 
-  model.translate(pos.value.x, pos.value.y, pos.value.z);
-  model.rotate(QQuaternion::fromEulerAngles(rot.value.x, rot.value.y, rot.value.z));
-  model.scale(sc.value.x, sc.value.y, sc.value.z);
+  if constexpr(requires { inputs.position; })
+  {
+    auto& pos = inputs.position;
+    model.translate(pos.value.x, pos.value.y, pos.value.z);
+  }
+
+  if constexpr(requires { inputs.rotation; })
+  {
+    auto& rot = inputs.rotation;
+    model.rotate(QQuaternion::fromEulerAngles(rot.value.x, rot.value.y, rot.value.z));
+  }
 
-  toGL(model, outputs.geometry.transform);
-  outputs.geometry.dirty_transform = true;
+  if constexpr(requires { inputs.scale; })
+  {
+    auto& sc = inputs.scale;
+    model.scale(sc.value.x, sc.value.y, sc.value.z);
+  }
+
+  // Legacy path: writes into the halp::mesh-style `geometry` output.
+  // Scene-only loaders (GltfParser/FbxParser after the legacy outlet was
+  // removed) don't have `outputs.geometry`; we leave the Position/Rotation/
+  // Scale controls as a no-op for now. They'll be re-wired to a scene-level
+  // root transform when we add that feature to scene_spec.
+  if constexpr(requires {
+                 outputs.geometry.transform;
+                 outputs.geometry.dirty_transform;
+               })
+  {
+    toGL(model, outputs.geometry.transform);
+    outputs.geometry.dirty_transform = true;
+  }
 }
 struct PositionControl : halp::xyz_spinboxes_f32<"Position", halp::free_range_min<>>
 {
diff --git a/src/plugins/score-plugin-threedim/Threedim/Transform3D.cpp b/src/plugins/score-plugin-threedim/Threedim/Transform3D.cpp
new file mode 100644
index 0000000000..ea517a0b9a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Transform3D.cpp
@@ -0,0 +1,113 @@
+#include "Transform3D.hpp"
+
+#include <Gfx/Graph/RenderList.hpp>
+#include <Gfx/Graph/SceneGPUState.hpp>
+
+#include <QQuaternion>
+
+namespace Threedim
+{
+
+void Transform3D::operator()()
+{
+  auto& out = outputs.scene_out;
+  const auto& upstream = inputs.scene_in.scene.state;
+  const auto* src = upstream.get();
+
+  // No usable input: publish an empty spec and invalidate every cache so
+  // that the next non-empty input goes through the rebuild path.
+  if(src == nullptr || src->empty())
+  {
+    out.scene = {};
+    out.dirty = 0; // NOTE(review): also 0 on the non-empty -> empty transition — confirm intended
+    m_state.reset();
+    m_cached_in_state = nullptr;
+    m_cached_in_version = -1;
+    m_cachedTRS.valid = false;
+    return;
+  }
+
+  // transformChanged() is evaluated on every tick, before the cache test.
+  const int64_t src_version = src->version;
+  const bool same_upstream
+      = (m_cached_in_state == src) && (m_cached_in_version == src_version);
+  const bool same_trs = !transformChanged(inputs, m_cachedTRS);
+
+  // Stable upstream (pointer + version) and stable TRS controls: republish
+  // the cached pointer so identity-keyed caches stay hot (diagnostic 027).
+  if(m_state && same_upstream && same_trs)
+  {
+    out.scene.state = m_state;
+    out.dirty = 0;
+    return;
+  }
+
+  // Rebuild via the canonical helper. Per the original notes it also
+  // propagates skeletons and collections (diagnostic 026), refreshes
+  // m_cachedTRS in place and bumps m_version_counter.
+  m_state = wrapSceneWithTransform(
+      upstream, inputs, m_cachedTRS, m_version_counter, m_xform_ref);
+  m_cached_in_state = src;
+  m_cached_in_version = src_version;
+  out.scene.state = m_state;
+  out.dirty = 0xFF;
+}
+
+// Order invariant: called by GfxRenderer::initState BEFORE the first
+// operator()() and BEFORE processControlIn fires any control callback.
+// m_xform_ref populated here is therefore safe to read in operator()()
+// without a guard. Adding prepare() to this node breaks the invariant —
+// see CpuFilterNode.hpp for details.
+void Transform3D::init(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res)
+{
+  using score::gfx::GpuResourceRegistry;
+  using score::gfx::RawLocalTransform;
+  if(!xform_slot.valid()) // allocate once; a slot that is still valid is kept
+  {
+    xform_slot = r.registry().allocate(
+        GpuResourceRegistry::Arena::RawTransform, sizeof(RawLocalTransform));
+    m_xform_ref = r.registry().toOssiaRef(xform_slot);
+  }
+  if(!xform_slot.valid())
+    return;
+  RawLocalTransform seed{}; // value-initialized transform written as first content
+  r.registry().updateSlot(res, xform_slot, &seed, sizeof(seed));
+}
+
+void Transform3D::update(
+    score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*)
+{
+  if(!xform_slot.valid())
+    return;
+  // Copies the x/y/z members of a control value into a 3-element array.
+  const auto put_xyz = [](auto& dst, const auto& v) {
+    dst[0] = v.x;
+    dst[1] = v.y;
+    dst[2] = v.z;
+  };
+  const auto& euler = inputs.rotation.value;
+  const auto q = QQuaternion::fromEulerAngles(euler.x, euler.y, euler.z);
+  score::gfx::RawLocalTransform packed{};
+  put_xyz(packed.translation, inputs.position.value);
+  put_xyz(packed.scale, inputs.scale.value);
+  packed.rotation[0] = q.x();
+  packed.rotation[1] = q.y();
+  packed.rotation[2] = q.z();
+  packed.rotation[3] = q.scalar();
+  r.registry().updateSlot(res, xform_slot, &packed, sizeof(packed));
+}
+
+void Transform3D::release(score::gfx::RenderList& r)
+{
+  // Hand the transform slot back, if held. NOTE(review): xform_slot is not
+  // reset here — confirm free() invalidates it so the next init() re-allocates.
+  if(xform_slot.valid()) { r.registry().free(xform_slot); }
+  m_xform_ref = {};
+  // Producer-state-drift Option A (see Light::release): with m_state null,
+  // the next operator()() cannot take its cached-republish path.
+  m_state.reset();
+  m_cached_in_state = nullptr;
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/Transform3D.hpp b/src/plugins/score-plugin-threedim/Threedim/Transform3D.hpp
new file mode 100644
index 0000000000..ac81943976
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/Transform3D.hpp
@@ -0,0 +1,100 @@
+#pragma once
+#include "TransformHelper.hpp"
+
+#include <halp/controls.hpp>
+#include <halp/meta.hpp>
+
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <Gfx/Graph/GpuResourceRegistry.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+class QRhiResourceUpdateBatch;
+
+namespace score::gfx
+{
+class RenderList;
+struct Edge;
+}
+
+namespace Threedim
+{
+
+// Scene-in → scene-out transform: wraps the incoming scene's roots under a
+// single parent node carrying a `scene_transform` payload (TRS). Materials,
+// animations and cameras pass through by shared_ptr identity so downstream
+// identity-based caches stay hot.
+class Transform3D
+{
+public:
+  halp_meta(name, "Transform 3D")
+  halp_meta(c_name, "transform3d_avnd")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(authors, "ossia team")
+  halp_meta(uuid, "7a9f2b41-4d58-4e93-b7c2-0f5d3e8a6b1c")
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    halp::xyz_spinboxes_f32<
+        "Position", halp::range{-10000., 10000., 0.}>
+        position;
+    halp::xyz_spinboxes_f32<"Rotation", halp::range{0., 359.9999999, 0.}>
+        rotation; // fed to QQuaternion::fromEulerAngles in update()
+    halp::xyz_spinboxes_f32<
+        "Scale", halp::range{0.00001, 1000., 1.}>
+        scale;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0}; // 0xFF on the tick a new state is built, 0 otherwise
+    } scene_out;
+  } outputs;
+
+  void operator()(); // publishes the wrapped scene, rebuilding it only on change
+
+  // Render-thread hooks. init claims one RawTransform slot for the
+  // emitted scene_transform; update packs the current control TRS
+  // into a RawLocalTransform and uploads; release returns the slot.
+  // The preprocessor composes the world-space matrix for this slot
+  // from the scene-node parent chain CPU-side.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  score::gfx::GpuResourceRegistry::Slot xform_slot;
+
+  // Ossia-facing snapshot of xform_slot. Written in init(), cleared in
+  // release(), and passed to wrapSceneWithTransform() whenever
+  // operator()() rebuilds the wrapped state.
+  ossia::gpu_slot_ref m_xform_ref{};
+
+  // Cache: republish the same emitted scene_state when neither upstream
+  // (input scene_state pointer / version) nor controls (TRS) changed.
+  // Prevents downstream SceneSelector / SceneGraphFilter / SceneDuplicator /
+  // CreateCollection from rebuilding every frame, which they did when we
+  // emitted a fresh shared_ptr each tick — diagnostic 027.
+  std::shared_ptr<const ossia::scene_state> m_state;
+  const ossia::scene_state* m_cached_in_state{}; // compared by address only in operator()()
+  int64_t m_cached_in_version{-1}; // -1 = no upstream version cached
+  CachedTRS m_cachedTRS{};
+  int64_t m_version_counter{0}; // handed to wrapSceneWithTransform() on each rebuild
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/TransformHelper.hpp b/src/plugins/score-plugin-threedim/Threedim/TransformHelper.hpp
new file mode 100644
index 0000000000..11babfa4a6
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/TransformHelper.hpp
@@ -0,0 +1,176 @@
+#pragma once
+#include <ossia/dataflow/geometry_port.hpp>
+
+#include <QMatrix4x4>
+#include <QQuaternion>
+
+#include <cstring>
+#include <memory>
+#include <vector>
+
+namespace Threedim
+{
+
+// Shared TRS-matrix computation for halp nodes that output a
+// `halp::mesh`-style geometry with a `transform[16]` slot plus a
+// `dirty_transform` flag (BuffersToGeometry, BuffersToGeometry2,
+// VoxelLoader, ...). Call from operator() every frame: computes a
+// column-major 4x4 TRS matrix from the XYZ controls, writes it into
+// `out_transform16`, and returns true iff the matrix changed since
+// the last call (so the caller can set `dirty_transform` accordingly).
+//
+// Cached prev values live on the caller via the CachedTRS struct —
+// identical layout across the three call sites so each node just
+// declares one member `CachedTRS m_cachedTRS{}` and passes it in.
+struct CachedTRS // last-applied control snapshot; defaults form an identity TRS
+{
+  float pos[3]{0, 0, 0};   // position control (x, y, z)
+  float rot[3]{0, 0, 0};   // rotation control (x, y, z), kept as raw control values
+  float scale[3]{1, 1, 1}; // scale control (x, y, z)
+  bool valid{false};       // false until a helper below stores a first snapshot
+};
+
+// `Inputs` is duck-typed: must expose `.position.value.{x,y,z}`, etc.
+template <typename Inputs>
+inline bool
+computeTRSMatrix(const Inputs& inputs, float out_transform16[16], CachedTRS& cache)
+{
+  // Take a fresh snapshot of the nine controls, converted to float the same
+  // way the cache stores them.
+  CachedTRS now;
+  now.pos[0] = inputs.position.value.x;
+  now.pos[1] = inputs.position.value.y;
+  now.pos[2] = inputs.position.value.z;
+  now.rot[0] = inputs.rotation.value.x;
+  now.rot[1] = inputs.rotation.value.y;
+  now.rot[2] = inputs.rotation.value.z;
+  now.scale[0] = inputs.scale.value.x;
+  now.scale[1] = inputs.scale.value.y;
+  now.scale[2] = inputs.scale.value.z;
+  now.valid = true;
+
+  // Cache hit: a snapshot exists and every component still matches it.
+  bool unchanged = cache.valid;
+  for(int i = 0; unchanged && i < 3; ++i)
+    unchanged = cache.pos[i] == now.pos[i] && cache.rot[i] == now.rot[i]
+                && cache.scale[i] == now.scale[i];
+  if(unchanged)
+    return false;
+
+  // Compose translate * rotate * scale and copy out the 16 floats;
+  // QMatrix4x4::constData() exposes them in column-major order.
+  QMatrix4x4 trs;
+  trs.translate(now.pos[0], now.pos[1], now.pos[2]);
+  trs.rotate(QQuaternion::fromEulerAngles(now.rot[0], now.rot[1], now.rot[2]));
+  trs.scale(now.scale[0], now.scale[1], now.scale[2]);
+  std::memcpy(out_transform16, trs.constData(), 16 * sizeof(float));
+
+  // Remember what was applied so an identical next call short-circuits.
+  cache = now;
+  return true;
+}
+
+// Wrap a raw scene_state under a single parent scene_node whose first child
+// is a scene_transform carrying this node's position / rotation / scale
+// controls. FlattenVisitor processes payloads in order and transforms apply
+// to subsequent siblings, so the wrap applies the TRS to every descendant.
+//
+// Used by asset-loader-style nodes (FbxParser, GltfParser, AssetLoader) to
+// compose the control-knob transform on top of the as-loaded scene without
+// touching the raw state (kept stable so downstream identity caches stay
+// warm). Shared to avoid re-duplicating the same 40 lines per loader.
+//
+// `Inputs` is duck-typed: must expose `.position.value.{x,y,z}`,
+// `.rotation.value.{x,y,z}`, `.scale.value.{x,y,z}`.
+template <typename Inputs>
+inline std::shared_ptr<const ossia::scene_state> wrapSceneWithTransform(
+    const std::shared_ptr<const ossia::scene_state>& raw,
+    const Inputs& inputs, CachedTRS& cache, int64_t& version_counter,
+    const ossia::gpu_slot_ref& xform_ref = {})
+{
+  if(!raw)
+    return nullptr;
+
+  // No cache check happens here: every call with a non-null `raw` builds a
+  // fresh wrapped state and bumps the version. Callers that want to skip
+  // idle frames gate the call with transformChanged(); see cache update below.
+  ossia::scene_transform xform;
+  xform.translation[0] = inputs.position.value.x;
+  xform.translation[1] = inputs.position.value.y;
+  xform.translation[2] = inputs.position.value.z;
+  auto q = QQuaternion::fromEulerAngles(
+      inputs.rotation.value.x, inputs.rotation.value.y,
+      inputs.rotation.value.z);
+  xform.rotation[0] = q.x();
+  xform.rotation[1] = q.y();
+  xform.rotation[2] = q.z();
+  xform.rotation[3] = q.scalar();
+  xform.scale[0] = inputs.scale.value.x;
+  xform.scale[1] = inputs.scale.value.y;
+  xform.scale[2] = inputs.scale.value.z;
+  // Stamp the producer's RawTransform slot ref (if any) so the
+  // preprocessor composes a world matrix at the matching offset.
+  xform.raw_slot = xform_ref;
+
+  auto children = std::make_shared<std::vector<ossia::scene_payload>>();
+  children->push_back(xform);
+  if(raw->roots)
+    for(const auto& root : *raw->roots)
+      children->push_back(root);
+
+  auto parent = std::make_shared<ossia::scene_node>();
+  parent->children = std::move(children);
+
+  auto new_roots = std::make_shared<std::vector<ossia::scene_node_ptr>>();
+  new_roots->push_back(std::move(parent));
+
+  auto wrapped = std::make_shared<ossia::scene_state>();
+  wrapped->roots       = std::move(new_roots);
+  // Identity-preserving passthrough of every scene_state shared field so
+  // downstream caches stay warm. `collections` was missed in the initial
+  // landing (CreateCollection writes them onto scene_state::collections,
+  // and dropping them here silently loses the named-collection list on
+  // every TRS pass) — diagnostic 026.
+  wrapped->materials        = raw->materials;
+  wrapped->animations       = raw->animations;
+  wrapped->cameras          = raw->cameras;
+  wrapped->skeletons        = raw->skeletons;
+  wrapped->collections      = raw->collections;
+  wrapped->environment      = raw->environment;
+  wrapped->active_camera_id = raw->active_camera_id;
+  wrapped->version          = ++version_counter;
+  wrapped->dirty_index      = 1;
+
+  cache.pos[0]   = inputs.position.value.x;
+  cache.pos[1]   = inputs.position.value.y;
+  cache.pos[2]   = inputs.position.value.z;
+  cache.rot[0]   = inputs.rotation.value.x;
+  cache.rot[1]   = inputs.rotation.value.y;
+  cache.rot[2]   = inputs.rotation.value.z;
+  cache.scale[0] = inputs.scale.value.x;
+  cache.scale[1] = inputs.scale.value.y;
+  cache.scale[2] = inputs.scale.value.z;
+  cache.valid    = true;
+
+  return wrapped;
+}
+
+// Test whether the controls differ from a prior cached snapshot, without
+// applying them. Use this to gate a wrapSceneWithTransform() rebuild when
+// you want to only allocate a new wrapped state when the user moved a knob.
+template <typename Inputs>
+inline bool transformChanged(const Inputs& inputs, const CachedTRS& cache)
+{
+  const auto& p = inputs.position.value;
+  const auto& r = inputs.rotation.value;
+  const auto& s = inputs.scale.value;
+  // An empty cache always reports a change; otherwise all nine control
+  // components must equal the snapshot for the result to be false.
+  const bool same = cache.valid
+      && cache.pos[0] == p.x && cache.pos[1] == p.y && cache.pos[2] == p.z
+      && cache.rot[0] == r.x && cache.rot[1] == r.y && cache.rot[2] == r.z
+      && cache.scale[0] == s.x && cache.scale[1] == s.y && cache.scale[2] == s.z;
+  return !same;
+}
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/VcgImporters.cpp b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.cpp
new file mode 100644
index 0000000000..744245e962
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.cpp
@@ -0,0 +1,215 @@
+#include "VcgImporters.hpp"
+
+// vcglib pulls Qt / GL through its utility headers; we only need the
+// header-only trimesh + io_trimesh subset. Isolate these includes here so
+// the rest of the plugin isn't exposed to vcglib's macro soup.
+#include <vcg/complex/complex.h>
+#include <wrap/io_trimesh/import_off.h>
+#include <wrap/io_trimesh/import_stl.h>
+
+#include <string>
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Minimal vcglib mesh type for STL / OFF import: per-vertex position +
+// normal + colour + bit flags, per-face vertex refs + normal + colour.
+// STL contributes position + per-face normal; OFF can contribute per-vertex
+// and per-face colours. We always request normals + colours; vcglib zero-
+// inits any it doesn't fill.
+class ImpVertex; // forward declarations: ImpTypes names both types
+class ImpFace;   // before either one is defined
+struct ImpTypes : public vcg::UsedTypes<
+                      vcg::Use<ImpVertex>::AsVertexType,
+                      vcg::Use<ImpFace>::AsFaceType>
+{};
+class ImpVertex : public vcg::Vertex<
+                      ImpTypes, vcg::vertex::Coord3f, vcg::vertex::Normal3f,
+                      vcg::vertex::Color4b, vcg::vertex::BitFlags>
+{}; // no members of its own: components come from the template arguments
+class ImpFace : public vcg::Face<
+                    ImpTypes, vcg::face::VertexRef, vcg::face::Normal3f,
+                    vcg::face::Color4b, vcg::face::BitFlags>
+{}; // likewise
+class ImpMesh : public vcg::tri::TriMesh<
+                    std::vector<ImpVertex>, std::vector<ImpFace>>
+{}; // mesh container type handed to the vcglib importers
+
+// Expand the loaded vcglib mesh into the flat, non-interleaved float_vec
+// layout Threedim::mesh expects: all positions, then all normals, then
+// all colours. De-indexed (one output vertex per triangle corner) because
+// STL doesn't carry per-vertex normals shared across triangles, and OFF
+// often has smooth normals but STL's "one normal per face" forces the
+// per-corner expansion anyway.
+static std::vector<Threedim::mesh>
+convertVcgToMeshes(const ImpMesh& vm, Threedim::float_vec& out, int loadmask)
+{
+  std::vector<Threedim::mesh> result;
+  if(vm.face.empty() && vm.vert.empty())
+    return result;
+
+  // Decode which optional attributes the importer reported in loadmask.
+  const bool has_faces = !vm.face.empty();
+  const bool has_normal = (loadmask & vcg::tri::io::Mask::IOM_VERTNORMAL)
+                          || (loadmask & vcg::tri::io::Mask::IOM_FACENORMAL);
+  const bool has_color = (loadmask & vcg::tri::io::Mask::IOM_VERTCOLOR)
+                         || (loadmask & vcg::tri::io::Mask::IOM_FACECOLOR);
+
+  Threedim::mesh m{};
+  m.texcoord = false;
+  m.normals  = has_normal;
+  m.colors   = has_color;
+  m.tangents = false;
+  m.points   = !has_faces;
+  m.extras.clear();
+
+  if(has_faces)
+  {
+    const size_t corners = vm.face.size() * 3;
+    m.vertices = (int64_t)corners;
+
+    // Allocate contiguous attribute blocks. Layout matches Threedim::mesh's
+    // convention: offsets stored in elements (floats), not bytes.
+    const int64_t pos_count    = 3 * corners;
+    const int64_t nor_count    = has_normal ? 3 * corners : 0;
+    const int64_t col_count    = has_color  ? 4 * corners : 0;
+    const int64_t total_floats = pos_count + nor_count + col_count;
+
+    const int64_t pos_offset = (int64_t)out.size();
+    const int64_t nor_offset = pos_offset + pos_count;
+    const int64_t col_offset = nor_offset + nor_count;
+
+    out.resize(pos_offset + total_floats);
+
+    m.pos_offset     = pos_offset;
+    m.normal_offset  = has_normal ? nor_offset : 0;
+    m.color_offset   = has_color  ? col_offset : 0;
+
+    // Per-face attributes take precedence over per-vertex ones whenever the
+    // importer reported them. Both choices depend only on loadmask, so they
+    // are decided once here instead of per face / per corner.
+    const bool have_face_normal
+        = loadmask & vcg::tri::io::Mask::IOM_FACENORMAL;
+    const bool have_face_color
+        = loadmask & vcg::tri::io::Mask::IOM_FACECOLOR;
+
+    // Fill buffer by walking faces.
+    for(size_t fi = 0; fi < vm.face.size(); ++fi)
+    {
+      const auto& f = vm.face[fi];
+      for(int c = 0; c < 3; ++c)
+      {
+        const auto* v = f.cV(c);
+        const int64_t base_p = pos_offset + (fi * 3 + c) * 3;
+        out[base_p + 0] = (float)v->cP()[0];
+        out[base_p + 1] = (float)v->cP()[1];
+        out[base_p + 2] = (float)v->cP()[2];
+
+        if(has_normal)
+        {
+          const int64_t base_n = nor_offset + (fi * 3 + c) * 3;
+          const auto& n = have_face_normal ? f.cN() : v->cN();
+          out[base_n + 0] = (float)n[0];
+          out[base_n + 1] = (float)n[1];
+          out[base_n + 2] = (float)n[2];
+        }
+
+        if(has_color)
+        {
+          const int64_t base_c = col_offset + (fi * 3 + c) * 4;
+          const auto& cc = have_face_color ? f.cC() : v->cC();
+          out[base_c + 0] = cc[0] / 255.0f;
+          out[base_c + 1] = cc[1] / 255.0f;
+          out[base_c + 2] = cc[2] / 255.0f;
+          out[base_c + 3] = cc[3] / 255.0f;
+        }
+      }
+    }
+  }
+  else
+  {
+    // Point cloud (no faces). Emit one vertex per input vertex.
+    const size_t nv = vm.vert.size();
+    m.vertices = (int64_t)nv;
+    const int64_t pos_count    = 3 * nv;
+    const int64_t nor_count    = has_normal ? 3 * nv : 0;
+    const int64_t col_count    = has_color  ? 4 * nv : 0;
+    const int64_t total_floats = pos_count + nor_count + col_count;
+
+    const int64_t pos_offset = (int64_t)out.size();
+    const int64_t nor_offset = pos_offset + pos_count;
+    const int64_t col_offset = nor_offset + nor_count;
+    out.resize(pos_offset + total_floats);
+    m.pos_offset     = pos_offset;
+    m.normal_offset  = has_normal ? nor_offset : 0;
+    m.color_offset   = has_color  ? col_offset : 0;
+
+    for(size_t i = 0; i < nv; ++i)
+    {
+      const auto& v = vm.vert[i];
+      out[pos_offset + i * 3 + 0] = (float)v.cP()[0];
+      out[pos_offset + i * 3 + 1] = (float)v.cP()[1];
+      out[pos_offset + i * 3 + 2] = (float)v.cP()[2];
+
+      if(has_normal)
+      {
+        out[nor_offset + i * 3 + 0] = (float)v.cN()[0];
+        out[nor_offset + i * 3 + 1] = (float)v.cN()[1];
+        out[nor_offset + i * 3 + 2] = (float)v.cN()[2];
+      }
+
+      if(has_color)
+      {
+        out[col_offset + i * 4 + 0] = v.cC()[0] / 255.0f;
+        out[col_offset + i * 4 + 1] = v.cC()[1] / 255.0f;
+        out[col_offset + i * 4 + 2] = v.cC()[2] / 255.0f;
+        out[col_offset + i * 4 + 3] = v.cC()[3] / 255.0f;
+      }
+    }
+  }
+
+  result.push_back(std::move(m));
+  return result;
+}
+
+template <int (*OpenFn)(ImpMesh&, const char*, int&, vcg::CallBackPos*)>
+std::vector<Threedim::mesh>
+importVcgGeneric(std::string_view filename, Threedim::float_vec& out)
+{
+  // OpenFn takes a C string; a string_view is not guaranteed NUL-terminated.
+  const std::string nul_terminated{filename};
+  ImpMesh loaded;
+  int attribute_mask = 0;
+  if(OpenFn(loaded, nul_terminated.c_str(), attribute_mask, nullptr) != 0)
+    return {};
+  return convertVcgToMeshes(loaded, out, attribute_mask);
+}
+
+// Wrappers to pin the importer function pointer signature.
+static int openStl(ImpMesh& mesh, const char* path, int& mask, vcg::CallBackPos* cb)
+{
+  return vcg::tri::io::ImporterSTL<ImpMesh>::Open(mesh, path, mask, cb);
+}
+static int openOff(ImpMesh& mesh, const char* path, int& mask, vcg::CallBackPos* cb)
+{
+  return vcg::tri::io::ImporterOFF<ImpMesh>::Open(mesh, path, mask, cb);
+}
+
+} // namespace
+
+std::vector<Threedim::mesh>
+StlFromFile(std::string_view filename, Threedim::float_vec& buffer)
+{
+  return importVcgGeneric<&openStl>(filename, buffer); // empty result if the import fails
+}
+
+std::vector<Threedim::mesh>
+OffFromFile(std::string_view filename, Threedim::float_vec& buffer)
+{
+  return importVcgGeneric<&openOff>(filename, buffer); // empty result if the import fails
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/VcgImporters.hpp b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.hpp
new file mode 100644
index 0000000000..0306dfa972
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.hpp
@@ -0,0 +1,27 @@
+#pragma once
+#include <Threedim/TinyObj.hpp>
+
+#include <string_view>
+
+namespace Threedim
+{
+
+// vcglib bridges. Load a 3D file via vcg::tri::io::Importer<Mesh>::Open
+// and convert the loaded mesh into the same flat float_vec + mesh record
+// format that TinyObjFromFile / PlyFromFile produce, so downstream
+// `sceneStateFromMeshes` (or GeometryLoader's `rebuild_geometry`)
+// consumes them uniformly.
+//
+// Adds STL and OFF support (the two remaining generally-useful formats
+// vcglib offers that we weren't already covering via tinyobj / miniply).
+// COLLADA (DAE) is a candidate for a follow-up — it carries scene
+// hierarchy + materials + skinning, and deserves a richer conversion
+// path than "dump meshes into one flat buffer".
+
+std::vector<Threedim::mesh> StlFromFile(
+    std::string_view filename, Threedim::float_vec& buffer);
+
+std::vector<Threedim::mesh> OffFromFile(
+    std::string_view filename, Threedim::float_vec& buffer);
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp
index ab5d6a58bf..7c4297b591 100644
--- a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp
+++ b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp
@@ -162,4 +162,14 @@ std::function<void(VoxelLoader&)> VoxelLoader::ins::vox_t::process(file_type tv)
   };
 }
 
+void VoxelLoader::operator()()
+{
+  // Refresh halp::mesh::transform[16] from the position / rotation / scale
+  // controls. computeTRSMatrix reports whether the matrix differs from the
+  // cached one, and that result becomes this frame's dirty_transform, so
+  // idle knobs leave the flag false.
+  auto& geom = outputs.geometry;
+  geom.dirty_transform = computeTRSMatrix(inputs, geom.transform, m_cachedTRS);
+}
+
 }
diff --git a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp
index 250c087d2f..8e76a7e89f 100644
--- a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp
+++ b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp
@@ -1,4 +1,6 @@
 #pragma once
+#include "TransformHelper.hpp"
+
 #include <Threedim/TinyObj.hpp>
 #include <Threedim/Vox.hpp>
 #include <halp/controls.hpp>
@@ -60,6 +62,7 @@ class VoxelLoader
 
   void reload();
   void rebuild_geometry();
+  void operator()();
 
   std::vector<mesh> meshinfo{};
   float_vec complete;
@@ -67,6 +70,9 @@ class VoxelLoader
 
   // Cache the file data so mode changes can re-process
   std::string cached_filename;
+
+  // Per-frame TRS matrix cache (see TransformHelper.hpp).
+  CachedTRS m_cachedTRS{};
 };
 
 }
diff --git a/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp b/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp
index 44eb0b0cb7..a16237563e 100644
--- a/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp
+++ b/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp
@@ -9,6 +9,7 @@
 #include <score/plugins/FactorySetup.hpp>
 
 #include <Avnd/Factories.hpp>
+#include <Threedim/AnimationPlayer.hpp>
 #include <Threedim/ArrayToGeometry.hpp>
 #include <Threedim/ArrayToTexture.hpp>
 #include <Threedim/CubemapComposer.hpp>
@@ -21,17 +22,57 @@
 #include <Threedim/GeometryToBuffer.hpp>
 #include <Threedim/ModelDisplay/Executor.hpp>
 #include <Threedim/ModelDisplay/Process.hpp>
+#include <Threedim/ConfigurePrimitive.hpp>
+#include <Threedim/EnvironmentLoader.hpp>
+#include <Threedim/ExtractBuffer2.hpp>
+#include <Threedim/ExtractSceneBuffer.hpp>
+#include <Threedim/ExtractTexture.hpp>
+#include <Threedim/Instancer.hpp>
+#include <Threedim/MaterialOverride.hpp>
+#include <Threedim/PBRMesh.hpp>
+#include <Threedim/ShadowCascadeSetup.hpp>
+#include <Threedim/FlattenedSceneFilter/Executor.hpp>
+#include <Threedim/FlattenedSceneFilter/Process.hpp>
+#include <Threedim/HumanoidRetarget.hpp>
+#include <Threedim/InverseKinematics.hpp>
+#include <Gfx/FormatRegistry.hpp>
+#include <Threedim/InjectBuffer.hpp>
+#include <Threedim/InjectTexture.hpp>
+#include <Threedim/TagAs.hpp>
+#include <Threedim/MergeGeometries/Executor.hpp>
+#include <Threedim/MergeGeometries/Process.hpp>
+#include <Threedim/CreateCollection.hpp>
+#include <Threedim/SceneDuplicator.hpp>
+#include <Threedim/SceneFilter/Executor.hpp>
+#include <Threedim/SceneFilter/Process.hpp>
+#include <Threedim/SceneGraphFilter.hpp>
+#include <Threedim/SceneGroup.hpp>
+#include <Threedim/SceneInspector.hpp>
+#include <Threedim/SceneResourceRoute.hpp>
+#include <Threedim/SceneSelector.hpp>
+#include <Threedim/SceneSwitch.hpp>
+#include <Threedim/ScenePreprocessor/Executor.hpp>
+#include <Threedim/ScenePreprocessor/Process.hpp>
 #include <Threedim/Noise.hpp>
-#include <Threedim/ObjLoader.hpp>
+#include <Threedim/AssetLoader.hpp>
+#include <Threedim/BufferInfo.hpp>
+#include <Threedim/Camera.hpp>
+#include <Threedim/CameraArray.hpp>
+#include <Threedim/CameraSwitch.hpp>
+#include <Threedim/GeometryLoader.hpp>
+#include <Threedim/ImageLoader.hpp>
+#include <Threedim/Light.hpp>
+#include <Threedim/TextureInfo.hpp>
+#include <Threedim/Transform3D.hpp>
 #include <Threedim/PCLToGeometry.hpp>
 #include <Threedim/VoxelLoader.hpp>
 #include <Threedim/Primitive.hpp>
 #include <Threedim/RenderPipeline/Executor.hpp>
 #include <Threedim/RenderPipeline/Layer.hpp>
 #include <Threedim/RenderPipeline/Process.hpp>
-#include <Threedim/Splat/Executor.hpp>
-#include <Threedim/Splat/Process.hpp>
 #include <Threedim/StructureSynth.hpp>
+#include <Threedim/TextToMesh.hpp>
+#include <Threedim/TextToTexture.hpp>
 #include <Threedim/TextureToBuffer.hpp>
 #include <avendish/examples/Gpu/ArrayToBuffer.hpp>
 #include <avendish/examples/Gpu/BufferToArray.hpp>
@@ -184,27 +225,37 @@ class SSynthDropHandler final : public Process::ProcessDropHandler
   }
 };
 
-class OBJLibraryHandler final
+class AssetLibraryHandler final
     : public QObject
     , public Library::LibraryInterface
 {
   SCORE_CONCRETE("da4af155-3cb6-41df-8c10-5a002b9d97ca")
 
-  QSet<QString> acceptedFiles() const noexcept override { return {"obj", "ply"}; }
+  QSet<QString> acceptedFiles() const noexcept override
+  {
+    // Extension list must stay aligned with AssetDropHandler::fileExtensions
+    // below — the Library panel surfaces files by acceptedFiles, the canvas
+    // drag-drop accepts them via fileExtensions, and AssetLoader::process
+    // routes the underlying parser by extension. .splat and .spz arrive
+    // through the splat_binary / spz parsers (see PrimitiveCloud/SplatBinary
+    // and SpzCodec) and must be listed here too; otherwise the Library
+    // panel would not surface files that the runtime handles.
+    return {"fbx", "gltf", "glb", "obj", "ply", "stl", "off",
+            "usd", "usda", "usdc", "usdz", "splat", "spz"};
+  }
 
   Library::Subcategories categories;
 
-  using proc = oscr::ProcessModel<ObjLoader>;
+  using proc = oscr::ProcessModel<AssetLoader>;
   void setup(Library::ProcessesItemModel& model, const score::GUIApplicationContext& ctx)
       override
   {
-    // TODO relaunch whenever library path changes...
     const auto& key = Metadata<ConcreteKey_k, proc>::get();
     QModelIndex node = model.find(key);
-    if (node == QModelIndex{})
+    if(node == QModelIndex{})
       return;
 
-    categories.init("Object Loader", node, ctx);
+    categories.init("Asset Loader", node, ctx);
   }
 
   std::function<void()> asyncAddPath(std::string_view path) override
@@ -224,13 +275,18 @@ class OBJLibraryHandler final
   }
 };
 
-class OBJDropHandler final : public Process::ProcessDropHandler
+class AssetDropHandler final : public Process::ProcessDropHandler
 {
   SCORE_CONCRETE("1d6cac56-2059-4fb8-9cef-19301a1fba3d")
 
-  QSet<QString> fileExtensions() const noexcept override { return {"obj", "ply"}; }
+  QSet<QString> fileExtensions() const noexcept override
+  {
+    // Same extension set as AssetLibraryHandler::acceptedFiles; keep in sync.
+    return {"fbx", "gltf", "glb", "obj", "ply", "stl", "off", "splat", "spz",
+            "usd", "usda", "usdc", "usdz"};
+  }
 
-  using proc = oscr::ProcessModel<ObjLoader>;
+  using proc = oscr::ProcessModel<AssetLoader>;
   void dropData(
       std::vector<ProcessDrop>& vec,
       const DroppedFile& data,
@@ -325,7 +381,9 @@ class VoxDropHandler final : public Process::ProcessDropHandler
 /**
  * This file instantiates the classes that are provided by this plug-in.
  */
-score_plugin_threedim::score_plugin_threedim() = default;
+score_plugin_threedim::score_plugin_threedim()
+{
+}
 score_plugin_threedim::~score_plugin_threedim() = default;
 
 std::vector<score::InterfaceBase*> score_plugin_threedim::factories(
@@ -338,7 +396,38 @@ std::vector<score::InterfaceBase*> score_plugin_threedim::factories(
   oscr::instantiate_fx<Threedim::ArrayToTexture>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::Noise>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::StrucSynth>(fx, ctx, key);
-  oscr::instantiate_fx<Threedim::ObjLoader>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::GeometryLoader>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::AssetLoader>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::BufferInfo>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::TextureInfo>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::ImageLoader>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::Camera>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::CameraArray>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::CameraSwitch>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::Light>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::Transform3D>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneGraphFilter>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneSwitch>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneGroup>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneSelector>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneInspector>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneDuplicator>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::CreateCollection>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::SceneResourceRoute>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::InjectBuffer>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::InjectTexture>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::TagAs>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::PBRMesh>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::MaterialOverride>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::ConfigurePrimitive>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::Instancer>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::ShadowCascadeSetup>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::EnvironmentLoader>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::AnimationPlayer>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::HumanoidRetarget>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::InverseKinematics>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::TextToMesh>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::TextToTexture>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::VoxelLoader>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::Plane>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::Cube>(fx, ctx, key);
@@ -350,6 +439,9 @@ std::vector<score::InterfaceBase*> score_plugin_threedim::factories(
   oscr::instantiate_fx<Threedim::Torus>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::PCLToMesh2>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::ExtractBuffer>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::ExtractBuffer2>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::ExtractSceneBuffer>(fx, ctx, key);
+  oscr::instantiate_fx<Threedim::ExtractTexture>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::GeometryPacker>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::BuffersToGeometry>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::BuffersToGeometry2>(fx, ctx, key);
@@ -357,20 +449,34 @@ std::vector<score::InterfaceBase*> score_plugin_threedim::factories(
   oscr::instantiate_fx<Threedim::SplatLoader>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::CubemapLoader>(fx, ctx, key);
   oscr::instantiate_fx<Threedim::CubemapComposer>(fx, ctx, key);
+  // Splat (legacy GaussianSplatNode) factories removed: AssetLoader now
+  // routes .splat / .spz / 3DGS .ply files through primitive_cloud_component
+  // and the new ScenePreprocessor / 3dgs.tile rendering pipeline. The legacy
+  // Splat process kept its own GaussianSplatNode renderer; superseded.
+  // Existing projects that referenced the legacy Splat UUID
+  // ("cdc15a16-e856-4e02-9339-7d9e48da10ce") get a UUID-rewrite alias to
+  // AssetLoader at load time (see C-22d).
   auto add = instantiate_factories<
       score::ApplicationContext,
       FW<Process::ProcessModelFactory, Gfx::ModelDisplay::ProcessFactory,
-         Gfx::RenderPipeline::ProcessFactory, Gfx::Splat::ProcessFactory>,
+         Gfx::RenderPipeline::ProcessFactory,
+         Gfx::ScenePreprocessor::ProcessFactory,
+         Gfx::SceneFilter::ProcessFactory,
+         Gfx::FlattenedSceneFilter::ProcessFactory,
+         Gfx::MergeGeometries::ProcessFactory>,
       FW<Process::LayerFactory, Gfx::RenderPipeline::LayerFactory>,
       FW<Library::LibraryInterface, Threedim::SSynthLibraryHandler,
-         Threedim::OBJLibraryHandler, Gfx::RawRasterLibraryHandler,
+         Threedim::AssetLibraryHandler, Gfx::RawRasterLibraryHandler,
          Threedim::VoxLibraryHandler>,
       FW<Process::ProcessDropHandler, Threedim::SSynthDropHandler,
-         Threedim::OBJDropHandler, Threedim::VoxDropHandler>,
+         Threedim::AssetDropHandler, Threedim::VoxDropHandler>,
       FW<Execution::ProcessComponentFactory,
          Gfx::ModelDisplay::ProcessExecutorComponentFactory,
          Gfx::RenderPipeline::ProcessExecutorComponentFactory,
-         Gfx::Splat::ProcessExecutorComponentFactory>>(ctx, key);
+         Gfx::ScenePreprocessor::ProcessExecutorComponentFactory,
+         Gfx::SceneFilter::ProcessExecutorComponentFactory,
+         Gfx::FlattenedSceneFilter::ProcessExecutorComponentFactory,
+         Gfx::MergeGeometries::ProcessExecutorComponentFactory>>(ctx, key);
   fx.insert(
       fx.end(),
       std::make_move_iterator(add.begin()),