diff --git a/.github/workflows/win-builds.yaml b/.github/workflows/win-builds.yaml index ce25d4eb41..4d2b246a59 100644 --- a/.github/workflows/win-builds.yaml +++ b/.github/workflows/win-builds.yaml @@ -205,6 +205,7 @@ jobs: fftw:p ffmpeg:p SDL2:p + zstd:p - name: Build shell: msys2 {0} diff --git a/.gitmodules b/.gitmodules index 58b0c0c488..50ce2e7799 100755 --- a/.gitmodules +++ b/.gitmodules @@ -115,3 +115,18 @@ [submodule "3rdparty/opengametools"] path = 3rdparty/opengametools url = https://github.com/jpaver/opengametools +[submodule "3rdparty/ufbx"] + path = 3rdparty/ufbx + url = https://github.com/ufbx/ufbx +[submodule "3rdparty/fastgltf"] + path = 3rdparty/fastgltf + url = https://github.com/spnda/fastgltf +[submodule "3rdparty/OffsetAllocator"] + path = 3rdparty/OffsetAllocator + url = https://github.com/sebbbi/OffsetAllocator +[submodule "3rdparty/spz"] + path = 3rdparty/spz + url = https://github.com/nianticlabs/spz +[submodule "3rdparty/zstd"] + path = 3rdparty/zstd + url = https://github.com/facebook/zstd diff --git a/3rdparty/3rdparty.cmake b/3rdparty/3rdparty.cmake index 53694ce380..c94fe718cb 100644 --- a/3rdparty/3rdparty.cmake +++ b/3rdparty/3rdparty.cmake @@ -29,3 +29,4 @@ include(3rdparty/shmdata.cmake) include(3rdparty/snappy.cmake) include(3rdparty/sndfile.cmake) include(3rdparty/xtensor.cmake) +include(3rdparty/zstd.cmake) diff --git a/3rdparty/OffsetAllocator b/3rdparty/OffsetAllocator new file mode 160000 index 0000000000..3610a73770 --- /dev/null +++ b/3rdparty/OffsetAllocator @@ -0,0 +1 @@ +Subproject commit 3610a7377088b1e8c8f1525f458c96038a4e6fc0 diff --git a/3rdparty/avendish b/3rdparty/avendish index 7eafe1735a..77be36e03b 160000 --- a/3rdparty/avendish +++ b/3rdparty/avendish @@ -1 +1 @@ -Subproject commit 7eafe1735a2c6c20891ead7404333884a6e15971 +Subproject commit 77be36e03b7d327f6f2bee38c63b4abf63f41a2d diff --git a/3rdparty/fastgltf b/3rdparty/fastgltf new file mode 160000 index 0000000000..ce52187411 --- /dev/null +++ 
b/3rdparty/fastgltf @@ -0,0 +1 @@ +Subproject commit ce521874115d66679cbb33c6b2811469b04c1066 diff --git a/3rdparty/libossia b/3rdparty/libossia index 476e6e50d2..b335062f52 160000 --- a/3rdparty/libossia +++ b/3rdparty/libossia @@ -1 +1 @@ -Subproject commit 476e6e50d2ac11298b9ea2f6e4d9372973a52db0 +Subproject commit b335062f524775ad9a5ef094eec5bdcb8fd20e8d diff --git a/3rdparty/spz b/3rdparty/spz new file mode 160000 index 0000000000..7ae1621e54 --- /dev/null +++ b/3rdparty/spz @@ -0,0 +1 @@ +Subproject commit 7ae1621e54e4b42c3c9c192b366d09116e558e19 diff --git a/3rdparty/ufbx b/3rdparty/ufbx new file mode 160000 index 0000000000..83bc7cf44f --- /dev/null +++ b/3rdparty/ufbx @@ -0,0 +1 @@ +Subproject commit 83bc7cf44f76bc8622de63b809a42b5d557cd733 diff --git a/3rdparty/zstd b/3rdparty/zstd new file mode 160000 index 0000000000..885c79ba4a --- /dev/null +++ b/3rdparty/zstd @@ -0,0 +1 @@ +Subproject commit 885c79ba4ae8345e006f61bc97b270d4cf7ff076 diff --git a/3rdparty/zstd.cmake b/3rdparty/zstd.cmake new file mode 100644 index 0000000000..42e4256d67 --- /dev/null +++ b/3rdparty/zstd.cmake @@ -0,0 +1,49 @@ +if(SCORE_USE_SYSTEM_LIBRARIES) + find_package(zstd GLOBAL CONFIG) +endif() + +if(NOT TARGET zstd::libzstd_static AND NOT TARGET zstd::libzstd_shared AND NOT TARGET zstd) + set(ZSTD_BUILD_PROGRAMS OFF CACHE INTERNAL "" FORCE) + set(ZSTD_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) + set(ZSTD_BUILD_SHARED OFF CACHE INTERNAL "" FORCE) + set(ZSTD_BUILD_STATIC ON CACHE INTERNAL "" FORCE) + set(ZSTD_BUILD_DICTBUILDER OFF CACHE INTERNAL "" FORCE) + + set(old_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS OFF) + + if(NOT MSVC AND NOT CMAKE_CROSSCOMPILING) + if(CMAKE_BUILD_TYPE MATCHES ".*Deb.*") + set(old_CFLAGS "${CMAKE_C_FLAGS}") + set(old_CXXFLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -march=native") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native") + endif() + endif() + + 
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/zstd/build/cmake" EXCLUDE_FROM_ALL) + + if(NOT MSVC AND NOT CMAKE_CROSSCOMPILING) + if(CMAKE_BUILD_TYPE MATCHES ".*Deb.*") + set(CMAKE_C_FLAGS "${old_CFLAGS}") + set(CMAKE_CXX_FLAGS "${old_CXXFLAGS}") + endif() + endif() + + set(BUILD_SHARED_LIBS ${old_BUILD_SHARED_LIBS}) +endif() + +# Make later find_package(zstd) calls (e.g. 3rdparty/spz) resolve to the +# targets configured above: some prebuilt SDKs ship zstd configs pointing to +# files that do not exist, and a not-found result would trigger FetchContent +# fallbacks that clash with the vendored targets. +file(WRITE "${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/zstd-config.cmake" [=[ +if(TARGET libzstd_static AND NOT TARGET zstd::libzstd_static) + add_library(zstd::libzstd_static INTERFACE IMPORTED GLOBAL) + target_link_libraries(zstd::libzstd_static INTERFACE libzstd_static) +endif() +if(TARGET libzstd_shared AND NOT TARGET zstd::libzstd_shared) + add_library(zstd::libzstd_shared INTERFACE IMPORTED GLOBAL) + target_link_libraries(zstd::libzstd_shared INTERFACE libzstd_shared) +endif() +]=]) \ No newline at end of file diff --git a/ci/debian.bookworm.deps.sh b/ci/debian.bookworm.deps.sh index 1e9af4c07f..861b03d01c 100755 --- a/ci/debian.bookworm.deps.sh +++ b/ci/debian.bookworm.deps.sh @@ -40,6 +40,7 @@ $SUDO apt-get install -qq --force-yes \ libavahi-compat-libdnssd-dev libsamplerate0-dev \ portaudio19-dev \ libpipewire-0.3-dev \ - libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev + libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/debian.bullseye.deps.sh b/ci/debian.bullseye.deps.sh index 74a8b15db6..88fcc2b1d4 100755 --- a/ci/debian.bullseye.deps.sh +++ b/ci/debian.bullseye.deps.sh @@ -32,6 +32,7 @@ $SUDO apt-get install -qq --force-yes -t bullseye-backports \ libavahi-compat-libdnssd-dev libsamplerate0-dev \ 
portaudio19-dev \ libpipewire-0.3-dev \ - libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev + libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/debian.trixie-system.deps.sh b/ci/debian.trixie-system.deps.sh index 9649c052ff..b9ffee0af3 100755 --- a/ci/debian.trixie-system.deps.sh +++ b/ci/debian.trixie-system.deps.sh @@ -55,7 +55,8 @@ $SUDO apt-get install -qq --force-yes \ libzita-alsa-pcmi-dev \ libvst3sdk-dev \ puredata-dev \ - libpd-dev + libpd-dev \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/debian.trixie.deps.sh b/ci/debian.trixie.deps.sh index 9aa4fb56a2..70ff36f408 100755 --- a/ci/debian.trixie.deps.sh +++ b/ci/debian.trixie.deps.sh @@ -34,6 +34,7 @@ $SUDO apt-get install -qq --force-yes \ libavahi-compat-libdnssd-dev libsamplerate0-dev \ portaudio19-dev \ libpipewire-0.3-dev \ - libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev + libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/fedora.deps.sh b/ci/fedora.deps.sh index 77eac40f3b..f8d8117547 100755 --- a/ci/fedora.deps.sh +++ b/ci/fedora.deps.sh @@ -34,6 +34,7 @@ dnf -y install --allowerasing \ qt6-qtsvg-devel \ qt6-qtconnectivity-devel \ pipewire-devel \ - zlib-ng-compat-static zlib-ng-compat-devel + zlib-ng-compat-static zlib-ng-compat-devel \ + libzstd-devel source ci/common.deps.sh LINUX diff --git a/ci/freebsd.deps.sh b/ci/freebsd.deps.sh index bcdb633dbb..ea21bbc65a 100755 --- a/ci/freebsd.deps.sh +++ b/ci/freebsd.deps.sh @@ -18,6 +18,7 @@ pkg install -y \ libcoap \ freetype2 harfbuzz fontconfig \ alsa-lib \ - jackit + jackit \ + zstd source ci/common.deps.sh FREEBSD diff --git a/ci/nix.build.nix b/ci/nix.build.nix index fe17e697af..e174fbdf37 100644 --- a/ci/nix.build.nix +++ 
b/ci/nix.build.nix @@ -33,12 +33,14 @@ , rapidfuzz-cpp , re2 , rubberband +, simdjson , snappy , SDL2 , spdlog , suil , udev , xorg +, zstd }: # TODO: figure out LLVM jit @@ -88,6 +90,7 @@ clangStdenv.mkDerivation (finalAttrs: { rapidfuzz-cpp re2 rubberband + simdjson snappy SDL2 spdlog diff --git a/ci/osx.brew.deps.sh b/ci/osx.brew.deps.sh index a7e14917ad..c971f3b5b3 100755 --- a/ci/osx.brew.deps.sh +++ b/ci/osx.brew.deps.sh @@ -4,9 +4,9 @@ set +e export HOMEBREW_NO_AUTO_UPDATE=1 brew update && (brew list cmake || brew install cmake) -brew install ninja qt boost ffmpeg@7 fftw portaudio jack sdl lv2 lilv suil freetype +brew install ninja qt boost ffmpeg@7 fftw portaudio jack sdl lv2 lilv suil freetype zstd brew uninstall --ignore-dependencies qt@5 || true source ci/common.deps.sh MACOS -echo PKG_CONFIG_PATH="/opt/homebrew/opt/ffmpeg@7/lib/pkgconfig" >> "$GITHUB_ENV" \ No newline at end of file +echo PKG_CONFIG_PATH="/opt/homebrew/opt/ffmpeg@7/lib/pkgconfig" >> "$GITHUB_ENV" diff --git a/ci/suse.leap.deps.sh b/ci/suse.leap.deps.sh index 21e24e7562..650ac271e0 100755 --- a/ci/suse.leap.deps.sh +++ b/ci/suse.leap.deps.sh @@ -25,7 +25,8 @@ $SUDO zypper -n install \ qt6-qml-devel qt6-qml-private-devel \ qt6-svg-devel \ ffmpeg-4-libavcodec-devel ffmpeg-4-libavdevice-devel ffmpeg-4-libavfilter-devel ffmpeg-4-libavformat-devel ffmpeg-4-libswresample-devel \ - curl gzip + curl gzip \ + libzstd-devel curl -L -0 https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz --output cmake.tgz tar xaf cmake.tgz diff --git a/ci/suse.tumbleweed.deps.sh b/ci/suse.tumbleweed.deps.sh index d8f36b9c13..153172fc89 100755 --- a/ci/suse.tumbleweed.deps.sh +++ b/ci/suse.tumbleweed.deps.sh @@ -31,5 +31,6 @@ $SUDO zypper -n install \ qt6-qml-devel qt6-qml-private-devel \ qt6-svg-devel \ ffmpeg-7-libavcodec-devel ffmpeg-7-libavdevice-devel ffmpeg-7-libavfilter-devel ffmpeg-7-libavformat-devel ffmpeg-7-libswresample-devel \ - zlib-devel zlib-devel-static + 
zlib-devel zlib-devel-static \ + libzstd-devel diff --git a/ci/ubuntu.2604.deps.sh b/ci/ubuntu.2604.deps.sh index b5488d2fa8..b25b76f918 100755 --- a/ci/ubuntu.2604.deps.sh +++ b/ci/ubuntu.2604.deps.sh @@ -42,7 +42,8 @@ $SUDO apt install -y \ libvulkan-dev \ libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ file \ - dpkg-dev + dpkg-dev \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/ubuntu.jammy.deps.sh b/ci/ubuntu.jammy.deps.sh index 43c9f7fe65..2c61376768 100755 --- a/ci/ubuntu.jammy.deps.sh +++ b/ci/ubuntu.jammy.deps.sh @@ -51,6 +51,7 @@ $SUDO apt-get install -y \ libvulkan-dev \ libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ file \ - dpkg-dev + dpkg-dev \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/ubuntu.lunar.deps.sh b/ci/ubuntu.lunar.deps.sh index bb49e14254..752123c08e 100755 --- a/ci/ubuntu.lunar.deps.sh +++ b/ci/ubuntu.lunar.deps.sh @@ -48,6 +48,7 @@ $SUDO apt-get install -y \ libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ file \ dpkg-dev \ - lsb-release + lsb-release \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/ubuntu.noble.deps.sh b/ci/ubuntu.noble.deps.sh index 4a64660239..6db2b97c53 100755 --- a/ci/ubuntu.noble.deps.sh +++ b/ci/ubuntu.noble.deps.sh @@ -43,7 +43,8 @@ $SUDO apt-get install -y \ libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ file \ dpkg-dev \ - lsb-release + lsb-release \ + libzstd-dev # needed because GCC does not support -fuse-ld=lld-19 $SUDO rm -rf /usr/bin/lld /usr/bin/ld.lld diff --git a/ci/ubuntu.oracular.deps.sh b/ci/ubuntu.oracular.deps.sh index ff8630b9b6..7501e695b1 100755 --- a/ci/ubuntu.oracular.deps.sh +++ b/ci/ubuntu.oracular.deps.sh @@ -43,6 +43,7 @@ $SUDO apt-get install -y \ libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ file \ 
dpkg-dev \ - lsb-release + lsb-release \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/ci/ubuntu.plucky.deps.sh b/ci/ubuntu.plucky.deps.sh index bf53bd6fdd..b030b6796f 100755 --- a/ci/ubuntu.plucky.deps.sh +++ b/ci/ubuntu.plucky.deps.sh @@ -44,6 +44,7 @@ $SUDO apt-get install -y \ libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libavformat-dev libswresample-dev \ file \ dpkg-dev \ - lsb-release + lsb-release \ + libzstd-dev source ci/common.deps.sh LINUX diff --git a/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml b/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml index 1e9c6d07cc..0d6fa0e466 100644 --- a/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml +++ b/cmake/Deployment/Linux/Flatpak/io.ossia.score.yml @@ -120,6 +120,7 @@ modules: - modules/re2.yaml - modules/libcoap.yaml - modules/boost.yaml + - modules/simdjson.yaml - modules/snappy.yaml - modules/avahi.yaml - modules/suil.yaml diff --git a/cmake/Deployment/Linux/Flatpak/modules/simdjson.yaml b/cmake/Deployment/Linux/Flatpak/modules/simdjson.yaml new file mode 100644 index 0000000000..befaf599bc --- /dev/null +++ b/cmake/Deployment/Linux/Flatpak/modules/simdjson.yaml @@ -0,0 +1,16 @@ +# JSON parser used by fastgltf (glTF support in score-plugin-threedim). +# Provided here so that fastgltf does not try to download it at configure +# time, which is impossible in the sandboxed flatpak build. 
+name: simdjson +buildsystem: cmake-ninja +builddir: true +config-opts: + - -Wno-dev + - -DCMAKE_BUILD_TYPE=RelWithDebInfo + - -DCMAKE_POSITION_INDEPENDENT_CODE=ON + - -DBUILD_SHARED_LIBS=ON + - -DSIMDJSON_DEVELOPER_MODE=OFF +sources: + - type: archive + url: https://github.com/simdjson/simdjson/archive/refs/tags/v3.12.3.tar.gz + sha256: d0af071f2f4187d8b26b556e83ef832b634bd5feb4e2f537b9dabbd334d4e334 diff --git a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp index c7dd2177d3..14f947b01c 100644 --- a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp +++ b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.cpp @@ -4,6 +4,7 @@ #include +#include #include #include #include @@ -19,8 +20,22 @@ void ProcessDropHandler::getCustomDrops( std::vector& drops, const QMimeData& mime, const score::DocumentContext& ctx) const noexcept { - // Check for special mime handling code - return dropCustom(drops, mime, ctx); + // dropCustom is no longer noexcept (some overrides invoke parsers that + // can throw on malformed input — see ProcessDropHandler.hpp). Catch + // here so a throwing handler never escapes through the noexcept + // public API and tears down the editor. + try + { + dropCustom(drops, mime, ctx); + } + catch(const std::exception& e) + { + qWarning() << "ProcessDropHandler::dropCustom threw:" << e.what(); + } + catch(...) 
+ { + qWarning() << "ProcessDropHandler::dropCustom threw an unknown exception"; + } } void ProcessDropHandler::getMimeDrops( @@ -61,7 +76,7 @@ QSet ProcessDropHandler::fileExtensions() const noexcept void ProcessDropHandler::dropCustom( std::vector&, const QMimeData& data, - const score::DocumentContext& ctx) const noexcept + const score::DocumentContext& ctx) const { } diff --git a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp index 4f568657a1..a712f06016 100644 --- a/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp +++ b/src/plugins/score-lib-process/Process/Drop/ProcessDropHandler.hpp @@ -59,7 +59,7 @@ class SCORE_LIB_PROCESS_EXPORT ProcessDropHandler : public score::InterfaceBase protected: virtual void dropCustom( std::vector& drops, const QMimeData& mime, - const score::DocumentContext& ctx) const noexcept; + const score::DocumentContext& ctx) const; virtual void dropPath( std::vector& drops, const score::FilePath& path, diff --git a/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp b/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp index 06905031fb..2b17f5ad2b 100644 --- a/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/Concepts.hpp @@ -448,7 +448,35 @@ make_control_in(avnd::field_index, Id&& id, QObject* parent) auto [Mx, My, Mz] = c.max; auto [ix, iy, iz] = c.init; return new Process::XYZSpinboxes{{mx, my, mz}, {Mx, My, Mz}, {ix, iy, iz}, - qname, id, parent}; + false, qname, id, parent}; + } + } + else if constexpr(widg.widget == avnd::widget_type::xyzw_spinbox) + { + static constexpr auto c = avnd::get_range(); + if constexpr(requires { + c.min == 0.f; + c.max == 0.f; + c.init == 0.f; + }) + { + return new Process::XYZSpinboxes{ + {c.min, c.min, c.min}, + {c.max, c.max, c.max}, + {c.init, c.init, c.init}, + false, + qname, + id, + parent}; + } + else + { + auto [mx, my, mz, mw] = c.min; + auto [Mx, 
My, Mz, Mw] = c.max; + auto [ix, iy, iz, iw] = c.init; + // FIXME we don't have a good 4-way widget + return new Process::XYZSpinboxes{{mx, my, mz}, {Mx, My, Mz}, {ix, iy, iz}, + false, qname, id, parent}; } } else if constexpr(widg.widget == avnd::widget_type::color) diff --git a/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp b/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp index 3f049ab18a..e0e1035b29 100644 --- a/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/CpuAnalysisNode.hpp @@ -5,10 +5,10 @@ namespace oscr { - template requires( - (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size) == 0 + (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size + scene_output_introspection::size) == 0 + && (avnd::gpu_render_target_output_port_output_introspection::size == 0) ) struct GfxRenderer final : score::gfx::OutputNodeRenderer { @@ -19,6 +19,7 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer AVND_NO_UNIQUE_ADDRESS texture_inputs_storage texture_ins; AVND_NO_UNIQUE_ADDRESS buffer_inputs_storage buffer_ins; AVND_NO_UNIQUE_ADDRESS geometry_inputs_storage geometry_ins; + AVND_NO_UNIQUE_ADDRESS scene_inputs_storage scene_ins; const GfxNode& node() const noexcept { @@ -44,9 +45,19 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer return {}; } - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + // See CpuFilterNode.hpp for the reasoning: init must live in initState + // so the incremental edge-rewire path also runs it. 
+ void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override { - auto& parent = node(); + if(m_initialized) + return; + + // See CpuFilterNode for the reasoning: optional renderlist + // backchannel populated via SFINAE so nodes can reach the + // RenderList's GpuResourceRegistry / AssetTable without plumbing. + if constexpr(requires { state->renderlist = &renderer; }) + state->renderlist = &renderer; + if constexpr(requires { state->prepare(); }) { this->node().processControlIn( @@ -59,6 +70,13 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer texture_ins.init(*this, renderer); if_possible(state->init(renderer, res)); + + m_initialized = true; + } + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); } void update( @@ -82,32 +100,69 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer } } - void release(score::gfx::RenderList& r) override + void releaseState(score::gfx::RenderList& r) override { + if(!m_initialized) + return; + if constexpr(avnd::texture_input_introspection::size > 0) texture_ins.release(); if constexpr(avnd::geometry_input_introspection::size > 0) geometry_ins.release(r); + if constexpr(scene_input_introspection::size > 0) + scene_ins.release(r); + if constexpr( avnd::texture_input_introspection::size > 0 || avnd::texture_output_introspection::size > 0) { - // FIXME this->defaultRelease(r); + // No call-through to GenericNodeRenderer::defaultRelease here: + // CpuAnalysisNode's GfxRenderer derives from OutputNodeRenderer, + // not GenericNodeRenderer, and OutputNodeRenderer has no + // defaultRelease equivalent (it owns no pipeline / passes — it + // is a sink, not a node renderer with m_p / m_pipelineCache). + // CpuFilterNode's mirror at line ~357 IS valid because that + // GfxRenderer derives from GenericNodeRenderer. 
+ // + // If a future CpuAnalysisNode uses textures via OutputNodeRenderer + // surfaces, they'll need their own per-storage release path + // (texture_ins.release above already handles texture INPUTS). } if_possible(state->release(r)); + + // Clear the optional renderlist backchannel. Paired with initState; + // same SFINAE guard. + if constexpr(requires { state->renderlist = nullptr; }) + state->renderlist = nullptr; + + m_initialized = false; + } + + void release(score::gfx::RenderList& r) override + { + releaseState(r); } void inputAboutToFinish( score::gfx::RenderList& renderer, const score::gfx::Port& p, QRhiResourceUpdateBatch*& res) override { + // Outer guard includes scene_input_introspection so a node with ONLY + // scene inputs (no texture / buffer / geometry) still allocates `res` + // — necessary if scene_inputs_storage ever grows an inputAboutToFinish + // method (today it's read-only via readInputScenes, but the storage's + // lifecycle is part of the new scene_port concept and may evolve). + // Without the include, a scene-only sink would silently skip the + // res allocation and any future scene-side write would have nowhere + // to land. if constexpr( avnd::texture_input_introspection::size > 0 || avnd::buffer_input_introspection::size > 0 - || avnd::geometry_input_introspection::size > 0) + || avnd::geometry_input_introspection::size > 0 + || scene_input_introspection::size > 0) { res = renderer.state.rhi->nextResourceUpdateBatch(); @@ -118,6 +173,8 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer if constexpr(avnd::geometry_input_introspection::size > 0) geometry_ins.inputAboutToFinish( renderer, res, this->geometry, *state, this->node()); + // No scene_ins.inputAboutToFinish today — the guard is forward- + // looking; add the call here when scene_inputs_storage grows one. 
} if_possible(state->inputAboutToFinish(renderer, p, res)); @@ -144,6 +201,8 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer buffer_ins.readInputBuffers(renderer, parent, *state); if constexpr(avnd::geometry_input_introspection::size > 0) geometry_ins.readInputGeometries(renderer, this->geometry, parent, *state); + if constexpr(scene_input_introspection::size > 0) + scene_ins.readInputScenes(this->scene, *state); parent.processControlIn( *this, *state, m_last_message, parent.last_message, parent.m_ctx); @@ -158,9 +217,13 @@ struct GfxRenderer final : score::gfx::OutputNodeRenderer }; template - requires( - (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size) == 0 - ) + requires((avnd::texture_output_introspection::size + + avnd::buffer_output_introspection::size + + avnd::geometry_output_introspection::size + + scene_output_introspection::size) + == 0 + && (avnd::gpu_render_target_output_port_output_introspection::size + == 0)) struct GfxNode final : CustomGpuOutputNodeBase , GpuNodeElements diff --git a/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp b/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp index 159c98a9f4..8bed9246d5 100644 --- a/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/CpuFilterNode.hpp @@ -3,18 +3,25 @@ #if SCORE_PLUGIN_GFX #include +#include + namespace oscr { template requires( - (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size) >= 1 + (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size + scene_output_introspection::size + avnd::gpu_render_target_output_port_output_introspection::size) >= 1 ) struct GfxRenderer final : score::gfx::GenericNodeRenderer { std::shared_ptr state; score::gfx::Message m_last_message{}; - ossia::time_value 
m_last_time{-1}; + // RenderList::frame id of the last frame on which we ran the expensive + // once-per-frame body of runInitialPasses (input readbacks, operator()(), + // output uploads). runInitialPasses is invoked once PER OUTGOING EDGE, so + // without this guard that whole body re-ran for every downstream edge, + // every frame. -1 = never run yet. + int64_t m_last_frame{-1}; AVND_NO_UNIQUE_ADDRESS texture_inputs_storage texture_ins; AVND_NO_UNIQUE_ADDRESS texture_outputs_storage texture_outs; @@ -24,6 +31,8 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer AVND_NO_UNIQUE_ADDRESS geometry_inputs_storage geometry_ins; AVND_NO_UNIQUE_ADDRESS geometry_outputs_storage geometry_outs; + AVND_NO_UNIQUE_ADDRESS scene_inputs_storage scene_ins; + AVND_NO_UNIQUE_ADDRESS scene_outputs_storage scene_outs; const GfxNode& node() const noexcept { @@ -42,8 +51,14 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer { if constexpr(avnd::texture_input_introspection::size > 0) { + // Only texture-RT inputs live in m_rts. Geometry / buffer / scene + // inputs on the same node (e.g. PBRMesh: 4 gpu_texture_inputs + a + // dynamic_gpu_geometry mesh in) land here through the generic + // renderTargetForOutput path — return empty so the upstream's + // addOutputPass skips creating a graphics render pass for them. auto it = texture_ins.m_rts.find(&p); - SCORE_ASSERT(it != texture_ins.m_rts.end()); + if(it == texture_ins.m_rts.end()) + return {}; return it->second; } return {}; @@ -60,6 +75,71 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer return {}; } + // For non-2D gpu_texture_input fields (cubemap / array / 3D): the port + // is flagged GrabsFromSource (see initGfxPorts + + // port_flags_for_field), so Graph::updateSinkSampler calls us here + // with the upstream's QRhiTexture. Write it into the matching halp + // field so the node's operator()() / runInitialPasses see the handle. 
+ // 2D (classic RT-rendered) inputs ignore this path — their handle is + // set up at init() time by texture_inputs_storage::init. + // + // depthTex: when the port also opts in via halp_meta(samplable_depth, + // true), Graph passes the upstream's depth attachment here too. Stored + // on `texture.depth_handle` for the consumer to sample alongside color. + void updateInputTexture( + const score::gfx::Port& input, QRhiTexture* tex, + QRhiTexture* depthTex = nullptr) override + { + if constexpr(avnd::texture_input_introspection::size > 0) + { + const auto& inputs = this->node().input; + int port_idx = -1; + for(int i = 0, n = (int)inputs.size(); i < n; ++i) + { + if(inputs[i] == &input) + { + port_idx = i; + break; + } + } + if(port_idx < 0) + return; + + avnd::texture_input_introspection::for_all_n2( + avnd::get_inputs(*state), + [&]( + F& t, avnd::predicate_index, avnd::field_index) { + if constexpr(avnd::gpu_texture_port + && halp::texture_kind_of() != halp::texture_kind::texture_2d) + { + if((int)N == port_idx) + { + t.texture.handle = tex; + if(tex) + { + const auto sz = tex->pixelSize(); + t.texture.width = sz.width(); + t.texture.height = sz.height(); + } + else + { + t.texture.width = 0; + t.texture.height = 0; + } + t.texture.kind = halp::texture_kind_of(); + if constexpr(halp::samplable_depth_of()) + { + t.texture.depth_handle = depthTex; + if(depthTex) + t.texture.depth_format + = qrhiToHalpDepthFormat(depthTex->format()); + } + } + } + }); + } + } + QRhiTexture* textureForOutput(const score::gfx::Port& output) override { if constexpr(avnd::gpu_texture_output_introspection::size > 0) @@ -95,9 +175,47 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer return nullptr; } - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + // All of the setup lives in initState(), not init(). 
The incremental + // edge-rewire path (Graph::createPassForEdgeIfMissing) only calls + // initState() on newly-created renderers — so a halp scene-in/scene-out + // node inserted live would otherwise never allocate its storage, its + // operator()() would run against uninitialised state every frame, and + // nothing would flow downstream until a stop/start cycle forced a full + // rebuild through init(). + void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override { + if(m_initialized) + return; + auto& parent = node(); + + // Optional renderlist backchannel for CPU halp nodes that need to + // reach their hosting RenderList's GpuResourceRegistry / AssetTable + // (e.g. Camera / Light / PBRMesh / MaterialOverride allocating arena + // slots). Populated by SFINAE so nodes that don't declare the member + // pay nothing. Lifetime: valid from initState until releaseState + // clears it back to nullptr. + if constexpr(requires { state->renderlist = &renderer; }) + state->renderlist = &renderer; + + // Ordering invariant: init → processControlIn → operator()() + // + // For nodes WITHOUT prepare(): processControlIn is NOT called here. + // state->init() therefore runs (line below) before any control-update + // callback can fire rebuild(). All five scene producers — Camera, + // CameraArray, Light, Transform3D, SceneGroup — rely on this: they + // populate m_*_ref arena handles in init(), and rebuild() reads those + // handles unconditionally. The invariant is also enforced at the two + // call-graph roots: + // • Graph.cpp:865-893 (incremental edge update): initState() is + // called before seedInitialOutputs() / operator()(). + // • RenderList.cpp:434-470 (full graph init): init() for all + // renderers runs before the first render frame fires update(). + // + // If you add prepare() to a scene producer, processControlIn becomes + // reachable BEFORE state->init() (see branch below vs. 
line 202) and + // any m_*_ref read inside rebuild() will observe an empty handle. + // Re-audit the producer's rebuild() ref-read sites before doing so. if constexpr(requires { state->prepare(); }) { parent.processControlIn( @@ -116,6 +234,70 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer buffer_outs.init(renderer, *state, parent); if_possible(state->init(renderer, res)); + + m_initialized = true; + } + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); + } + + // Called by Graph::reconcileAllRenderLists right after this renderer is + // spawned (in particular when the user live-inserts a scene-producing + // node — Camera, EnvironmentLoader, Light — into a running + // graph). Runs the node's operator()() once to populate its outputs and + // then pushes the result into every downstream sink's per-port scene + // cache immediately, rather than waiting for the first render-frame's + // upstream-input scan to find our new edge. Without this, the Camera + // live-insertion symptom is that the camera has no visible effect until + // the user stops and restarts transport (triggering a full render-list + // rebuild where every renderer's runInitialPasses runs from clean + // state). + void seedInitialOutputs(score::gfx::RenderList& renderer) override + { + if constexpr( + scene_output_introspection::size > 0 + || avnd::geometry_output_introspection::size > 0) + { + auto& parent = node(); + // Apply any control values that arrived before we were created. + // processControlIn is normally called from update() but the render + // loop won't run update() until the first frame after reconcile + // — the inserted Camera's slider defaults would leak through for + // one frame otherwise. + parent.processControlIn( + *this, *state, m_last_message, parent.last_message, parent.m_ctx); + + if_possible((*state)()); + + // Push to every existing output edge on scene/geometry ports. 
The + // upload helpers look at edge.sink to find the downstream renderer + // and call its NodeRenderer::process(port, scene_spec, source) — + // seeding exactly the same m_portScenes slot the first runInitialPasses + // would have filled one frame later. + // + // Scene and geometry ports both stamp score::gfx::Types::Geometry (per + // port_to_type_enum in GpuUtils.hpp — Process::GeometryInlet carries + // either a geometry or a full scene by design). Dispatching on the + // runtime port->type can never see Types::Scene, so we branch on + // compile-time introspection instead. Each upload helper is a no-op + // for nodes that don't have the corresponding output kind, and both + // branches can fire for nodes with mixed outputs. + const auto& outs = parent.output; + for(std::size_t i = 0; i < outs.size(); ++i) + { + auto* port = outs[i]; + if(!port || port->edges.empty()) + continue; + if constexpr(scene_output_introspection::size > 0) + for(auto* edge : port->edges) + scene_outs.upload(renderer, *this->state, *edge); + if constexpr(avnd::geometry_output_introspection::size > 0) + for(auto* edge : port->edges) + geometry_outs.upload(renderer, *this->state, *edge); + } + } } void update( @@ -145,8 +327,11 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer } } - void release(score::gfx::RenderList& r) override + void releaseState(score::gfx::RenderList& r) override { + if(!m_initialized) + return; + if constexpr(avnd::texture_input_introspection::size > 0) texture_ins.release(); @@ -159,12 +344,38 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer if constexpr(avnd::geometry_input_introspection::size > 0) geometry_ins.release(r); + if constexpr(scene_input_introspection::size > 0) + scene_ins.release(r); + + // Symmetric with the other *_outs.release calls above. 
No-ops today + // (scene_outputs_storage / geometry_outputs_storage own no QRhi + // resources — scene_spec is value-semantics + a shared_ptr; geometry + // wraps non-owning pointers + transform values). Wired so future + // RHI handles on the storages release cleanly. + if constexpr(avnd::geometry_output_introspection::size > 0) + geometry_outs.release(r); + if constexpr(scene_output_introspection::size > 0) + scene_outs.release(r); + if constexpr(avnd::texture_input_introspection::size > 0 || avnd::texture_output_introspection::size > 0) { this->defaultRelease(r); } if_possible(state->release(r)); + + // Clear the optional renderlist backchannel. Paired with the init + // assignment; same SFINAE guard so nodes without the member are + // unaffected. + if constexpr(requires { state->renderlist = nullptr; }) + state->renderlist = nullptr; + + m_initialized = false; + } + + void release(score::gfx::RenderList& r) override + { + releaseState(r); } void inputAboutToFinish( @@ -197,59 +408,112 @@ struct GfxRenderer final : score::gfx::GenericNodeRenderer auto& parent = node(); auto& rhi = *renderer.state.rhi; - if constexpr( - avnd::texture_input_introspection::size > 0 - || avnd::buffer_input_introspection::size > 0 - || avnd::geometry_input_introspection::size > 0) + // runInitialPasses is called once PER OUTGOING EDGE per frame. The + // expensive work below — rhi.finish() sync point, input readbacks, + // operator()(), and output buffer/texture uploads — only needs to run + // ONCE per frame: its result lives in `*this->state` and the storages, + // identical for every edge. We dedupe on RenderList::frame, which is + // bumped exactly once at the end of each RenderList::render() (see + // RenderList.cpp). This is NOT a transport-date gate: it does not + // freeze scene producers when the transport is paused (token.date + // frozen) — operator()() still re-runs every frame so live parameter + // edits take effect immediately. 
The per-edge geometry/scene uploads + // (which genuinely differ per edge — they target edge.sink) run for + // EVERY edge, below the guard. + const bool firstEdgeThisFrame = (renderer.frame != m_last_frame); + if(firstEdgeThisFrame) { - // FIXME: for geometry, here we should optimize if we know we aren't going to need them on the CPU, OR if it is a type ? - // Insert a synchronisation point to allow readbacks to complete - rhi.finish(); - } + m_last_frame = renderer.frame; - // If we are paused, we don't run the processor implementation. - if(parent.last_message.token.date == m_last_time) - return; - m_last_time = parent.last_message.token.date; + if constexpr( + avnd::texture_input_introspection::size > 0 + || avnd::buffer_input_introspection::size > 0 + || avnd::geometry_input_introspection::size > 0) + { + // FIXME: for geometry, here we should optimize if we know we aren't going to need them on the CPU, OR if it is a type ? + // Insert a synchronisation point to allow readbacks to complete + rhi.finish(); + } - if constexpr(avnd::texture_input_introspection::size > 0) - texture_ins.runInitialPasses(*this, rhi); - if constexpr(avnd::buffer_input_introspection::size > 0) - buffer_ins.readInputBuffers(renderer, parent, *state); - if constexpr(avnd::geometry_input_introspection::size > 0) - geometry_ins.readInputGeometries(renderer, this->geometry, parent, *state); + if constexpr(avnd::texture_input_introspection::size > 0) + texture_ins.runInitialPasses(*this, rhi); + if constexpr(avnd::buffer_input_introspection::size > 0) + buffer_ins.readInputBuffers(renderer, parent, *state); + if constexpr(avnd::geometry_input_introspection::size > 0) + geometry_ins.readInputGeometries(renderer, this->geometry, parent, *state); + if constexpr(scene_input_introspection::size > 0) + scene_ins.readInputScenes(this->scene, *state); - buffer_outs.prepareUpload(*res); + buffer_outs.prepareUpload(*res); - // Run the processor - if_possible(state->runInitialPasses(renderer, 
commands, res, edge)); - if_possible((*state)()); + // Run the processor + if_possible(state->runInitialPasses(renderer, commands, res, edge)); + if_possible((*state)()); - // Upload output buffers - if constexpr(avnd::buffer_output_introspection::size > 0) - buffer_outs.upload(renderer, *state, *res); + // Upload output buffers + if constexpr(avnd::buffer_output_introspection::size > 0) + buffer_outs.upload(renderer, *state, *res); - // Upload output textures - if constexpr(avnd::texture_output_introspection::size > 0) - { - texture_outs.runInitialPasses(*this, renderer, res); + // Upload output textures + if constexpr(avnd::texture_output_introspection::size > 0) + { + texture_outs.runInitialPasses(*this, renderer, res); - commands.resourceUpdate(res); - res = renderer.state.rhi->nextResourceUpdateBatch(); + commands.resourceUpdate(res); + res = renderer.state.rhi->nextResourceUpdateBatch(); + } + + // Copy the data to the model node + parent.processControlOut(*this->state); } + // Per-edge uploads: these target the specific downstream sink + // (edge.sink) and must run for every outgoing edge, even on edges + // after the first this frame. The producer's output is already + // populated in *this->state by the once-per-frame body above. + // Copy the geometry if constexpr(avnd::geometry_output_introspection::size > 0) geometry_outs.upload(renderer, *this->state, edge); - // Copy the data to the model node - parent.processControlOut(*this->state); + // Copy the scene (travels on the same Gfx::GeometryOutlet as geometry, + // published via NodeRenderer::process(scene_spec)). + if constexpr(scene_output_introspection::size > 0) + scene_outs.upload(renderer, *this->state, edge); + } + + // Customization point for halp nodes that produce their output via + // their own GPU pipeline (post-process effects, custom rasterizers). 
+ // + // Default GenericNodeRenderer::runRenderPass calls defaultRenderPass, + // which uses a pre-built fullscreen-quad pipeline that samples + // m_samplers[0] (the upstream input texture, set up by + // m_material.init) and writes to the consumer's per-edge RT via the + // generic_texgen_fs shader. That hard-codes "blit upstream input → + // downstream input RT" — which is fine for halp filter nodes whose + // output IS just a CPU-uploaded copy of their input, but is wrong for + // any node that did real work in runInitialPasses (writing to its own + // m_outputTex / a private RT): the framework's input-blit overwrites + // the result, so the consumer sees the unmodified upstream. + // + // When the halp class declares its own runRenderPass, we hand off to + // it. The method runs INSIDE the consumer's beginPass/endPass cycle — + // it is expected to record draw commands only (no beginPass/endPass + // on its own) targeting the currently-bound (per-edge) render target. + void runRenderPass( + score::gfx::RenderList& renderer, QRhiCommandBuffer& commands, + score::gfx::Edge& edge) override + { + if constexpr(requires { state->runRenderPass(renderer, commands, edge); }) + state->runRenderPass(renderer, commands, edge); + else + score::gfx::GenericNodeRenderer::runRenderPass(renderer, commands, edge); } }; template requires( - (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size) >= 1 + (avnd::texture_output_introspection::size + avnd::buffer_output_introspection::size + avnd::geometry_output_introspection::size + scene_output_introspection::size + avnd::gpu_render_target_output_port_output_introspection::size) >= 1 ) struct GfxNode final : CustomGfxNodeBase diff --git a/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp b/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp index 676468cded..afebcbe7a7 100644 --- a/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp +++ 
b/src/plugins/score-plugin-avnd/Crousti/GppCoroutines.hpp @@ -266,6 +266,7 @@ struct handle_update requires { C::vertex; } || requires { C::index; }) { auto buf = rhi.newBuffer(buffer_type(), usage(), command.size); + buf->setName("GppCoroutines::vbuf_or_ibuf"); buf->create(); return reinterpret_cast(buf); } @@ -279,6 +280,7 @@ struct handle_update requires { C::ubo; } || requires { C::storage; }) { auto buf = rhi.newBuffer(buffer_type(), usage(), command.size); + buf->setName("GppCoroutines::ubo_or_ssbo"); buf->create(); // Replace it in our bindings diff --git a/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp b/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp index 9456a45446..c4d8951803 100644 --- a/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/GppShaders.hpp @@ -154,7 +154,7 @@ struct generate_shaders , C::binding(), C::name()); - boost::pfr::for_each_field(field, write_binding{shader}); + avnd::pfr::for_each_field(field, write_binding{shader}); shader += fmt::format("}};\n\n"); } @@ -166,7 +166,7 @@ struct generate_shaders , C::binding(), C::name()); - boost::pfr::for_each_field(field, write_binding{shader}); + avnd::pfr::for_each_field(field, write_binding{shader}); shader += fmt::format("}};\n\n"); } @@ -180,24 +180,24 @@ struct generate_shaders std::string shader = "#version 450\n\n"; if constexpr(requires { lay.vertex_input; }) - boost::pfr::for_each_field(lay.vertex_input, write_input{shader}); + avnd::pfr::for_each_field(lay.vertex_input, write_input{shader}); else if constexpr(requires { typename T::vertex_input; }) - boost::pfr::for_each_field(typename T::vertex_input{}, write_input{shader}); + avnd::pfr::for_each_field(typename T::vertex_input{}, write_input{shader}); else - boost::pfr::for_each_field( + avnd::pfr::for_each_field( DefaultPipeline::layout::vertex_input{}, write_input{shader}); if constexpr(requires { lay.vertex_output; }) - boost::pfr::for_each_field(lay.vertex_output, 
write_output{shader}); + avnd::pfr::for_each_field(lay.vertex_output, write_output{shader}); else if constexpr(requires { typename T::vertex_output; }) - boost::pfr::for_each_field(typename T::vertex_output{}, write_output{shader}); + avnd::pfr::for_each_field(typename T::vertex_output{}, write_output{shader}); shader += "\n"; if constexpr(requires { lay.bindings; }) - boost::pfr::for_each_field(lay.bindings, write_bindings{shader}); + avnd::pfr::for_each_field(lay.bindings, write_bindings{shader}); else if constexpr(requires { typename T::bindings; }) - boost::pfr::for_each_field(typename T::bindings{}, write_bindings{shader}); + avnd::pfr::for_each_field(typename T::bindings{}, write_bindings{shader}); return shader; } @@ -208,21 +208,21 @@ struct generate_shaders std::string shader = "#version 450\n\n"; if constexpr(requires { lay.fragment_input; }) - boost::pfr::for_each_field(lay.fragment_input, write_input{shader}); + avnd::pfr::for_each_field(lay.fragment_input, write_input{shader}); else if constexpr(requires { typename T::fragment_input; }) - boost::pfr::for_each_field(typename T::fragment_input{}, write_input{shader}); + avnd::pfr::for_each_field(typename T::fragment_input{}, write_input{shader}); if constexpr(requires { lay.fragment_output; }) - boost::pfr::for_each_field(lay.fragment_output, write_output{shader}); + avnd::pfr::for_each_field(lay.fragment_output, write_output{shader}); else if constexpr(requires { typename T::fragment_output; }) - boost::pfr::for_each_field(typename T::fragment_output{}, write_output{shader}); + avnd::pfr::for_each_field(typename T::fragment_output{}, write_output{shader}); shader += "\n"; if constexpr(requires { lay.bindings; }) - boost::pfr::for_each_field(lay.bindings, write_bindings{shader}); + avnd::pfr::for_each_field(lay.bindings, write_bindings{shader}); else if constexpr(requires { typename T::bindings; }) - boost::pfr::for_each_field(typename T::bindings{}, write_bindings{shader}); + 
avnd::pfr::for_each_field(typename T::bindings{}, write_bindings{shader}); return shader; } @@ -250,7 +250,7 @@ struct generate_shaders fstr.resize(fstr.size() - 2); fstr += ") in;\n\n"; - boost::pfr::for_each_field(lay.bindings, write_bindings{fstr}); + avnd::pfr::for_each_field(lay.bindings, write_bindings{fstr}); return fstr; } diff --git a/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp b/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp index 8f5d2b6308..7ff165505e 100644 --- a/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/GpuComputeNode.hpp @@ -65,6 +65,7 @@ struct GpuComputeRenderer final : ComputeRendererBaseType QRhiComputePipeline* m_pipeline{}; bool m_createdPipeline{}; + bool m_initialized{}; int sampler_k = 0; int ubo_k = 0; @@ -175,7 +176,7 @@ struct GpuComputeRenderer final : ComputeRendererBaseType QVarLengthArray bindings; using bindings_type = decltype(Node_T::layout::bindings); - boost::pfr::for_each_field( + avnd::pfr::for_each_field( bindings_type{}, [&](auto f) { bindings.push_back(initBinding(renderer, f)); }); srb->setBindings(bindings.begin(), bindings.end()); @@ -230,8 +231,28 @@ struct GpuComputeRenderer final : ComputeRendererBaseType createdUbos[ubo_type::binding()] = ubo; } - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + // Compute renderers own a single shared compute pipeline + SRB; they + // don't allocate any per-output-edge state. Edge add/remove is a no-op + // for them. These overrides are required because NodeRenderer + // ::removeOutputPass is now pure-virtual, and Graph.cpp's incremental + // path drives renderers through addOutputPass (the per-edge passes a + // compute node simply doesn't have). 
+ void removeOutputPass(score::gfx::RenderList&, score::gfx::Edge&) override { } + void addOutputPass( + score::gfx::RenderList&, score::gfx::Edge&, QRhiResourceUpdateBatch&) override { + } + + // All edge-independent setup lives in initState(), mirroring + // CustomGpuRenderer in GpuNode.hpp. The incremental edge-rewire path + // (Graph.cpp) only calls initState()/releaseState()/addOutputPass() on + // newly-spawned renderers; a compute node inserted live would otherwise + // never allocate its pipeline/SRB and run against uninitialised state. + void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + if(m_initialized) + return; + auto& parent = node(); if constexpr(requires { state->prepare(); }) { @@ -255,6 +276,13 @@ struct GpuComputeRenderer final : ComputeRendererBaseType SCORE_ASSERT(m_pipeline->create()); m_createdPipeline = true; } + + m_initialized = true; + } + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); } std::vector tmp; @@ -337,8 +365,11 @@ struct GpuComputeRenderer final : ComputeRendererBaseType } } - void release(score::gfx::RenderList& r) override + void releaseState(score::gfx::RenderList& r) override { + if(!m_initialized) + return; + m_createdPipeline = false; // Release the object's internal states @@ -382,6 +413,13 @@ struct GpuComputeRenderer final : ComputeRendererBaseType sampler_k = 0; ubo_k = 0; + + m_initialized = false; + } + + void release(score::gfx::RenderList& r) override + { + releaseState(r); } void runCompute( diff --git a/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp b/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp index c202ab2d27..83827b5f9b 100644 --- a/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/GpuNode.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include // #include @@ -27,9 +28,17 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer 
score::gfx::PassMap m_p; + // Per-pass "pipeline + SRB created" flags, kept index-parallel with m_p + // and `states` (same push_back in addOutputPass / same erase in + // removeOutputPass). A single global m_createdPipeline could not handle + // a pass added live onto an update()-driven node: the first frame would + // (re)create already-live passes, or skip the new one entirely. Each + // pass now gates its own srb->create()/pipeline->create(). + ossia::small_vector m_passCreated; + score::gfx::MeshBuffers m_meshBuffer{}; - bool m_createdPipeline{}; + QRhiShaderResourceBindings* m_srb{}; int sampler_k = 0; ossia::flat_map createdUbos; @@ -113,14 +122,14 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer if constexpr(requires { decltype(Node_T::layout::bindings){}; }) { using bindings_type = decltype(Node_T::layout::bindings); - boost::pfr::for_each_field(bindings_type{}, [&](auto f) { + avnd::pfr::for_each_field(bindings_type{}, [&](auto f) { bindings.push_back(initBinding(renderer, f)); }); } else if constexpr(requires { sizeof(typename Node_T::layout::bindings); }) { using bindings_type = typename Node_T::layout::bindings; - boost::pfr::for_each_field(bindings_type{}, [&](auto f) { + avnd::pfr::for_each_field(bindings_type{}, [&](auto f) { bindings.push_back(initBinding(renderer, f)); }); } @@ -201,18 +210,18 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer createdUbos[ubo_type::binding()] = ubo; } - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override { - auto& parent = node(); - if constexpr(requires { states[0].prepare(); }) - { - for(auto& state : states) - { - parent.processControlIn( - *this, *state, m_last_message, parent.last_message, parent.m_ctx); - state.prepare(); - } - } + if(m_initialized) + return; + + // NB: prepare()/processControlIn for graphics nodes is NOT invoked + // here — `states` is empty at 
initState time (states are constructed + // per-edge in addOutputPass), so there is nothing to prepare. The old + // `states[0].prepare()` detection was also doubly-wrong: `states[0]` + // is a shared_ptr, so the requires-expression never matched, and even + // if it had, indexing an empty vector is UB. The prepare/control-in + // call now happens in addOutputPass right after each state is built. if(m_meshBuffer.buffers.empty()) { @@ -224,34 +233,154 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer avnd::input_introspection::for_all( [this, &renderer](auto f) { init_input(renderer, f); }); - // Create the initial srbs - // TODO when implementing multi-pass, we may have to - // move this back inside the loop below as they may depend on the pipelines... - auto srb = initBindings(renderer); + // Create the shared shader resource bindings + m_srb = initBindings(renderer); - // Create the states and pipelines - for(score::gfx::Edge* edge : parent.output[0]->edges) + m_initialized = true; + } + + void addOutputPass( + score::gfx::RenderList& renderer, score::gfx::Edge& edge, + QRhiResourceUpdateBatch& res) override + { + auto& parent = node(); + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) + states.push_back(std::make_shared()); + prepareNewState(states.back(), parent); + + // Graphics nodes that declare prepare(): apply any pending control + // input and run prepare() on the freshly-constructed state, here — + // not in initState, where `states` is still empty. Detection uses + // operator-> because states.back() is a shared_ptr. 
+ if constexpr(requires { states.back()->prepare(); }) { - states.push_back(std::make_shared()); - prepareNewState(states.back(), parent); + parent.processControlIn( + *this, *states.back(), m_last_message, parent.last_message, parent.m_ctx); + states.back()->prepare(); + } + + auto ps = createRenderPipeline(renderer, rt); + ps->setShaderResourceBindings(m_srb); + + m_p.emplace_back(&edge, score::gfx::Pass{rt, score::gfx::Pipeline{ps, m_srb}, nullptr}); + m_passCreated.push_back(false); + + // No update step: we can directly create this pass's pipeline here. + // The SRB is shared across all passes (m_srb); creating it is + // idempotent for our purposes, and the per-pass flag tracks the + // pipeline that is genuinely per-edge. + if constexpr(!requires { &Node_T::update; }) + { + SCORE_ASSERT(m_srb->create()); + SCORE_ASSERT(ps->create()); + m_passCreated.back() = true; + } + } + } - auto ps = createRenderPipeline(renderer, rt); - ps->setShaderResourceBindings(srb); + bool hasOutputPassForEdge(score::gfx::Edge& edge) const override + { + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); + } + + void removeOutputPass(score::gfx::RenderList&, score::gfx::Edge& edge) override + { + // Mirror addOutputPass: each edge owns one entry in m_p (pipeline + + // SRB) and one parallel entry in `states`. Release both. The shared + // m_srb pointer is owned by initState; Pass::p.srb refers to the + // SAME pointer (see addOutputPass), so null it out before + // Pipeline::release() to avoid double-deleteLater of the shared SRB. 
+ auto it + = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }); + if(it == m_p.end()) + return; + const auto idx = std::distance(m_p.begin(), it); + it->second.p.srb = nullptr; // shared with siblings — owned by initState + it->second.release(); + m_p.erase(it); + if((std::size_t)idx < states.size()) + states.erase(states.begin() + idx); + if((std::size_t)idx < m_passCreated.size()) + m_passCreated.erase(m_passCreated.begin() + idx); + } + + void releaseState(score::gfx::RenderList& r) override + { + if(!m_initialized) + return; - m_p.emplace_back(edge, score::gfx::Pipeline{ps, srb}); + m_passCreated.clear(); - // No update step: we can directly create the pipeline here - if constexpr(!requires { &Node_T::update; }) + // Release the object's internal states + if constexpr(requires { &Node_T::release; }) + { + for(auto& state : states) + { + for(auto& promise : state->release()) { - SCORE_ASSERT(srb->create()); - SCORE_ASSERT(ps->create()); - m_createdPipeline = true; + gpp::qrhi::handle_release handler{*r.state.rhi}; + visit(handler, promise.current_command); } } } + states.clear(); + + // Release the allocated mesh buffers + m_meshBuffer = {}; + + // Release the allocated textures + for(auto& [id, tex] : this->createdTexs) + tex->deleteLater(); + this->createdTexs.clear(); + + // Release the allocated samplers + for(auto& [id, sampl] : this->createdSamplers) + sampl->deleteLater(); + this->createdSamplers.clear(); + + // Release the allocated ubos + for(auto& [id, ubo] : this->createdUbos) + ubo->deleteLater(); + this->createdUbos.clear(); + + // Release the allocated rts + for(auto [port, rt] : m_rts) + rt.release(); + m_rts.clear(); + + // Release the allocated pipelines. 
Each Pass::p.srb refers to the + // SAME shared m_srb (see addOutputPass); null it out per-pass before + // Pipeline::release() so the shared SRB isn't deleteLater'd once per + // pass (it survived previously only via QRhi's QSet dedup), then + // delete it exactly once below — covering the m_p-empty case too, + // which formerly leaked m_srb. Mirrors removeOutputPass. + for(auto& pass : m_p) + { + pass.second.p.srb = nullptr; // shared — owned by initState + pass.second.release(); + } + m_p.clear(); + if(m_srb) + m_srb->deleteLater(); + m_srb = nullptr; + + m_meshBuffer = {}; + + sampler_k = 0; + + m_initialized = false; + } + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); + + auto& parent = node(); + for(score::gfx::Edge* edge : parent.output[0]->edges) + addOutputPass(renderer, *edge, res); } std::vector tmp; @@ -289,14 +418,22 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer // as we have to take into account that buffers could be allocated, freed, etc. // and thus updated in the shader resource bindings SCORE_ASSERT(states.size() == m_p.size()); + SCORE_ASSERT(states.size() == m_passCreated.size()); //SCORE_SOFT_ASSERT(state.size() == edges); for(int k = 0; k < states.size(); k++) { auto& state = *states[k]; auto& pass = m_p[k].second; + // Per-pass creation flag: a pass added live (e.g. a new output + // edge onto an update()-driven node) starts at false and gets its + // srb/pipeline created on the next update; passes already live + // keep their pipeline. A single global flag would skip the new + // pass entirely (or needlessly destroy the live ones). 
+ const bool created = m_passCreated[k]; + bool srb_touched{false}; - tmp.assign(pass.srb->cbeginBindings(), pass.srb->cendBindings()); + tmp.assign(pass.p.srb->cbeginBindings(), pass.p.srb->cendBindings()); for(auto& promise : state.update()) { using ret_type = decltype(promise.feedback_value); @@ -307,75 +444,24 @@ struct CustomGpuRenderer final : score::gfx::NodeRenderer if(srb_touched) { - if(m_createdPipeline) - pass.srb->destroy(); + if(created) + pass.p.srb->destroy(); - pass.srb->setBindings(tmp.begin(), tmp.end()); + pass.p.srb->setBindings(tmp.begin(), tmp.end()); } - if(!m_createdPipeline) + if(!created) { - SCORE_ASSERT(pass.srb->create()); - SCORE_ASSERT(pass.pipeline->create()); + SCORE_ASSERT(pass.p.srb->create()); + SCORE_ASSERT(pass.p.pipeline->create()); + m_passCreated[k] = true; } } - m_createdPipeline = true; tmp.clear(); } } - void release(score::gfx::RenderList& r) override - { - m_createdPipeline = false; - - // Release the object's internal states - if constexpr(requires { &Node_T::release; }) - { - for(auto& state : states) - { - for(auto& promise : state->release()) - { - gpp::qrhi::handle_release handler{*r.state.rhi}; - visit(handler, promise.current_command); - } - } - } - states.clear(); - - // Release the allocated mesh buffers - m_meshBuffer = {}; - - // Release the allocated textures - for(auto& [id, tex] : this->createdTexs) - tex->deleteLater(); - this->createdTexs.clear(); - - // Release the allocated samplers - for(auto& [id, sampl] : this->createdSamplers) - sampl->deleteLater(); - this->createdSamplers.clear(); - - // Release the allocated ubos - for(auto& [id, ubo] : this->createdUbos) - ubo->deleteLater(); - this->createdUbos.clear(); - - // Release the allocated rts - // TODO investigate why reference does not work here: - for(auto [port, rt] : m_rts) - rt.release(); - m_rts.clear(); - - // Release the allocated pipelines - for(auto& pass : m_p) - pass.second.release(); - m_p.clear(); - - m_meshBuffer = {}; - 
m_createdPipeline = false; - - sampler_k = 0; - } + void release(score::gfx::RenderList& r) override { releaseState(r); } void runInitialPasses( score::gfx::RenderList& renderer, QRhiCommandBuffer& commands, diff --git a/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp b/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp index a18b24f0fa..99ce89117a 100644 --- a/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/GpuUtils.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include @@ -170,6 +172,8 @@ struct GpuProcessIns { using node_type = std::remove_cvref_t; auto& node = const_cast(gpu.node()); + if(field_index >= mess.input.size()) + return; auto val = ossia::get_if(&mess.input[field_index]); if(!val) return; @@ -181,6 +185,8 @@ struct GpuProcessIns { using node_type = std::remove_cvref_t; auto& node = const_cast(gpu.node()); + if(field_index >= mess.input.size()) + return; auto val = ossia::get_if(&mess.input[field_index]); if(!val) return; @@ -190,10 +196,24 @@ struct GpuProcessIns template void operator()(Field& t, avnd::field_index field_index) { - using node_type = std::remove_cvref_t; - auto& node = const_cast(gpu.node()); + // Intentional no-op. Geometry data flows through its own publish path + // (geometry_inputs_storage::readInputGeometries / etc.); the + // GpuProcessIns visitor only handles per-message control fields + // (texture/parameter) — geometry data is not in the control message. + // The empty body keeps GpuProcessIns instantiable for nodes whose + // input list contains geometry fields without forcing them to hit + // the `= delete` catch-all at the end of this struct. + } - // FIXME + template + void operator()(Field& t, avnd::field_index field_index) + { + // Intentional no-op — same reasoning as the geometry_port overload above. 
+ // Scene data flows through scene_inputs_storage / scene_outputs_storage + // separately; GpuProcessIns only handles per-message control fields. + // The empty body keeps GpuProcessIns instantiable for nodes whose + // input list contains scene_port fields without hitting the `= delete` + // catch-all at the end of this struct. } void operator()(auto& t, auto field_index) = delete; @@ -423,12 +443,24 @@ struct port_to_type_enum { return score::gfx::Types::Image; } + template + constexpr auto operator()(avnd::field_reflection p) + { + return score::gfx::Types::Image; + } template constexpr auto operator()(avnd::field_reflection p) { return score::gfx::Types::Geometry; } + // Scene ports reuse Types::Geometry — a scene is a richer form of geometry. + template + requires(!avnd::geometry_port) + constexpr auto operator()(avnd::field_reflection p) + { + return score::gfx::Types::Geometry; + } template constexpr auto operator()(avnd::field_reflection p) { @@ -459,9 +491,25 @@ struct port_to_type_enum { using value_type = std::remove_cvref_t; - if constexpr(std::is_aggregate_v) + if constexpr(std::is_array_v) + { + static constexpr int sz = sizeof(value_type) / sizeof(value_type{}[0]); + if constexpr(sz == 2) + { + return score::gfx::Types::Vec2; + } + else if constexpr(sz == 3) + { + return score::gfx::Types::Vec3; + } + else if constexpr(sz == 4) + { + return score::gfx::Types::Vec4; + } + } + else if constexpr(std::is_aggregate_v) { - constexpr int sz = boost::pfr::tuple_size_v; + static constexpr int sz = avnd::pfr::tuple_size_v; if constexpr(sz == 2) { return score::gfx::Types::Vec2; @@ -484,19 +532,71 @@ struct port_to_type_enum } }; +// Compile-time port flags derived from a field's declarative metadata. +// Inspects: +// - `texture_target` (texture_kind_of) — non-2D textures bypass the +// local-RT allocation and grab the upstream texture directly. 
+// - `samplable_depth` (samplable_depth_of) — opt-in to having the +// framework allocate a sampleable depth attachment on the producing +// edge's RT and expose its handle through `texture.depth_handle`, +// mirroring the semantics CSF/ISF shaders get via "DEPTH": true. +template +constexpr score::gfx::Flag port_flags_for_field() noexcept +{ + if constexpr(avnd::gpu_texture_port) + { + constexpr auto kind = halp::texture_kind_of(); + constexpr bool nonD2 = (kind != halp::texture_kind::texture_2d); + constexpr bool depth = halp::samplable_depth_of(); + if constexpr(nonD2 && depth) + return score::gfx::Flag::GrabsFromSource | score::gfx::Flag::SamplableDepth; + else if constexpr(nonD2) + return score::gfx::Flag::GrabsFromSource; + else if constexpr(depth) + return score::gfx::Flag::SamplableDepth; + } + return score::gfx::Flag{}; +} + +// Map QRhi's depth-format taxonomy onto halp's depth_format_t. +// The 4-arg subset matches every depth format score's createRenderTarget +// can produce (today always D32F, but the API accepts the others). 
+inline constexpr halp::gpu_texture::depth_format_t qrhiToHalpDepthFormat( + QRhiTexture::Format f) noexcept +{ + using D = halp::gpu_texture::depth_format_t; + switch(f) + { + case QRhiTexture::D16: return D::D16; + case QRhiTexture::D24: return D::D24; + case QRhiTexture::D24S8: return D::D24S8; + case QRhiTexture::D32F: return D::D32F; + default: break; + } + return D::D32F; +} + template inline void initGfxPorts(auto* self, auto& input, auto& output) { avnd::input_introspection::for_all( [self, &input](avnd::field_reflection f) { static constexpr auto type = port_to_type_enum{}(f); - input.push_back(new score::gfx::Port{self, {}, type, {}, {}}); + static constexpr auto flags = port_flags_for_field(); + input.push_back(new score::gfx::Port{self, {}, type, flags, {}}); }); avnd::output_introspection::for_all( [self, &output](avnd::field_reflection f) { static constexpr auto type = port_to_type_enum{}(f); - output.push_back(new score::gfx::Port{self, {}, type, {}, {}}); + // port_flags_for_field encodes INPUT-side sink semantics + // (GrabsFromSource → "sample the upstream's texture directly"; + // SamplableDepth → "ask the producer for a sampleable depth + // attachment"). Neither has any meaning on an OUTPUT port — emitting + // them here would make the graph treat this node's own output as if it + // grabbed from / sampled some upstream source. Outputs carry no such + // flags. + output.push_back(new score::gfx::Port{self, {}, type, score::gfx::Flag{}, {}}); }); } @@ -690,6 +790,13 @@ struct geometry_inputs_storage allocated.push_back(buf); meshes.buffers[buffer_index] = buf; } + else if(auto* existing = meshes.buffers[buffer_index]; + existing && existing->size() < bytesize) + { + // Buffer exists but is too small — resize it. 
+ existing->setSize(bytesize); + existing->create(); + } res->uploadStaticBuffer(meshes.buffers[buffer_index], 0, bytesize, data); }, [&](auto& write_buf, int buffer_index, void* handle) { @@ -727,9 +834,11 @@ template requires(avnd::geometry_input_introspection::size == 0) struct geometry_inputs_storage { - static void readInputBuffers(auto&&...) { } + static void readInputGeometries(auto&&...) { } static void inputAboutToFinish(auto&&...) { } + + static void release(auto&&...) { } }; template @@ -1034,7 +1143,7 @@ struct texture_inputs_storage template QRhiTexture* createInput( score::gfx::RenderList& renderer, score::gfx::Port* port, Tex& texture_spec, - const score::gfx::RenderTargetSpecs& spec) + const score::gfx::RenderTargetSpecs& spec, bool wantsSamplableDepth = false) { static constexpr auto flags = QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource; @@ -1054,8 +1163,14 @@ struct texture_inputs_storage fmt, spec.size, 1, flags); SCORE_ASSERT(texture->create()); + // wantsSamplableDepth implies wantsDepth: createRenderTarget allocates + // a sampleable single-sample depth texture (with MSAA-resolve when + // available) instead of a renderbuffer / non-resolve depth target. + // Same shape ISF/CSF inputs get when their port has SamplableDepth. 
+ const bool wantsDepth = renderer.requiresDepth(*port) || wantsSamplableDepth; m_rts[port] = score::gfx::createRenderTarget( - renderer.state, texture, renderer.samples(), renderer.requiresDepth(*port)); + renderer.state, texture, renderer.samples(), + wantsDepth, wantsSamplableDepth); return texture; } @@ -1065,6 +1180,21 @@ struct texture_inputs_storage avnd::texture_input_introspection::for_all_n2( avnd::get_inputs(*self.state), [&](F& t, avnd::predicate_index, avnd::field_index) { + // Non-2D GPU texture inputs (cube / array / 3D) don't get a local + // render target — the port carries Flag::GrabsFromSource (set by + // initGfxPorts via texture_kind_of()), the graph will populate + // t.texture.handle through updateInputTexture when the edge + // resolves. Skipping the allocation here avoids wasting a 2D + // colour attachment that would never be rendered into anyway. + if constexpr(avnd::gpu_texture_port + && halp::texture_kind_of() != halp::texture_kind::texture_2d) + { + t.texture.kind = halp::texture_kind_of(); + // Handle + size populated later by updateInputTexture once the + // upstream is resolved. 
+ return; + } + auto& parent = self.node(); auto spec = parent.resolveRenderTargetSpecs(N, renderer); if constexpr(requires { @@ -1076,7 +1206,10 @@ struct texture_inputs_storage spec.size.rheight() = t.request_height; } - auto tex = createInput(renderer, parent.input[N], t.texture, spec); + constexpr bool wantsSamplableDepth + = avnd::gpu_texture_port && halp::samplable_depth_of(); + auto tex = createInput( + renderer, parent.input[N], t.texture, spec, wantsSamplableDepth); if constexpr(avnd::cpu_texture_port) { t.texture.width = spec.size.width(); @@ -1087,6 +1220,16 @@ struct texture_inputs_storage t.texture.handle = tex; t.texture.width = spec.size.width(); t.texture.height = spec.size.height(); + if constexpr(wantsSamplableDepth) + { + // The local RT just allocated owns a sampleable depth texture + // that the upstream renders into when the edge runs — same + // pointer, stable for the RT's lifetime, no per-frame refresh. + const auto& rt = m_rts[parent.input[N]]; + t.texture.depth_handle = rt.depthTexture; + if(rt.depthTexture) + t.texture.depth_format = qrhiToHalpDepthFormat(rt.depthTexture->format()); + } } }); } @@ -1196,7 +1339,7 @@ struct texture_inputs_storage template static QRhiTexture* updateTexture(auto& self, score::gfx::RenderList& renderer, int k, const Tex& cpu_tex) { - auto& [sampler, texture] = self.m_samplers[k]; + auto& [sampler, texture, fb_] = self.m_samplers[k]; if(texture) { auto sz = texture->pixelSize(); @@ -1213,8 +1356,8 @@ static QRhiTexture* updateTexture(auto& self, score::gfx::RenderList& renderer, QRhiTexture::Flag{}); newtex->create(); for(auto& [edge, pass] : self.m_p) - if(pass.srb) - score::gfx::replaceTexture(*pass.srb, sampler, newtex); + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, sampler, newtex); texture = newtex; if(oldtex && oldtex != &renderer.emptyTexture()) @@ -1227,8 +1370,8 @@ static QRhiTexture* updateTexture(auto& self, score::gfx::RenderList& renderer, else { for(auto& [edge, pass] : self.m_p) - 
if(pass.srb) - score::gfx::replaceTexture(*pass.srb, sampler, &renderer.emptyTexture()); + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, sampler, &renderer.emptyTexture()); return &renderer.emptyTexture(); } @@ -1362,7 +1505,7 @@ struct texture_outputs_storage void release(auto& self, score::gfx::RenderList& r) { // Free outputs - for(auto& [sampl, texture] : self.m_samplers) + for(auto& [sampl, texture, fb_] : self.m_samplers) { if(texture != &r.emptyTexture()) texture->deleteLater(); @@ -1491,7 +1634,7 @@ struct geometry_outputs_storage SCORE_ASSERT(it != edge_sink->node->input.end()); int n = it - edge_sink->node->input.begin(); - rendered_node->second->process(n, spc); + rendered_node->second->process(n, spc, edge.source); // 3. Same for transform3d @@ -1518,6 +1661,12 @@ struct geometry_outputs_storage avnd::get_outputs(state), [&](auto& field, auto pred) { this->upload(renderer, field, edge, pred); }); } + + // Lifecycle parity with the other *_outs storages. The geometry_spec + // wrapper carries non-owning pointers + transform values today, so + // release is a no-op — wired so future RHI handles on the storage + // release cleanly. + void release(score::gfx::RenderList&) noexcept { } }; @@ -1529,7 +1678,131 @@ struct geometry_outputs_storage { } + static void release(auto&&...) noexcept { } +}; + +// Scene output support (Crousti-side pending promotion to avendish). +// The `scene_port` concept and `scene_dirt_flags` live in SceneConcepts.hpp +// so the port-creation visitor in ProcessModelPortInit.hpp can reuse them. 
+ +template +using is_scene_port_t = boost::mp11::mp_bool>; + +template +using scene_output_introspection = + avnd::predicate_introspection::type, is_scene_port_t>; + +template +using scene_input_introspection = + avnd::predicate_introspection::type, is_scene_port_t>; + +// Scene input transport: NodeRenderer::process(port, scene_spec, source) +// already merges multi-producer scenes into `this->scene`, so scene_inputs_storage +// only needs to copy that merged scene_spec into each halp scene input field +// before operator()() runs. Cheap (shared_ptr assignment), no decode. +template +struct scene_inputs_storage; + +template + requires(scene_input_introspection::size > 0) +struct scene_inputs_storage +{ + void readInputScenes(const ossia::scene_spec& scene, auto& state) + { + scene_input_introspection::for_all( + avnd::get_inputs(state), [&](auto& field) { field.scene = scene; }); + } + + static void release(score::gfx::RenderList&) { } +}; + +template + requires(scene_input_introspection::size == 0) +struct scene_inputs_storage +{ + static void readInputScenes(auto&&...) { } + static void release(auto&&...) { } +}; + +template +struct scene_outputs_storage; + +template + requires(scene_output_introspection::size > 0) +struct scene_outputs_storage +{ + template + void upload( + score::gfx::RenderList& renderer, Field& ctrl, score::gfx::Edge& edge, + avnd::predicate_index) + { + // Publish the scene every frame. The old behaviour skipped the push + // when `ctrl.dirty == 0` — but that broke multi-producer graphs: any + // other producer on the same downstream inlet (e.g. a legacy Geometry + // outlet of the same loader, or a Light node) pushes every frame + // unconditionally, and the consumer's NodeRenderer::process(...) logic + // replaces `this->scene` on the first push of each frame when + // `sceneChanged` is false (i.e. at frame start). A once-only scene push + // then gets overwritten every subsequent frame and its transforms are + // lost. 
Downstream consumers already short-circuit via shared_ptr + // identity + version (ScenePreprocessor checks m_cachedSceneState), so + // pushing every frame is cheap — just a few atomic refcount bumps. + // + // Producers can still use `ctrl.dirty` for their own change tracking: the + // bits no longer gate publishing, but are still reset to 0 after each push. + if(!ctrl.scene.state) + return; + + auto* edge_sink = edge.sink; + if(!edge_sink || !edge_sink->node) + return; + + auto rendered_node = edge_sink->node->renderedNodes.find(&renderer); + if(rendered_node == edge_sink->node->renderedNodes.end()) + return; + + auto it = std::find( + edge_sink->node->input.begin(), edge_sink->node->input.end(), edge_sink); + if(it == edge_sink->node->input.end()) + return; + int n = it - edge_sink->node->input.begin(); + + // NodeRenderer::process(port, scene_spec, source_key) handles additive + // merging across multiple producers converging on the same sink port + // (keyed on the source edge's producer Port pointer), extracts a legacy + // geometry_spec for downstream consumers that only understand geometry, + // and sets sceneChanged=true. + rendered_node->second->process(n, ctrl.scene, edge.source); + + if constexpr(requires { ctrl.dirty; }) + ctrl.dirty = 0; + } + + void upload(score::gfx::RenderList& renderer, auto& state, score::gfx::Edge& edge) + { + scene_output_introspection::for_all_n( + avnd::get_outputs(state), + [&](auto& field, auto pred) { this->upload(renderer, field, edge, pred); }); + } + + // Lifecycle parity with texture_outputs_storage / buffer_outputs_storage: + // the storage owns no QRhi resources today (the scene_spec is a value- + // semantics struct + a shared_ptr to scene_state, both managed by their + // own destructors), so release is a documented no-op. Mirror the call + // site naming so future RHI handles added to the storage have a release + // hook ready, and so CpuFilterNode / CpuAnalysisNode releaseState calls + // are symmetric across all storages. 
+ void release(score::gfx::RenderList&) noexcept { } +}; + +template + requires(scene_output_introspection::size == 0) +struct scene_outputs_storage +{ + static void upload(auto&&...) { } + static void release(auto&&...) noexcept { } }; + } #endif diff --git a/src/plugins/score-plugin-avnd/Crousti/Layer.hpp b/src/plugins/score-plugin-avnd/Crousti/Layer.hpp index da0ee9ef5d..74f84bffd5 100644 --- a/src/plugins/score-plugin-avnd/Crousti/Layer.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/Layer.hpp @@ -15,6 +15,8 @@ #include +#include + namespace oscr { template @@ -333,13 +335,11 @@ struct LayoutBuilder final : Process::LayoutBuilderBase createdLayouts.push_back(new_l); { - using namespace boost::pfr; - using namespace boost::pfr::detail; - static constexpr int N = boost::pfr::tuple_size_v; - auto t = boost::pfr::structure_tie(item); + static constexpr int N = avnd::pfr::tuple_size_v; + auto t = avnd::pfr::detail::tie_as_tuple(item); [&](std::index_sequence) { using namespace std; - using namespace boost::pfr; + using namespace avnd::pfr; (this->walkLayout(get(t), recursive_members...), ...); }(std::make_index_sequence{}); diff --git a/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp b/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp index f358ccfbde..4bc8fb57a3 100644 --- a/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/MessageBus.hpp @@ -19,7 +19,7 @@ struct Serializer if constexpr(std::is_arithmetic_v) r.stream().stream << f; else if constexpr(std::is_aggregate_v) - boost::pfr::for_each_field(f, *this); + avnd::pfr::for_each_field(f, *this); else if constexpr(avnd::list_ish) { r.stream().stream << (int64_t)std::ssize(f); @@ -115,8 +115,11 @@ struct Deserializer DataStreamWriter& r; template - requires std::is_aggregate_v - void operator()(F& f) { boost::pfr::for_each_field(f, *this); } + requires std::is_aggregate_v + void operator()(F& f) + { + avnd::pfr::for_each_field(f, *this); + } template 
requires(std::is_arithmetic_v) diff --git a/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp b/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp index e8336fa5fd..8b9a2a0762 100644 --- a/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/Metadata.hpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -154,12 +155,25 @@ struct ProcessPortVisitor this->texture(); } + template + void operator()(const avnd::field_reflection) + { + this->texture(); + } template void operator()(const avnd::field_reflection) { this->geometry(); } + // Scene ports travel through the same Process::PortType::Geometry slot. + template + requires(!avnd::geometry_port) + void operator()(const avnd::field_reflection) + { + this->geometry(); + } + template void operator()(const avnd::field_reflection) { diff --git a/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp b/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp index f617a3ede6..50348b4f58 100644 --- a/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/Metadatas.hpp @@ -1,35 +1,51 @@ #pragma once #include +#include #include #include -#include #include #include namespace oscr { template -concept GpuNode = avnd::texture_input_introspection::size > 0 - || avnd::texture_output_introspection::size > 0 - || avnd::buffer_input_introspection::size > 0 - || avnd::buffer_output_introspection::size > 0 - || avnd::geometry_input_introspection::size > 0 - || avnd::geometry_output_introspection::size > 0; - +concept GpuNode + = avnd::texture_input_introspection::size > 0 + || avnd::texture_output_introspection::size > 0 + || avnd::buffer_input_introspection::size > 0 + || avnd::buffer_output_introspection::size > 0 + || avnd::geometry_input_introspection::size > 0 + || avnd::geometry_output_introspection::size > 0 + || scene_input_introspection::size > 0 + || scene_output_introspection::size > 0 + || 
avnd::gpu_render_target_output_port_output_introspection::size > 0; + +// Halp shader nodes (vertex+fragment / compute) currently route through +// CustomGpuRenderer / GpuComputeRenderer, neither of which carries +// geometry_ / scene_ I/O storage today. Exclude nodes that declare those +// ports from the GpuGraphicsNode2 / GpuComputeNode2 dispatch so they fall +// through to GfxNode<> (which has the proper storage via CpuFilterNode / +// CpuAnalysisNode). When CustomGpuRenderer / GpuComputeRenderer gain +// dedicated scene_ / geometry_ storage, drop the requires-clause exclusion +// here and add init_input + readInput / upload paths in those renderers. template -concept GpuGraphicsNode2 = requires -{ - T::layout::graphics; -}; +concept GpuGraphicsNode2 + = requires { T::layout::graphics; } + && (avnd::geometry_input_introspection::size == 0) + && (avnd::geometry_output_introspection::size == 0) + && (scene_input_introspection::size == 0) + && (scene_output_introspection::size == 0); template -concept GpuComputeNode2 = requires -{ - T::layout::compute; -}; +concept GpuComputeNode2 + = requires { T::layout::compute; } + && (avnd::geometry_input_introspection::size == 0) + && (avnd::geometry_output_introspection::size == 0) + && (scene_input_introspection::size == 0) + && (scene_output_introspection::size == 0); template concept is_gpu = GpuNode || GpuGraphicsNode2 || GpuComputeNode2; diff --git a/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp b/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp index f019937881..5a1cd5be99 100644 --- a/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/ProcessModel.hpp @@ -19,8 +19,6 @@ #include #include -#include - #include #include diff --git a/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp b/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp index 39c3de7bc4..89e6bb2931 100644 --- 
a/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp +++ b/src/plugins/score-plugin-avnd/Crousti/ProcessModelPortInit.hpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -276,6 +277,22 @@ struct InletInitFunc #endif } + // Scene inputs reuse Gfx::GeometryInlet — a scene is a richer form of + // geometry and travels through the same Process-layer port. Mirror of the + // outlet overload below. Needed so scene-modifying halp nodes (Transform, + // SceneFilter, ...) can declare `struct { ossia::scene_spec scene; } scene_in;` + // in their inputs{} struct and get wired up by the framework. + template + requires(!avnd::geometry_port) + void operator()(const T& in, auto idx) + { +#if SCORE_PLUGIN_GFX + auto p = new Gfx::GeometryInlet(portName(), Id(inlet++), &self); + setupNewPort(in, p); + ins.push_back(p); +#endif + } + template void operator()(const avnd::field_reflection& in, auto dummy) { @@ -407,6 +424,16 @@ struct OutletInitFunc #endif } + template + void operator()(const T& out, auto idx) + { +#if SCORE_PLUGIN_GFX + auto p = new Gfx::TextureOutlet(portName(), Id(outlet++), &self); + setupNewPort(out, p); + outs.push_back(p); +#endif + } + template void operator()(const T& out, auto idx) { @@ -417,6 +444,20 @@ struct OutletInitFunc #endif } + // Scene outputs reuse Gfx::GeometryOutlet — a scene is a richer form of + // geometry that travels through the same Process-layer port. The Crousti + // upload path publishes scene_spec via NodeRenderer::process(scene_spec). 
+ template + requires(!avnd::geometry_port) + void operator()(const T& out, auto idx) + { +#if SCORE_PLUGIN_GFX + auto p = new Gfx::GeometryOutlet(portName(), Id(outlet++), &self); + setupNewPort(out, p); + outs.push_back(p); +#endif + } + template void operator()(const T& out, auto idx) { diff --git a/src/plugins/score-plugin-avnd/Crousti/SceneConcepts.hpp b/src/plugins/score-plugin-avnd/Crousti/SceneConcepts.hpp new file mode 100644 index 0000000000..abe4e50fa0 --- /dev/null +++ b/src/plugins/score-plugin-avnd/Crousti/SceneConcepts.hpp @@ -0,0 +1,45 @@ +#pragma once + +// Scene port concept — shared between Crousti's port setup (type dispatch, +// port factory) and the GPU upload path. +// +// A halp output struct field is a "scene port" when it carries an +// `ossia::scene_spec scene` field. Scene output travels through the +// existing Gfx::GeometryOutlet / Types::Geometry: a scene is a richer form +// of geometry, same pattern as Process::TexturePort carrying any GPU +// resource. +// +// Once the design proves out, this should be promoted to avendish itself +// (3rdparty/avendish/include/avnd/concepts/gfx.hpp) under a corresponding +// scene concept alongside `geometry_port`. + +#include + +#include +#include + +namespace oscr +{ + +template +concept scene_port = requires(T t) { + { t.scene } -> std::convertible_to; +}; + +// Dirty-flag lexicon mirrors ossia::scene_port::dirt_flags so shader authors +// can signal fine-grained changes without republishing the whole scene. +// Users set bits on the halp field's `dirty` member; the upload path clears +// them after publishing. 
+namespace scene_dirt_flags +{ +constexpr uint8_t transform = 0x01; +constexpr uint8_t geometry = 0x02; +constexpr uint8_t materials = 0x04; +constexpr uint8_t lights = 0x08; +constexpr uint8_t animation = 0x10; +constexpr uint8_t environment = 0x20; +constexpr uint8_t structure = 0x40; +constexpr uint8_t all = 0xFF; +} + +} diff --git a/src/plugins/score-plugin-fx/Fx/VelToNote.hpp b/src/plugins/score-plugin-fx/Fx/VelToNote.hpp index 7e93b8ed3e..547fa06ebe 100644 --- a/src/plugins/score-plugin-fx/Fx/VelToNote.hpp +++ b/src/plugins/score-plugin-fx/Fx/VelToNote.hpp @@ -4,8 +4,6 @@ #include #include -#include - #include #include #include diff --git a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp index 66cb9114c9..c4500a5da0 100644 --- a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp +++ b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.cpp @@ -41,10 +41,14 @@ layout(location = 0) out vec2 isf_FragNormCoord; static constexpr auto vertexInitFunc = R"_( void isf_vertShaderInit() { - gl_Position = clipSpaceCorrMatrix * vec4( position, 0.0, 1.0 ); + gl_Position = clipSpaceCorrMatrix * vec4(position, 0.0, 1.0); isf_FragNormCoord = vec2((gl_Position.x+1.0)/2.0, (gl_Position.y+1.0)/2.0); +} + +void isf_vertShaderFinish() +{ #if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL) - gl_Position.y = - gl_Position.y; + gl_Position.y = -gl_Position.y; #endif } )_"; @@ -53,6 +57,7 @@ void isf_vertShaderInit() void main() { isf_vertShaderInit(); + isf_vertShaderFinish(); } )_"; @@ -67,12 +72,18 @@ layout(std140, binding = 0) uniform renderer_t { mat4 clipSpaceCorrMatrix_; vec2 RENDERSIZE_; + // MSAA sample count of the active output target (1 when MSAA is off). + // Mirrors RenderList::samples(); needed because glslang strips + // gl_NumSamples under SPIR-V. _pad0 keeps the struct vec4-aligned. 
+ int MSAA_SAMPLES_; + int _renderer_pad0_; } isf_renderer_uniforms; // This dance is needed because otherwise // spirv-cross may generate different struct names in the vertex & fragment, causing crashes.. // but we have to keep compat with ISF #define clipSpaceCorrMatrix isf_renderer_uniforms.clipSpaceCorrMatrix_ +#define MSAA_SAMPLES isf_renderer_uniforms.MSAA_SAMPLES_ // Time-dependent uniforms, only relevant during execution layout(std140, binding = 1) uniform process_t { @@ -86,6 +97,15 @@ layout(std140, binding = 1) uniform process_t { vec2 RENDERSIZE_; vec4 DATE_; + // Mirrors gl_NumWorkGroups for compute shaders. SPIRV-Cross's HLSL + // backend refuses to emit code for the NumWorkgroups built-in unless + // remap_num_workgroups_builtin() is set up on both the cross-compiler + // and the QRhi side; QShaderBaker exposes neither, so any compute + // shader using gl_NumWorkGroups silently fails to bake to HLSL on + // D3D11/D3D12. We sidestep that by routing references through this + // uniform — populated host-side just before each dispatch — and + // textually shadowing the built-in via the #define below. + uvec3 NUMWORKGROUPS_; } isf_process_uniforms; #define TIME isf_process_uniforms.TIME_ @@ -95,12 +115,29 @@ layout(std140, binding = 1) uniform process_t { #define FRAMEINDEX isf_process_uniforms.FRAMEINDEX_ #define RENDERSIZE isf_process_uniforms.RENDERSIZE_ #define DATE isf_process_uniforms.DATE_ +#define SAMPLERATE isf_process_uniforms.SAMPLERATE_ +#define gl_NumWorkGroups isf_process_uniforms.NUMWORKGROUPS_ +#define isf_NumWorkGroups isf_process_uniforms.NUMWORKGROUPS_ )_"; static constexpr auto defaultFunctions = R"_( +// GLSL's textureSize is overloaded by sampler dimensionality — sampler2D +// returns ivec2, sampler3D returns ivec3. Authors typically reach for +// TEX_DIMENSIONS regardless of 2D/3D; the *_2D / *_3D aliases below make +// the intended dimensionality explicit in shader source. 
#define TEX_DIMENSIONS(tex) textureSize(tex, 0) +#define TEX_DIMENSIONS_2D(tex) textureSize(tex, 0) +#define TEX_DIMENSIONS_3D(tex) textureSize(tex, 0) #define IMG_SIZE(tex) textureSize(tex, 0) +#define IMG_SIZE_3D(tex) textureSize(tex, 0) + +// IMG_CUBE(tex, dir) — canonical colour-cube read; same in both coord systems +// since a direction vector has no Y-flip. IMG_CUBE_DEPTH(tex, dir) — +// canonical depth-cube read for inputs declared DEPTH: true on a cubemap, +// hides the internal `_depth` companion binding. +#define IMG_CUBE(tex, dir) texture(tex, dir) +#define IMG_CUBE_DEPTH(tex, dir) texture(tex##_depth, dir).r #if defined(QSHADER_SPIRV) #define isf_FragCoord vec4(gl_FragCoord.x, RENDERSIZE.y - gl_FragCoord.y, gl_FragCoord.z, gl_FragCoord.w) @@ -384,6 +421,86 @@ static bool parse_input_impl(sajson::value& v, bool) return v.get_type() == sajson::TYPE_TRUE; } +// Parse sampler-config fields from a JSON input object directly (flat fields, +// no nested "SAMPLER" object). All fields optional; missing = keep default. 
+static void parse_sampler_config(sampler_config& s, const sajson::value& v) +{ + auto str_field = [&](const char* key, std::string& out) { + if(auto k = v.find_object_key_insensitive(sajson::literal(key)); + k != v.get_length()) + { + auto val = v.get_object_value(k); + if(val.get_type() == sajson::TYPE_STRING) + out = val.as_string(); + } + }; + auto float_field = [&](const char* key, std::optional& out) { + if(auto k = v.find_object_key_insensitive(sajson::literal(key)); + k != v.get_length()) + { + auto val = v.get_object_value(k); + if(is_number(val)) + out = (float)val.get_number_value(); + } + }; + + str_field("WRAP", s.wrap); + str_field("WRAP_S", s.wrap_s); + str_field("WRAP_T", s.wrap_t); + str_field("WRAP_R", s.wrap_r); + str_field("FILTER", s.filter); + str_field("MIN_FILTER", s.min_filter); + str_field("MAG_FILTER", s.mag_filter); + str_field("MIPMAP_MODE", s.mipmap_mode); + str_field("BORDER_COLOR", s.border_color); + str_field("COMPARE", s.compare); + float_field("ANISOTROPY", s.anisotropy); + float_field("LOD_BIAS", s.lod_bias); + float_field("MIN_LOD", s.min_lod); + float_field("MAX_LOD", s.max_lod); +} + +// Audio inputs expose only FILTER and WRAP — audio textures are 1-mip +// 2D samplers so the rest of sampler_config (COMPARE / BORDER_COLOR / LOD +// / anisotropy) has no meaningful effect. +static void parse_audio_sampler_config(audio_sampler_config& s, const sajson::value& v) +{ + auto str_field = [&](const char* key, std::string& out) { + if(auto k = v.find_object_key_insensitive(sajson::literal(key)); + k != v.get_length()) + { + auto val = v.get_object_value(k); + if(val.get_type() == sajson::TYPE_STRING) + out = val.as_string(); + } + }; + str_field("FILTER", s.filter); + str_field("WRAP", s.wrap); +} + +// Drop COMPARE from a sampler config whose texture shape has no corresponding +// *Shadow GLSL sampler type. 
A non-"never" COMPARE makes the runtime call +// QRhiSampler::setTextureCompareOp, which on Vulkan requires the shader-side +// binding to be a shadow sampler (compareEnable=VK_TRUE is a validation +// error otherwise) and on the other backends produces undefined reads. The +// only core-GLSL shape without a shadow variant is 3D — sampler3DShadow is +// not a core type. 2D / 2D-array / cube / cube-array all have shadow +// counterparts and are handled by the emitter. +static void drop_unsupported_compare_3d(sampler_config& s, const char* where) +{ + if(s.compare.empty()) return; + std::string c = s.compare; + for(auto& ch : c) ch = (char)tolower(ch); + if(c == "never") return; + fmt::print( + stderr, + "[isf] {}: COMPARE is set but sampler3DShadow is not a core GLSL " + "sampler type — ignoring. Use a 2D, 2D-array, cubemap or cubemap-array " + "shadow sampler instead.\n", + where); + s.compare.clear(); +} + static void parse_input(image_input& inp, const sajson::value& v) { if(auto k = v.find_object_key_insensitive(sajson::literal("DIMENSIONS")); @@ -391,15 +508,64 @@ static void parse_input(image_input& inp, const sajson::value& v) { auto val = v.get_object_value(k); if(val.get_type() == sajson::TYPE_INTEGER) - inp.dimensions = val.get_integer_value(); + { + auto d = val.get_integer_value(); + if(d != 2 && d != 3) + throw invalid_file{ + "image_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d) + + "). 
1D and 4D textures are not supported."}; + inp.dimensions = d; + } } if(auto k = v.find_object_key_insensitive(sajson::literal("DEPTH")); k != v.get_length()) { inp.depth = v.get_object_value(k).get_type() == sajson::TYPE_TRUE; } + if(auto k = v.find_object_key_insensitive(sajson::literal("IS_ARRAY")); + k != v.get_length()) + { + inp.is_array = v.get_object_value(k).get_type() == sajson::TYPE_TRUE; + } + else if(auto k2 = v.find_object_key_insensitive(sajson::literal("ARRAY")); + k2 != v.get_length()) + { + inp.is_array = v.get_object_value(k2).get_type() == sajson::TYPE_TRUE; + } + // STATIC: shader author opts into "upstream publishes a long-lived + // QRhiTexture, bind it directly". Engine path = same Flag::GrabsFromSource + // already used for cube / 3D / array inputs (those grab implicitly + // because they can't be 2D color attachments). For plain 2D texture + // inputs both modes are valid — RT-render (compositor pattern) is the + // safe default; STATIC: true opts into direct binding for static-LUT / + // IBL-bake / asset-cache producers (avnd gpu_texture_output, etc.). + if(auto k = v.find_object_key_insensitive(sajson::literal("STATIC")); + k != v.get_length()) + { + inp.is_static = v.get_object_value(k).get_type() == sajson::TYPE_TRUE; + } + parse_sampler_config(inp.sampler, v); + if(inp.dimensions == 3) + { + drop_unsupported_compare_3d(inp.sampler, "image input (DIMENSIONS: 3)"); + if(inp.is_array) + { + throw invalid_file{ + "image input: DIMENSIONS: 3 with ARRAY: true is not supported — " + "sampler3DArray is not a core GLSL type. 
Use a 3D texture and drop " + "ARRAY, or a 2D-array texture and drop DIMENSIONS: 3."}; + } + } +} +static void parse_input(cubemap_input& inp, const sajson::value& v) +{ + if(auto k = v.find_object_key_insensitive(sajson::literal("DEPTH")); + k != v.get_length()) + { + inp.depth = v.get_object_value(k).get_type() == sajson::TYPE_TRUE; + } + parse_sampler_config(inp.sampler, v); } -static void parse_input(cubemap_input& inp, const sajson::value& v) { } static void parse_input(event_input& inp, const sajson::value& v) { } @@ -419,6 +585,7 @@ static void parse_input(audio_input& inp, const sajson::value& v) } } } + parse_audio_sampler_config(inp.sampler, v); } static void parse_input(audioHist_input& inp, const sajson::value& v) @@ -437,6 +604,7 @@ static void parse_input(audioHist_input& inp, const sajson::value& v) } } } + parse_audio_sampler_config(inp.sampler, v); } // CSF-specific parsing functions @@ -497,6 +665,106 @@ static void parse_input(storage_input& inp, const sajson::value& v) if(val.get_type() == sajson::TYPE_STRING) inp.buffer_usage = val.as_string(); } + else if(k == "PERSISTENT") + { + inp.persistent = v.get_object_value(i).get_type() == sajson::TYPE_TRUE; + } + else if(k == "VISIBILITY") + { + auto val = v.get_object_value(i); + if(val.get_type() == sajson::TYPE_STRING) + inp.visibility = val.as_string(); + } + } + + // Warn on semantically-impossible combinations. PERSISTENT allocates a + // ping-pong pair and always emits `_prev` as a readonly buffer — if the + // primary is write_only, nothing ever writes the data that _prev is + // supposed to read back, so it's silently always zero. + if(inp.persistent && inp.access == "write_only") + { + throw invalid_file{ + "storage input declared as PERSISTENT + ACCESS: write_only is " + "invalid — _prev would always read zero (no read path exists to " + "populate it). 
Use ACCESS: read_write or read_only with PERSISTENT, " + "or drop PERSISTENT if you don't need frame history."}; + } + + // Reject empty LAYOUT for non-indirect storage_inputs. The graphics + // emit at isf_emit_graphics_storage / isf_emit_ssbo_decl produces an + // empty `readonly buffer NAME_buf { };` block which is invalid GLSL + // (`buffer { };` requires at least one member declarator). shaderc + // then fails with a cryptic message pointing at the auto-emitted + // block. uniform_input has the symmetric check at parse_input(uniform). + // Indirect-draw SSBOs LEGITIMATELY have empty LAYOUT — they are + // skipped from graphics emit (isf.cpp:3361-3363) when buffer_usage is + // non-empty. Match that gate here so legitimate indirect-draw paths + // pass through unchallenged. + if(inp.layout.empty() && inp.buffer_usage.empty()) + { + throw invalid_file{ + "storage_input declares an empty LAYOUT and no BUFFER_USAGE — " + "the SSBO graphics emit would produce `readonly buffer NAME_buf " + "{ };` which is invalid GLSL (a buffer block must have at least " + "one member declarator). Empty LAYOUT only makes sense for " + "indirect-draw SSBOs which set BUFFER_USAGE: \"indirect_draw\" " + "or \"indirect_draw_indexed\". 
Either declare members in LAYOUT " + "or set BUFFER_USAGE."}; + } +} + +static void parse_input(uniform_input& inp, const sajson::value& v) +{ + std::size_t N = v.get_length(); + for(std::size_t i = 0; i < N; i++) + { + auto k = v.get_object_key(i).as_string(); + if(k == "LAYOUT") + { + auto val = v.get_object_value(i); + if(val.get_type() == sajson::TYPE_ARRAY) + { + std::size_t layout_size = val.get_length(); + inp.layout.reserve(layout_size); + for(std::size_t j = 0; j < layout_size; j++) + { + auto field = val.get_array_element(j); + if(field.get_type() != sajson::TYPE_OBJECT) + continue; + uniform_input::layout_field lf; + for(std::size_t f = 0; f < field.get_length(); f++) + { + auto fk = field.get_object_key(f).as_string(); + if(fk == "NAME") + { + auto nv = field.get_object_value(f); + if(nv.get_type() == sajson::TYPE_STRING) + lf.name = nv.as_string(); + } + else if(fk == "TYPE") + { + auto tv = field.get_object_value(f); + if(tv.get_type() == sajson::TYPE_STRING) + lf.type = tv.as_string(); + } + } + inp.layout.push_back(lf); + } + } + } + else if(k == "VISIBILITY") + { + auto val = v.get_object_value(i); + if(val.get_type() == sajson::TYPE_STRING) + inp.visibility = val.as_string(); + } + } + if(inp.layout.empty()) + { + throw invalid_file{ + "uniform_input declares an empty LAYOUT — std140 interface blocks " + "must contain at least one field. Either declare its members in " + "LAYOUT: [{ NAME, TYPE }, ...] or remove the input."}; } } @@ -507,8 +775,18 @@ static void parse_input(texture_input& inp, const sajson::value& v) { auto val = v.get_object_value(k); if(val.get_type() == sajson::TYPE_INTEGER) - inp.dimensions = val.get_integer_value(); + { + auto d = val.get_integer_value(); + if(d != 2 && d != 3) + throw invalid_file{ + "texture_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d) + + "). 
1D and 4D textures are not supported."}; + inp.dimensions = d; + } } + parse_sampler_config(inp.sampler, v); + if(inp.dimensions == 3) + drop_unsupported_compare_3d(inp.sampler, "texture input (DIMENSIONS: 3)"); } // Parse a COPY_FROM JSON object. @@ -540,17 +818,213 @@ parse_copy_from(const sajson::value& obj) return cf; } -// Parse an AUXILIARY JSON array into a vector of auxiliary_request. +// Detect whether an AUXILIARY entry declares a texture (TYPE: "image" / +// "cubemap" / "texture") rather than a buffer. Buffers are the default +// (TYPE absent, or "storage" / "buffer"). +// Three-way classification of an AUXILIARY JSON entry: +// Ssbo — default; declared either without TYPE or with TYPE: +// "storage" / "buffer" / "ssbo". Layout maps to an std430 +// `buffer` block bound as bufferLoad / bufferStore / bufferLoadStore. +// Ubo — TYPE: "uniform" / "ubo". Layout maps to an std140 `uniform` +// block bound as uniformBuffer. +// Texture — TYPE: "image" / "texture" / "cubemap" / "image_cube" / +// "storage_*". Goes through the auxiliary_texture_request pool. +enum class aux_kind { Ssbo, Ubo, Texture }; + +static aux_kind aux_entry_kind(const sajson::value& aux_obj) +{ + auto k = aux_obj.find_object_key_insensitive(sajson::literal("TYPE")); + if(k == aux_obj.get_length()) + return aux_kind::Ssbo; + auto v = aux_obj.get_object_value(k); + if(v.get_type() != sajson::TYPE_STRING) + return aux_kind::Ssbo; + std::string t = v.as_string(); + for(auto& c : t) c = (char)tolower(c); + if(t == "image" || t == "texture" || t == "cubemap" || t == "image_cube" + || t == "storage_image" || t == "storage_cube" + || t == "storage_image_array" || t == "storage_3d") + return aux_kind::Texture; + if(t == "uniform" || t == "ubo") + return aux_kind::Ubo; + return aux_kind::Ssbo; +} + +// Parse a single texture auxiliary entry. 
+static void parse_auxiliary_texture( + const sajson::value& aux_obj, + geometry_input::auxiliary_texture_request& out) +{ + for(std::size_t f = 0; f < aux_obj.get_length(); f++) + { + auto fkey = aux_obj.get_object_key(f).as_string(); + auto fval = aux_obj.get_object_value(f); + + if(fkey == "NAME" && fval.get_type() == sajson::TYPE_STRING) + out.name = fval.as_string(); + else if(fkey == "TYPE" && fval.get_type() == sajson::TYPE_STRING) + { + std::string t = fval.as_string(); + for(auto& c : t) c = (char)tolower(c); + if(t == "cubemap" || t == "image_cube") + out.is_cubemap = true; + else if(t == "storage_image") + out.is_storage = true; + else if(t == "storage_cube") + { out.is_storage = true; out.is_cubemap = true; } + else if(t == "storage_image_array") + { out.is_storage = true; out.is_array = true; } + else if(t == "storage_3d") + { out.is_storage = true; out.dimensions = 3; } + } + else if(fkey == "DIMENSIONS") + { + if(fval.get_type() == sajson::TYPE_INTEGER) + out.dimensions = fval.get_integer_value(); + } + else if(fkey == "IS_ARRAY" || fkey == "ARRAY") + out.is_array = (fval.get_type() == sajson::TYPE_TRUE); + else if(fkey == "DEPTH") + { + // DEPTH overload — context-dependent: + // "DEPTH": true → legacy sampleable-depth flag (paired with + // COMPARE for shadow-comparison samplers) + // "DEPTH": → 3D-texture depth dimension literal + // "DEPTH": "" → 3D-texture depth dimension expression + // Distinguishable by sajson type so authors can use either form + // without the parser silently dropping one. 
+ const auto t = fval.get_type(); + if(t == sajson::TYPE_TRUE) + out.is_depth = true; + else if(t == sajson::TYPE_FALSE) + out.is_depth = false; + else if(t == sajson::TYPE_INTEGER) + out.depth_expression = std::to_string(fval.get_integer_value()); + else if(t == sajson::TYPE_DOUBLE) + out.depth_expression = std::to_string(fval.get_double_value()); + else if(t == sajson::TYPE_STRING) + out.depth_expression = fval.as_string(); + } + else if(fkey == "STORAGE") + out.is_storage = (fval.get_type() == sajson::TYPE_TRUE); + else if(fkey == "FORMAT" && fval.get_type() == sajson::TYPE_STRING) + out.format = fval.as_string(); + else if(fkey == "ACCESS" && fval.get_type() == sajson::TYPE_STRING) + out.access = fval.as_string(); + // WIDTH / HEIGHT / LAYERS — same expression-or-literal convention as + // csf_image_input. Strings allow `$var` substitution against the + // shader's long/float inputs at allocation time. + else if(fkey == "WIDTH") + { + const auto t = fval.get_type(); + if(t == sajson::TYPE_INTEGER) + out.width_expression = std::to_string(fval.get_integer_value()); + else if(t == sajson::TYPE_DOUBLE) + out.width_expression = std::to_string(fval.get_double_value()); + else if(t == sajson::TYPE_STRING) + out.width_expression = fval.as_string(); + } + else if(fkey == "HEIGHT") + { + const auto t = fval.get_type(); + if(t == sajson::TYPE_INTEGER) + out.height_expression = std::to_string(fval.get_integer_value()); + else if(t == sajson::TYPE_DOUBLE) + out.height_expression = std::to_string(fval.get_double_value()); + else if(t == sajson::TYPE_STRING) + out.height_expression = fval.as_string(); + } + else if(fkey == "LAYERS") + { + const auto t = fval.get_type(); + if(t == sajson::TYPE_INTEGER) + out.layers_expression = std::to_string(fval.get_integer_value()); + else if(t == sajson::TYPE_DOUBLE) + out.layers_expression = std::to_string(fval.get_double_value()); + else if(t == sajson::TYPE_STRING) + out.layers_expression = fval.as_string(); + } + } + + // 
depth_expression non-empty implies a 3D texture even if DIMENSIONS + // wasn't set explicitly. Mirrors csf_image_input::is3D() semantics — + // saves the author from writing both fields. + if(!out.depth_expression.empty() && out.dimensions == 2) + out.dimensions = 3; + + // Auto-infer storage-image semantics when FORMAT is explicitly set to + // anything other than the sampled-texture default (rgba8). Allows + // author-friendly declarations like: + // + // { "NAME": "voxel_grid", "TYPE": "image", "ACCESS": "read_write", + // "FORMAT": "r32ui", "DIMENSIONS": 3, ... } + // + // to be parsed as a storage image without forcing the author to + // additionally write `"STORAGE": true` or use the more-cryptic + // `"TYPE": "storage_3d"`. + // + // ONLY uses FORMAT — NOT ACCESS — because `access` defaults to + // "read_write" in the struct (it's only meaningful when is_storage is + // already true), so an ACCESS-based heuristic would mis-fire on every + // sampled-aux entry that doesn't explicitly override it. FORMAT + // defaults to "rgba8" which is also the sampled-image default, so the + // discriminator is "did the author explicitly write a non-rgba8 + // FORMAT?" — unambiguous either way. If you want a storage rgba8 + // image, write `"STORAGE": true` explicitly. + if(!out.is_storage) + { + const bool format_implies_storage + = !out.format.empty() && out.format != "rgba8"; + if(format_implies_storage) + out.is_storage = true; + } + // Inherit the flat sampler_config fields (WRAP/FILTER/COMPARE/…). + parse_sampler_config(out.sampler, aux_obj); + // Storage images don't use the sampler; regular samplers on a 3D texture + // have no shadow variant. Cubemap and 2D-array shapes have shadow variants + // and are fine. 
+ if(!out.is_storage && !out.is_cubemap && out.dimensions == 3) + drop_unsupported_compare_3d( + out.sampler, + fmt::format("auxiliary texture '{}' (DIMENSIONS: 3)", out.name).c_str()); + // Cube-arrays (samplerCubeArray / imageCubeArray) are unsupported: every + // QRhi backend silently collapses `CubeMap | TextureArray` to one flag or + // the other at view-creation time (Vulkan qrhivulkan.cpp:7736+, + // D3D12:1160+, Metal:4025+, GL:6124+), so the shader-side type and the + // bound resource disagree. Reject at parse time rather than ship broken + // bindings. Same story for 3D cubemaps (nonsensical). + if(out.is_cubemap && out.is_array) + { + throw invalid_file{ + "auxiliary texture '" + out.name + + "': cubemap + ARRAY is not supported on any QRhi backend " + "(cube-array views are not constructible). Use a plain cubemap, " + "or decompose to a 2D array and do face math in the shader."}; + } + if(out.is_cubemap && out.dimensions == 3) + { + fmt::print( + stderr, + "[isf] auxiliary texture '{}': cubemap with DIMENSIONS: 3 is " + "meaningless (cube faces are 2D). Ignoring DIMENSIONS.\n", + out.name); + out.dimensions = 2; + } +} + +// Parse an AUXILIARY JSON array, dispatching each entry by TYPE into +// either the buffer list or the texture list. // Shared by geometry_input parsing and top-level AUXILIARY key. 
static void parse_auxiliary_array( const sajson::value& val, - std::vector& out) + std::vector& out_buffers, + std::vector& out_textures) { if(val.get_type() != sajson::TYPE_ARRAY) return; std::size_t aux_count = val.get_length(); - out.reserve(aux_count); + out_buffers.reserve(out_buffers.size() + aux_count); for(std::size_t j = 0; j < aux_count; j++) { @@ -558,7 +1032,21 @@ static void parse_auxiliary_array( if(aux_obj.get_type() != sajson::TYPE_OBJECT) continue; + const aux_kind kind = aux_entry_kind(aux_obj); + if(kind == aux_kind::Texture) + { + geometry_input::auxiliary_texture_request tr; + parse_auxiliary_texture(aux_obj, tr); + if(!tr.name.empty()) + out_textures.push_back(std::move(tr)); + continue; + } + geometry_input::auxiliary_request ar; + // UBO kind: flag set on the request so both parser-side GLSL emission + // and runtime-side binding know to treat it as a std140 uniform block. + // Buffer-kind SSBO is the default (is_uniform stays false). + ar.is_uniform = (kind == aux_kind::Ubo); for(std::size_t f = 0; f < aux_obj.get_length(); f++) { @@ -611,12 +1099,61 @@ static void parse_auxiliary_array( { ar.forward = parse_copy_from(fval); } + else if(fkey == "PERSISTENT") + { + if(fval.get_type() == sajson::TYPE_TRUE) + ar.persistent = true; + else if(fval.get_type() == sajson::TYPE_FALSE) + ar.persistent = false; + } } if(ar.access.empty()) ar.access = "read_only"; - out.push_back(std::move(ar)); + out_buffers.push_back(std::move(ar)); + } +} + +// Validate that every geometry_input ATTRIBUTE.TYPE either names a +// built-in GLSL scalar/vector/matrix type or matches a user-defined +// struct declared in descriptor::types. Run AFTER both RESOURCES and +// TYPES are parsed (TYPES may appear in any order in the JSON) — i.e. +// once at the end of parse_csf / parse_raw_raster_pipeline. Catches +// typos in TYPE strings at parse time instead of as a confusing +// "undefined identifier" GLSL compile error 30 lines deep into the +// generated shader. 
+static void validate_attribute_types(const descriptor& d) +{ + static constexpr std::string_view builtins[] = { + "float", "int", "uint", "bool", + "vec2", "vec3", "vec4", + "ivec2", "ivec3", "ivec4", + "uvec2", "uvec3", "uvec4", + "mat2", "mat3", "mat4" + }; + auto is_builtin = [](std::string_view t) noexcept { + for(auto b : builtins) if(t == b) return true; + return false; + }; + auto is_user_type = [&](std::string_view t) noexcept { + for(const auto& td : d.types) if(td.name == t) return true; + return false; + }; + for(const auto& inp : d.inputs) + { + auto* gi = ossia::get_if(&inp.data); + if(!gi) continue; + for(const auto& ar : gi->attributes) + { + if(ar.type.empty()) continue; + if(is_builtin(ar.type) || is_user_type(ar.type)) continue; + throw invalid_file{ + "ATTRIBUTES \"" + ar.name + "\" on geometry resource \"" + inp.name + + "\" declares TYPE \"" + ar.type + + "\", which is neither a built-in GLSL scalar/vector/matrix type " + "nor a user-defined type from the TYPES section."}; + } } } @@ -703,27 +1240,79 @@ static void parse_input(geometry_input& inp, const sajson::value& v) else if(val.get_type() == sajson::TYPE_DOUBLE) inp.instance_count = std::to_string((int)val.get_double_value()); } + else if(k == "FORMAT_ID") + { + // String tag stamped on the consumer geometry's filter_tag + // (rapidhash truncated to 32 bits). Lets a CSF that produces + // primitive-cloud-shaped output declare its format identity in + // the JSON header without engine-side knowledge of the format. 
+ auto val = v.get_object_value(i); + if(val.get_type() == sajson::TYPE_STRING) + inp.format_id = val.as_string(); + } else if(k == "AUXILIARY") { - parse_auxiliary_array(v.get_object_value(i), inp.auxiliary); + parse_auxiliary_array(v.get_object_value(i), inp.auxiliary, inp.auxiliary_textures); } - else if(k == "INDIRECT_DRAW") + else if(k == "INDIRECT") { auto val = v.get_object_value(i); - if(val.get_type() == sajson::TYPE_TRUE) - inp.indirect_draw = true; - else if(val.get_type() == sajson::TYPE_FALSE) - inp.indirect_draw = false; + if(val.get_type() == sajson::TYPE_OBJECT) + { + geometry_input::indirect_request req; + for(std::size_t j = 0; j < val.get_length(); j++) + { + auto ik = val.get_object_key(j).as_string(); + boost::algorithm::to_upper(ik); + if(ik == "COUNT") + { + auto iv = val.get_object_value(j); + if(iv.get_type() == sajson::TYPE_STRING) + req.count = iv.as_string(); + else if(iv.get_type() == sajson::TYPE_INTEGER) + req.count = std::to_string(iv.get_integer_value()); + else if(iv.get_type() == sajson::TYPE_DOUBLE) + req.count = std::to_string((int)iv.get_double_value()); + } + } + if(req.count.empty()) + req.count = "1"; + inp.indirect = req; + } } - else if(k == "INDIRECT_DRAW_TYPE") + else if(k == "INDIRECT_DRAW") { auto val = v.get_object_value(i); - if(val.get_type() == sajson::TYPE_STRING) - inp.indirect_draw_type = val.as_string(); + if(val.get_type() == sajson::TYPE_TRUE) + inp.indirect = geometry_input::indirect_request{.count = "1"}; } } } +// Known GLSL image format qualifiers. Used for a parse-time sanity check — +// lets the shader author see a typo ("rgba16" vs "rgba16f") before the +// runtime silently falls back to rgba8. Strict GLSL image-format typing +// validation (matching imageStore argument types to declared formats) would +// need a full GLSL AST which this parser does not build; the most useful +// check we can do cheaply is reject unknown format strings. 
+static bool isf_is_known_image_format(std::string fmt) +{ + boost::algorithm::to_lower(fmt); + static const ossia::hash_set known{ + "rgba8", "rgba8_snorm", "rgba8ui", "rgba8i", + "rgba16", "rgba16_snorm", "rgba16f", "rgba16ui", "rgba16i", + "rgba32f","rgba32ui", "rgba32i", + "rg8", "rg8_snorm", "rg8ui", "rg8i", + "rg16", "rg16_snorm", "rg16f", "rg16ui", "rg16i", + "rg32f", "rg32ui", "rg32i", + "r8", "r8_snorm", "r8ui", "r8i", + "r16", "r16_snorm", "r16f", "r16ui", "r16i", + "r32f", "r32ui", "r32i", + "rgb10_a2", "rgb10_a2ui", "r11f_g11f_b10f", + "bgra8"}; + return known.count(fmt) > 0; +} + static void parse_input(csf_image_input& inp, const sajson::value& v) { std::size_t N = v.get_length(); @@ -741,7 +1330,18 @@ static void parse_input(csf_image_input& inp, const sajson::value& v) { auto val = v.get_object_value(i); if(val.get_type() == sajson::TYPE_STRING) + { inp.format = val.as_string(); + if(!inp.format.empty() && !isf_is_known_image_format(inp.format)) + { + fmt::print( + stderr, + "[isf] csf_image_input FORMAT \"{}\" is not a recognised GLSL " + "image qualifier — will fall back to rgba8 at runtime. Check " + "for typos (e.g. \"rgba16\" vs \"rgba16f\").\n", + inp.format); + } + } } else if(k == "WIDTH") { @@ -798,10 +1398,90 @@ static void parse_input(csf_image_input& inp, const sajson::value& v) { auto val = v.get_object_value(i); if(val.get_type() == sajson::TYPE_INTEGER) - inp.dimensions = val.get_integer_value(); + { + auto d = val.get_integer_value(); + if(d != 2 && d != 3) + throw invalid_file{ + "csf_image_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d) + + "). 1D and 4D textures are not supported."}; + inp.dimensions = d; + } else if(val.get_type() == sajson::TYPE_DOUBLE) - inp.dimensions = (int)val.get_double_value(); + { + auto d = (int)val.get_double_value(); + if(d != 2 && d != 3) + throw invalid_file{ + "csf_image_input DIMENSIONS must be 2 or 3 (got " + std::to_string(d) + + "). 
1D and 4D textures are not supported."}; + inp.dimensions = d; + } + } + else if(k == "VISIBILITY") + { + auto val = v.get_object_value(i); + if(val.get_type() == sajson::TYPE_STRING) + inp.visibility = val.as_string(); + } + else if(k == "PERSISTENT") + { + inp.persistent = v.get_object_value(i).get_type() == sajson::TYPE_TRUE; + } + else if(k == "GENERATE_MIPS") + { + inp.generate_mips = v.get_object_value(i).get_type() == sajson::TYPE_TRUE; + } + else if(k == "IS_ARRAY" || k == "ARRAY") + { + inp.is_array = v.get_object_value(i).get_type() == sajson::TYPE_TRUE; } + else if(k == "LAYERS") + { + auto val = v.get_object_value(i); + auto t = val.get_type(); + if(t == sajson::TYPE_STRING) + inp.layers_expression = val.as_string(); + else if(t == sajson::TYPE_INTEGER) + inp.layers_expression = std::to_string(val.get_integer_value()); + else if(t == sajson::TYPE_DOUBLE) + inp.layers_expression = std::to_string(val.get_double_value()); + } + else if(k == "CUBEMAP" || k == "IS_CUBE") + { + inp.cubemap = v.get_object_value(i).get_type() == sajson::TYPE_TRUE; + } + } + + // See the matching note on storage_input — persistent + write_only has no + // useful semantics because _prev is readonly and nothing writes it. + if(inp.persistent && inp.access == "write_only") + { + throw invalid_file{ + "csf_image_input declared as PERSISTENT + ACCESS: write_only is " + "invalid — _prev would always read zero (no read path exists to " + "populate it). Use ACCESS: read_write or read_only with PERSISTENT, " + "or drop PERSISTENT."}; + } + + // Cube-array writable images are unsupported (see sampler-side analysis in + // parse_auxiliary_texture / isf.hpp). Reject here so downstream allocators + // and the GLSL emitter can assume the combo never shows up. + if(inp.is_array && inp.cubemap) + { + throw invalid_file{ + "csf_image_input: IS_ARRAY + image_cube is not supported — " + "imageCubeArray views are broken on every QRhi backend. 
Bind N " + "separate cubemaps or use image2DArray and do face math in the " + "shader."}; + } + // 3D arrays do not exist as a core GLSL image type either. + if(inp.is_array && inp.is3D()) + { + fmt::print( + stderr, + "[isf] csf_image_input: IS_ARRAY + 3D image (DIMENSIONS: 3 or DEPTH " + "expression) is not a valid GLSL type (image3DArray is not core). " + "Dropping IS_ARRAY.\n"); + inp.is_array = false; } } @@ -821,6 +1501,7 @@ static void parse_input(audioFFT_input& inp, const sajson::value& v) } } } + parse_audio_sampler_config(inp.sampler, v); } static void parse_input(long_input& inp, const sajson::value& v) @@ -1010,6 +1691,13 @@ static void parse_input(Input_T& inp, const sajson::value& v) auto val = v.get_object_value(i); inp.def = parse_input_impl(val, value_type{}); } + else if(k == "AS_COLOR") + { + if constexpr(requires { inp.as_color; }) + { + inp.as_color = v.get_object_value(i).get_type() == sajson::TYPE_TRUE; + } + } } // Handle shaders without min / max @@ -1120,6 +1808,170 @@ input parse(const sajson::value& v) return i; } +// --- PIPELINE_STATE / MULTIVIEW parsing helpers --------------------------- + +static bool get_bool(const sajson::value& v, bool& out) +{ + if(v.get_type() == sajson::TYPE_TRUE) { out = true; return true; } + if(v.get_type() == sajson::TYPE_FALSE){ out = false; return true; } + return false; +} +static bool get_float(const sajson::value& v, float& out) +{ + if(v.get_type() == sajson::TYPE_DOUBLE) { out = (float)v.get_double_value(); return true; } + if(v.get_type() == sajson::TYPE_INTEGER) { out = (float)v.get_integer_value(); return true; } + return false; +} +static bool get_int(const sajson::value& v, int& out) +{ + if(v.get_type() == sajson::TYPE_INTEGER) { out = v.get_integer_value(); return true; } + if(v.get_type() == sajson::TYPE_DOUBLE) { out = (int)v.get_double_value(); return true; } + return false; +} +static bool get_uint(const sajson::value& v, uint32_t& out) +{ + int x{}; + if(get_int(v, x)) { out = 
(uint32_t)x; return true; } + return false; +} +static bool get_str(const sajson::value& v, std::string& out) +{ + if(v.get_type() == sajson::TYPE_STRING) { out = v.as_string(); return true; } + return false; +} + +static void parse_blend_attachment(const sajson::value& v, blend_attachment& out) +{ + if(v.get_type() != sajson::TYPE_OBJECT) + return; + std::size_t n = v.get_length(); + for(std::size_t i = 0; i < n; i++) + { + auto k = v.get_object_key(i).as_string(); + auto val = v.get_object_value(i); + bool b{}; + if (k == "ENABLE" ) { get_bool(val, b); out.enable = b; } + else if(k == "SRC_COLOR" ) get_str(val, out.src_color); + else if(k == "DST_COLOR" ) get_str(val, out.dst_color); + else if(k == "OP_COLOR" ) get_str(val, out.op_color); + else if(k == "SRC_ALPHA" ) get_str(val, out.src_alpha); + else if(k == "DST_ALPHA" ) get_str(val, out.dst_alpha); + else if(k == "OP_ALPHA" ) get_str(val, out.op_alpha); + else if(k == "COLOR_WRITE") get_str(val, out.color_write); + // Legacy shorter names + else if(k == "SRC" ) { get_str(val, out.src_color); out.src_alpha = out.src_color; } + else if(k == "DST" ) { get_str(val, out.dst_color); out.dst_alpha = out.dst_color; } + else if(k == "OP" ) { get_str(val, out.op_color); out.op_alpha = out.op_color; } + } +} + +static void parse_stencil_op_state(const sajson::value& v, stencil_op_state& out) +{ + if(v.get_type() != sajson::TYPE_OBJECT) + return; + std::size_t n = v.get_length(); + for(std::size_t i = 0; i < n; i++) + { + auto k = v.get_object_key(i).as_string(); + auto val = v.get_object_value(i); + if (k == "FAIL_OP" ) get_str(val, out.fail_op); + else if(k == "DEPTH_FAIL_OP") get_str(val, out.depth_fail_op); + else if(k == "PASS_OP" ) get_str(val, out.pass_op); + else if(k == "COMPARE_OP" ) get_str(val, out.compare_op); + else if(k == "COMPARE" ) get_str(val, out.compare_op); + } +} + +static void parse_pipeline_state(const sajson::value& v, pipeline_state& out) +{ + if(v.get_type() != sajson::TYPE_OBJECT) + return; + 
std::size_t n = v.get_length(); + for(std::size_t i = 0; i < n; i++) + { + auto k = v.get_object_key(i).as_string(); + auto val = v.get_object_value(i); + bool b{}; + float f{}; + uint32_t u{}; + std::string s; + + if (k == "DEPTH_TEST" ) { if(get_bool(val, b)) out.depth_test = b; } + else if(k == "DEPTH_WRITE") { if(get_bool(val, b)) out.depth_write = b; } + else if(k == "DEPTH_COMPARE") { if(get_str(val, s)) out.depth_compare = s; } + else if(k == "DEPTH_BIAS") { if(get_float(val, f)) out.depth_bias = f; } + else if(k == "SLOPE_SCALED_DEPTH_BIAS") { if(get_float(val, f)) out.slope_scaled_depth_bias = f; } + else if(k == "CULL_MODE") { if(get_str(val, s)) out.cull_mode = s; } + else if(k == "FRONT_FACE") { if(get_str(val, s)) out.front_face = s; } + else if(k == "POLYGON_MODE") { if(get_str(val, s)) out.polygon_mode = s; } + else if(k == "LINE_WIDTH") { if(get_float(val, f)) out.line_width = f; } + else if(k == "VERTEX_COUNT") { if(get_uint(val, u)) out.vertex_count = u; } + else if(k == "INSTANCE_COUNT") { if(get_uint(val, u)) out.instance_count = u; } + else if(k == "TOPOLOGY") { if(get_str(val, s)) out.topology = s; } + else if(k == "BLEND") + { + // Shortcut: "BLEND": true/false turns on the default alpha-blend. 
+ if(val.get_type() == sajson::TYPE_TRUE || val.get_type() == sajson::TYPE_FALSE) + { + blend_attachment a{}; + a.enable = val.get_type() == sajson::TYPE_TRUE; + out.blend_all = a; + } + else if(val.get_type() == sajson::TYPE_OBJECT) + { + blend_attachment a{}; + a.enable = true; + parse_blend_attachment(val, a); + out.blend_all = a; + } + } + else if(k == "BLEND_PER_ATTACHMENT") + { + if(val.get_type() == sajson::TYPE_ARRAY) + { + std::size_t m = val.get_length(); + out.blend_per_attachment.clear(); + out.blend_per_attachment.reserve(m); + for(std::size_t j = 0; j < m; j++) + { + blend_attachment a{}; + a.enable = true; + parse_blend_attachment(val.get_array_element(j), a); + out.blend_per_attachment.push_back(a); + } + } + } + else if(k == "STENCIL_TEST") { if(get_bool(val, b)) out.stencil_test = b; } + else if(k == "STENCIL_READ_MASK") { if(get_uint(val, u)) out.stencil_read_mask = u; } + else if(k == "STENCIL_WRITE_MASK") { if(get_uint(val, u)) out.stencil_write_mask = u; } + else if(k == "STENCIL_FRONT") + { + stencil_op_state st{}; + parse_stencil_op_state(val, st); + out.stencil_front = st; + } + else if(k == "STENCIL_BACK") + { + stencil_op_state st{}; + parse_stencil_op_state(val, st); + out.stencil_back = st; + } + else if(k == "SHADING_RATE") + { + if(val.get_type() == sajson::TYPE_ARRAY && val.get_length() >= 2) + { + int w{}, h{}; + if(get_int(val.get_array_element(0), w) + && get_int(val.get_array_element(1), h) + && w >= 1 && h >= 1) + { + out.shading_rate = std::array{w, h}; + } + } + } + } +} + using root_fun = void (*)(descriptor&, const sajson::value&); using input_fun = input (*)(const sajson::value&); static const ossia::string_map& root_parse{[] { @@ -1166,6 +2018,7 @@ static const ossia::string_map& root_parse{[] { // CSF-specific types - note: 'image' in CSF context is csf_image_input, not image_input i.insert({"storage", [](const auto& s) { return parse(s); }}); + i.insert({"uniform", [](const auto& s) { return parse(s); }}); 
i.insert({"texture", [](const auto& s) { return parse(s); }}); i.insert({"geometry", [](const auto& s) { return parse(s); }}); @@ -1185,20 +2038,87 @@ static const ossia::string_map& root_parse{[] { auto k = obj.find_object_key_insensitive(sajson::literal("TYPE")); if(k != obj.get_length()) { - std::string type_str = obj.get_object_value(k).as_string(); + std::string type_str; + if(!get_str(obj.get_object_value(k), type_str)) + continue; boost::algorithm::to_lower(type_str); - auto inp = input_parse.find(type_str); - if(inp != input_parse.end()) - d.inputs.push_back((inp->second)(obj)); + + // "image" with ACCESS or FORMAT → storage image (csf_image_input), + // same as the RESOURCES section. This lets users declare storage + // images in INPUTS without having to move them to RESOURCES. + if(type_str == "image" + && (obj.find_object_key_insensitive(sajson::literal("ACCESS")) != obj.get_length() + || obj.find_object_key_insensitive(sajson::literal("FORMAT")) != obj.get_length())) + { + input inp; + parse_input_base(inp, obj); + csf_image_input ci; + parse_input(ci, obj); + inp.data = ci; + d.inputs.push_back(inp); + } + else + { + auto inp = input_parse.find(type_str); + if(inp != input_parse.end()) + d.inputs.push_back((inp->second)(obj)); + } } else { + // No TYPE specified — default to storage (SSBO). Matches the + // nested-AUXILIARY default (`aux_entry_kind`, ~L820) so the + // top-level INPUTS dispatcher behaves the same as nested + // declarations. This is the right default because: + // - The dual-bind UBO/SSBO design (scene_counts etc.) is + // SSBO-only after the cross-backend cleanup; readers + // declare `TYPE: "storage", ACCESS: "read_only"`. + // - Authors who omit TYPE on a buffer-shaped declaration + // almost always mean storage, not uniform — uniforms + // have a much smaller addressable subset (no runtime + // arrays, std140 padding) and writers always need + // storage anyway. 
+ // - The previous behaviour silently dropped the entry + // without an error, so a typo'd `TYPE: "uniform"` → + // missing TYPE flipped scene_counts off entirely with + // no warning. Defaulting to storage means the next + // stage (binding emission) will catch the misuse via + // a layout/std430 check rather than a silent skip. + d.inputs.push_back(parse(obj)); } } } } }}); + // How many GLSL interface-block input/output locations a given type + // consumes, per GLSL 4.50 spec §4.4.1 "A matrix of sizes matM or matMxN + // takes M locations (one per column)". Non-matrix types consume one + // location. Doubles of >dvec2 width technically consume two locations + // each on desktop GL, but those are vanishingly rare in shader-toy- + // style pipelines — if anyone hits the edge they can pin LOCATION + // explicitly. The mat{M,MxN} cases matter because every existing + // preset that wants mat4 per-instance or per-vertex would otherwise + // have its subsequent attribute collide with column 2/3/4 of the + // matrix. + static constexpr auto locations_consumed = [](attribute_type t) noexcept -> int { + using A = attribute_type; + switch(t) + { + case A::Mat2: case A::Mat2x3: case A::Mat2x4: + case A::DMat2: case A::DMat2x3: case A::DMat2x4: + return 2; + case A::Mat3: case A::Mat3x2: case A::Mat3x4: + case A::DMat3: case A::DMat3x2: case A::DMat3x4: + return 3; + case A::Mat4: case A::Mat4x2: case A::Mat4x3: + case A::DMat4: case A::DMat4x2: case A::DMat4x3: + return 4; + default: + return 1; + } + }; + static constexpr auto parse_attributes = [](descriptor& d, const sajson::value& v) { using namespace std::literals; @@ -1223,33 +2143,140 @@ static const ossia::string_map& root_parse{[] { } else if(loc_obj.get_type() == sajson::TYPE_STRING) { - // Parse as integer, e.g. "LOCATION": "3" - ip.location = std::stoi(loc_obj.as_string()); + // Parse as integer, e.g. "LOCATION": "3". 
std::stoi throws + // std::invalid_argument (a logic_error, not runtime_error) + // on non-numeric input — catch it locally and surface a + // useful invalid_file message instead. The previous + // unguarded call escaped through the parser's outer + // catch(const std::runtime_error&) and either terminated + // (when the parser was invoked from a noexcept context; + // see ProcessDropHandler.cpp) or surfaced as the generic + // "Unknown error" via the catch(...) fallback at + // ShaderProgram.cpp. // FIXME parse standard locations from ossia::geometry_port + try + { + ip.location = std::stoi(loc_obj.as_string()); + } + catch(const std::exception&) + { + throw invalid_file{ + std::string("LOCATION must be integer or numeric " + "string, got: \"") + + std::string(loc_obj.as_string()) + "\""}; + } } } if(auto k = obj.find_object_key_insensitive(sajson::literal("TYPE")); k != obj.get_length()) { - std::string type_str = obj.get_object_value(k).as_string(); - boost::algorithm::to_lower(type_str); - auto inp = attribute_type_parse.find(type_str); - if(inp != attribute_type_parse.end()) - ip.type = inp->second; + std::string type_str; + if(get_str(obj.get_object_value(k), type_str)) + { + boost::algorithm::to_lower(type_str); + auto inp = attribute_type_parse.find(type_str); + if(inp != attribute_type_parse.end()) + ip.type = inp->second; + } } if(auto k = obj.find_object_key_insensitive(sajson::literal("NAME")); k != obj.get_length()) { - ip.name = obj.get_object_value(k).as_string(); + get_str(obj.get_object_value(k), ip.name); + } + + // SEMANTIC (only meaningful on vertex_input): explicit ossia + // attribute semantic name to use for upstream-buffer matching. + // When omitted, name is used as the semantic key. When set to + // "custom" the runtime falls back to NAME-based matching. 
+ if(auto k = obj.find_object_key_insensitive(sajson::literal("SEMANTIC")); + k != obj.get_length()) + { + auto val = obj.get_object_value(k); + if(val.get_type() == sajson::TYPE_STRING) + ip.semantic = val.as_string(); + } + + // Interpolation qualifier: "smooth" (default, not emitted), "flat", + // "noperspective", "centroid", "sample". Applies to vertex outputs + // and fragment inputs (no effect on vertex inputs / fragment outputs). + if(auto k = obj.find_object_key_insensitive(sajson::literal("INTERPOLATION")); + k != obj.get_length()) + { + auto val = obj.get_object_value(k); + if(val.get_type() == sajson::TYPE_STRING) + ip.interpolation = val.as_string(); } - // If LOCATION was not specified, assign sequentially - // FIXME maybe try to match it from the name ? + // REQUIRED / DEFAULT: only meaningful on vertex_input (raw raster + // pipeline's strictness-vs-fallback control). Silently ignored on + // vertex_output / fragment_input / fragment_output — their matching + // rules are author-owned, not upstream-dependent. + if constexpr (std::is_same_v) + { + if(auto k = obj.find_object_key_insensitive(sajson::literal("REQUIRED")); + k != obj.get_length()) + { + const auto& rv = obj.get_object_value(k); + if(rv.get_type() == sajson::TYPE_FALSE) + ip.required = false; + else if(rv.get_type() == sajson::TYPE_TRUE) + ip.required = true; + // Other JSON types left at default (true). No error here — + // strict JSON typing is already enforced upstream by sajson. 
+ } + + if(auto k = obj.find_object_key_insensitive(sajson::literal("DEFAULT")); + k != obj.get_length()) + { + const auto& dv = obj.get_object_value(k); + if(dv.get_type() == sajson::TYPE_ARRAY) + { + const std::size_t len = dv.get_length(); + ip.default_val.reserve(len); + for(std::size_t j = 0; j < len; ++j) + { + const auto& e = dv.get_array_element(j); + if(e.get_type() == sajson::TYPE_INTEGER) + ip.default_val.push_back((double)e.get_integer_value()); + else if(e.get_type() == sajson::TYPE_DOUBLE) + ip.default_val.push_back(e.get_double_value()); + // Non-numeric entries silently skipped — the runtime's + // component-pad rule will fill missing slots with zero. + } + } + else if(dv.get_type() == sajson::TYPE_INTEGER) + { + // Allow a bare scalar for 1-wide types: "DEFAULT": 1 + ip.default_val.push_back((double)dv.get_integer_value()); + } + else if(dv.get_type() == sajson::TYPE_DOUBLE) + { + ip.default_val.push_back(dv.get_double_value()); + } + } + } + + // If LOCATION was not specified, assign sequentially with + // per-type location counts so mat3/mat4 and their rectangular + // cousins claim the right number of slots (matMxN consumes M + // consecutive locations under GLSL 4.50 §4.4.1). Previously + // this was `(int)(d.*member).size()` — off-by-3 the moment a + // shader declared any mat4 input, and the next attribute + // would land inside the matrix, which the driver rejects. + // + // For mixed explicit / auto layouts the cumulative-sum above + // can collide with a user-pinned LOCATION; that's a pre-existing + // policy tradeoff left untouched here — the simpler "always + // auto" pattern is what 99% of shipped shaders use. 
if(ip.location < 0 && !ip.name.empty()) { - ip.location = (int)(d.*member).size(); + int next_loc = 0; + for(const auto& prev : d.*member) + next_loc += locations_consumed(prev.type); + ip.location = next_loc; } if(ip.type != attribute_type::Unknown && ip.location >= 0 && !ip.name.empty()) @@ -1277,9 +2304,12 @@ static const ossia::string_map& root_parse{[] { parse_attributes.operator()(d, v); }}); - // Top-level AUXILIARY for RAW_RASTER_PIPELINE: SSBOs expected from upstream geometry + // Top-level AUXILIARY for RAW_RASTER_PIPELINE: SSBOs AND textures travelling + // bundled with the upstream geometry. Buffer entries (default / TYPE: + // "storage") land in d.auxiliary; texture entries (TYPE: "image" / + // "texture" / "cubemap" / "image_cube") land in d.auxiliary_textures. p.insert({"AUXILIARY", [](descriptor& d, const sajson::value& v) { - parse_auxiliary_array(v, d.auxiliary); + parse_auxiliary_array(v, d.auxiliary, d.auxiliary_textures); }}); // Add RESOURCES parsing for CSF (which can contain both inputs and resources) @@ -1296,16 +2326,22 @@ static const ossia::string_map& root_parse{[] { auto k = obj.find_object_key_insensitive(sajson::literal("TYPE")); if(k != obj.get_length()) { - std::string type_str = obj.get_object_value(k).as_string(); + std::string type_str; + if(!get_str(obj.get_object_value(k), type_str)) + continue; boost::algorithm::to_lower(type_str); - // Handle special case for CSF image type - if(type_str == "image") + // Handle special cases for CSF image types + // "image" → 2D / 3D storage image (image2D / image3D) + // "image_cube" → writable cubemap storage image (imageCube) + if(type_str == "image" || type_str == "image_cube") { input inp; parse_input_base(inp, obj); csf_image_input ci; parse_input(ci, obj); + if(type_str == "image_cube") + ci.cubemap = true; inp.data = ci; d.inputs.push_back(inp); } @@ -1548,8 +2584,8 @@ static const ossia::string_map& root_parse{[] { = obj.find_object_key_insensitive(sajson::literal("TARGET")); 
target_k != obj.get_length()) { - p.target = obj.get_object_value(target_k).as_string(); - if(!p.target.empty()) + if(get_str(obj.get_object_value(target_k), p.target) + && !p.target.empty()) { d.pass_targets.push_back(p.target); } @@ -1619,6 +2655,54 @@ static const ossia::string_map& root_parse{[] { } } + // LAYER: render to a specific layer of a texture-array output. + if(auto layer_k + = obj.find_object_key_insensitive(sajson::literal("LAYER")); + layer_k != obj.get_length()) + { + int lyr{}; + if(get_int(obj.get_object_value(layer_k), lyr)) + p.layer = lyr; + } + + // Z: render to a specific Z-slice of a 3D target. Stored as an + // expression so it can reference $USER or input sizes; resolved + // at render time. + if(auto z_k = obj.find_object_key_insensitive(sajson::literal("Z")); + z_k != obj.get_length()) + { + auto t = obj.get_object_value(z_k).get_type(); + if(t == sajson::TYPE_STRING) + p.z_expression = obj.get_object_value(z_k).as_string(); + else if(t == sajson::TYPE_INTEGER) + p.z_expression + = std::to_string(obj.get_object_value(z_k).get_integer_value()); + else if(t == sajson::TYPE_DOUBLE) + p.z_expression + = std::to_string((int)obj.get_object_value(z_k).get_double_value()); + } + + // FORMAT: override the intermediate-render-target format for + // this pass only. Useful for separable-filter chains where one + // intermediate wants extra precision (rgba16f) but the final + // output is RGBA8. + if(auto fmt_k + = obj.find_object_key_insensitive(sajson::literal("FORMAT")); + fmt_k != obj.get_length()) + { + auto v2 = obj.get_object_value(fmt_k); + if(v2.get_type() == sajson::TYPE_STRING) + p.format = v2.as_string(); + } + + // PIPELINE_STATE: per-pass pipeline state overrides. 
+ if(auto ps_k + = obj.find_object_key_insensitive(sajson::literal("PIPELINE_STATE")); + ps_k != obj.get_length()) + { + parse_pipeline_state(obj.get_object_value(ps_k), p.override_state); + } + d.passes.push_back(std::move(p)); } } @@ -1640,25 +2724,203 @@ static const ossia::string_map& root_parse{[] { if(auto name_k = obj.find_object_key_insensitive(sajson::literal("NAME")); name_k != obj.get_length()) { - out.name = obj.get_object_value(name_k).as_string(); + get_str(obj.get_object_value(name_k), out.name); } if(auto type_k = obj.find_object_key_insensitive(sajson::literal("TYPE")); type_k != obj.get_length()) { - out.type = obj.get_object_value(type_k).as_string(); + get_str(obj.get_object_value(type_k), out.type); } // Default type to "color" if not specified if(out.type.empty()) out.type = "color"; + // LAYERS: >1 allocates a texture array with this many layers. + if(auto layers_k = obj.find_object_key_insensitive(sajson::literal("LAYERS")); + layers_k != obj.get_length()) + { + int l{}; + if(get_int(obj.get_object_value(layers_k), l) && l > 0) + out.layers = l; + } + + // DEPTH: >1 allocates a 3D texture with this depth. Passes targeting + // this output can specify Z to write into a specific slice. + if(auto depth_k = obj.find_object_key_insensitive(sajson::literal("DEPTH")); + depth_k != obj.get_length()) + { + int d_val{}; + if(get_int(obj.get_object_value(depth_k), d_val) && d_val > 0) + out.depth = d_val; + } + + // FORMAT: optional explicit texture format (e.g. "rgba16f", "r32f", "d32f"). + if(auto fmt_k = obj.find_object_key_insensitive(sajson::literal("FORMAT")); + fmt_k != obj.get_length()) + { + auto v2 = obj.get_object_value(fmt_k); + if(v2.get_type() == sajson::TYPE_STRING) + out.format = v2.as_string(); + } + + // SAMPLES: MSAA sample count (1, 2, 4, 8, 16, ...). 
+ if(auto s_k = obj.find_object_key_insensitive(sajson::literal("SAMPLES")); + s_k != obj.get_length()) + { + int s{}; + if(get_int(obj.get_object_value(s_k), s) && s >= 1) + out.samples = s; + } + + // CUBEMAP: when true the layered output is allocated as a cubemap + // (six faces sampled via samplerCube downstream) rather than a + // plain 2D array. Combines with `LAYERS: 6` + `MULTIVIEW: 6` for + // the IBL precompute case (one draw writes all six faces of the + // target cube). Consumer shaders declare a matching + // `TYPE: "cubemap"` INPUT to read it. + if(auto cube_k = obj.find_object_key_insensitive(sajson::literal("CUBEMAP")); + cube_k != obj.get_length()) + { + auto v2 = obj.get_object_value(cube_k); + if(v2.get_type() == sajson::TYPE_TRUE) + out.is_cubemap = true; + else if(v2.get_type() == sajson::TYPE_INTEGER) + out.is_cubemap = (v2.get_integer_value() != 0); + } + + // GENERATE_MIPS: post-pass mip-chain auto-fill. Implies the + // MipMapped + UsedWithGenerateMips allocator flags. Runtime + // issues a QRhiResourceUpdateBatch::generateMips after the + // render loop (and after any CUBEMAP+MULTIVIEW cube-copy). + if(auto gm_k = obj.find_object_key_insensitive(sajson::literal("GENERATE_MIPS")); + gm_k != obj.get_length()) + { + auto v2 = obj.get_object_value(gm_k); + if(v2.get_type() == sajson::TYPE_TRUE) + out.generate_mips = true; + else if(v2.get_type() == sajson::TYPE_INTEGER) + out.generate_mips = (v2.get_integer_value() != 0); + } + + // WIDTH / HEIGHT: explicit offscreen target size. Integer + // literal (fast path) or string expression (evaluated at + // init time against input-image sizes / scalar ports, + // mirroring CSF dispatch-expression semantics). Zero / + // unset → fall back to renderer.state.renderSize. 
+ if(auto w_k = obj.find_object_key_insensitive(sajson::literal("WIDTH")); + w_k != obj.get_length()) + { + auto v2 = obj.get_object_value(w_k); + if(v2.get_type() == sajson::TYPE_INTEGER) + out.width = v2.get_integer_value(); + else if(v2.get_type() == sajson::TYPE_DOUBLE) + out.width = (int)v2.get_double_value(); + else if(v2.get_type() == sajson::TYPE_STRING) + out.width_expression = v2.as_string(); + } + if(auto h_k = obj.find_object_key_insensitive(sajson::literal("HEIGHT")); + h_k != obj.get_length()) + { + auto v2 = obj.get_object_value(h_k); + if(v2.get_type() == sajson::TYPE_INTEGER) + out.height = v2.get_integer_value(); + else if(v2.get_type() == sajson::TYPE_DOUBLE) + out.height = (int)v2.get_double_value(); + else if(v2.get_type() == sajson::TYPE_STRING) + out.height_expression = v2.as_string(); + } + d.outputs.push_back(std::move(out)); } } } }}); + p.insert({"PIPELINE_STATE", [](descriptor& d, const sajson::value& v) { + parse_pipeline_state(v, d.default_state); + }}); + + p.insert({"MULTIVIEW", [](descriptor& d, const sajson::value& v) { + if(v.get_type() == sajson::TYPE_INTEGER) + d.multiview_count = v.get_integer_value(); + else if(v.get_type() == sajson::TYPE_DOUBLE) + d.multiview_count = (int)v.get_double_value(); + else if(v.get_type() == sajson::TYPE_TRUE) + d.multiview_count = 2; // "MULTIVIEW": true => 2 views by default + }}); + + // EXECUTION_MODEL (top-level, RAW_RASTER_PIPELINE). Shape: + // "EXECUTION_MODEL": { + // "TYPE": "SINGLE" | "PER_MIP" | "PER_CUBE_FACE" | "PER_LAYER" | "MANUAL", + // "TARGET": "", // PER_MIP / PER_CUBE_FACE / PER_LAYER + // "COUNT": "" // MANUAL (int literal accepted too) + // } + // Distinct from the per-pass EXECUTION_MODEL inside DISPATCH / PASSES + // (CSF compute), which lives in `dispatch_info::execution_type`. 
+ p.insert({"EXECUTION_MODEL", [](descriptor& d, const sajson::value& v) { + if(v.get_type() != sajson::TYPE_OBJECT) + return; + if(auto type_k + = v.find_object_key_insensitive(sajson::literal("TYPE")); + type_k != v.get_length()) + { + auto tv = v.get_object_value(type_k); + if(tv.get_type() == sajson::TYPE_STRING) + d.execution_model.type = tv.as_string(); + } + if(auto target_k + = v.find_object_key_insensitive(sajson::literal("TARGET")); + target_k != v.get_length()) + { + auto tv = v.get_object_value(target_k); + if(tv.get_type() == sajson::TYPE_STRING) + d.execution_model.target = tv.as_string(); + } + if(auto count_k + = v.find_object_key_insensitive(sajson::literal("COUNT")); + count_k != v.get_length()) + { + auto tv = v.get_object_value(count_k); + if(tv.get_type() == sajson::TYPE_STRING) + d.execution_model.count_expression = tv.as_string(); + else if(tv.get_type() == sajson::TYPE_INTEGER) + d.execution_model.count_expression + = std::to_string(tv.get_integer_value()); + } + }}); + + p.insert({"CLIP_DISTANCES", [](descriptor& d, const sajson::value& v) { + int n{}; + if(get_int(v, n) && n > 0 && n <= 8) + d.clip_distances = n; + }}); + + p.insert({"CULL_DISTANCES", [](descriptor& d, const sajson::value& v) { + int n{}; + if(get_int(v, n) && n > 0 && n <= 8) + d.cull_distances = n; + }}); + + p.insert({"DEPTH_LAYOUT", [](descriptor& d, const sajson::value& v) { + if(v.get_type() == sajson::TYPE_STRING) + d.depth_layout = v.as_string(); + }}); + + p.insert({"EXTENSIONS", [](descriptor& d, const sajson::value& v) { + if(v.get_type() != sajson::TYPE_ARRAY) + return; + std::size_t n = v.get_length(); + d.extensions.reserve(d.extensions.size() + n); + for(std::size_t i = 0; i < n; i++) + { + auto e = v.get_array_element(i); + if(e.get_type() == sajson::TYPE_STRING) + d.extensions.emplace_back(e.as_string()); + } + }}); + p.insert({"POINT_COUNT", [](descriptor& d, const sajson::value& v) { if(v.get_type() == sajson::TYPE_INTEGER) d.point_count = 
v.get_integer_value(); @@ -1708,7 +2970,7 @@ static const ossia::string_map& root_parse{[] { auto name_key = obj.find_object_key_insensitive(sajson::literal("NAME")); if(name_key != obj.get_length()) { - type_def.name = obj.get_object_value(name_key).as_string(); + get_str(obj.get_object_value(name_key), type_def.name); } // Parse LAYOUT field @@ -1731,7 +2993,7 @@ static const ossia::string_map& root_parse{[] { = field_obj.find_object_key_insensitive(sajson::literal("NAME")); if(field_name_key != field_obj.get_length()) { - field.name = field_obj.get_object_value(field_name_key).as_string(); + get_str(field_obj.get_object_value(field_name_key), field.name); } // Parse field TYPE @@ -1739,7 +3001,7 @@ static const ossia::string_map& root_parse{[] { = field_obj.find_object_key_insensitive(sajson::literal("TYPE")); if(field_type_key != field_obj.get_length()) { - field.type = field_obj.get_object_value(field_type_key).as_string(); + get_str(field_obj.get_object_value(field_type_key), field.type); } type_def.layout.push_back(field); @@ -1757,6 +3019,18 @@ static const ossia::string_map& root_parse{[] { return p; }()}; +// A non-empty compare op different from "never" turns the sampler into a +// shadow/comparison sampler. Mirrors QRhiSampler::CompareOp interpretation. +static bool isf_is_comparison_sampler(const sampler_config& s) +{ + if(s.compare.empty()) + return false; + std::string c = s.compare; + for(auto& ch : c) ch = (char)tolower(ch); + return c != "never"; +} + + struct create_val_visitor_450 { struct return_type @@ -1771,14 +3045,43 @@ struct create_val_visitor_450 return_type operator()(const point2d_input&) { return {"vec2", false}; } return_type operator()(const point3d_input&) { return {"vec3", false}; } return_type operator()(const color_input&) { return {"vec4", false}; } - return_type operator()(const image_input& i) { return {i.dimensions == 3 ? 
"uniform sampler3D" : "uniform sampler2D", true}; } - return_type operator()(const cubemap_input&) { return {"uniform samplerCube", true}; } + return_type operator()(const image_input& i) + { + const bool cmp = isf_is_comparison_sampler(i.sampler); + if(i.dimensions == 3) + return {"uniform sampler3D", true}; // 3D shadow samplers not commonly used + if(i.is_array) + return {cmp ? "uniform sampler2DArrayShadow" : "uniform sampler2DArray", true}; + return {cmp ? "uniform sampler2DShadow" : "uniform sampler2D", true}; + } + return_type operator()(const cubemap_input& c) + { + return {isf_is_comparison_sampler(c.sampler) ? "uniform samplerCubeShadow" + : "uniform samplerCube", + true}; + } return_type operator()(const audio_input&) { return {"uniform sampler2D", true}; } return_type operator()(const audioFFT_input&) { return {"uniform sampler2D", true}; } return_type operator()(const audioHist_input&) { return {"uniform sampler2D", true}; } return_type operator()(const storage_input&) { return {"buffer", true}; } - return_type operator()(const texture_input& i) { return {i.dimensions == 3 ? "uniform sampler3D" : "uniform sampler2D", true}; } - return_type operator()(const csf_image_input& i) { return {i.is3D() ? "uniform image3D" : "uniform image2D", true}; } + return_type operator()(const uniform_input&) { return {"uniform", true}; } + return_type operator()(const texture_input& i) + { + const bool cmp = isf_is_comparison_sampler(i.sampler); + if(i.dimensions == 3) + return {"uniform sampler3D", true}; + return {cmp ? 
"uniform sampler2DShadow" : "uniform sampler2D", true}; + } + return_type operator()(const csf_image_input& i) + { + if(i.isCube()) + return {"uniform imageCube", true}; + if(i.is3D()) + return {"uniform image3D", true}; + if(i.is_array) + return {"uniform image2DArray", true}; + return {"uniform image2D", true}; + } return_type operator()(const geometry_input&) { return {"buffer", true}; } }; @@ -1942,6 +3245,251 @@ void parser::parse_geometry_filter() m_geometry_filter = filter_ubo + geomWithoutISF + "\n"; } +// --- GLSL helpers for graphics-visible storage resources ---------------- +// +// Derive GLSL image/sampler prefix from a format string. +// Unsigned integer formats (R32UI, RGBA16UI, ...) → "u" +// Signed integer formats (R32I, RGBA16I, ...) → "i" +// Float/unorm formats (R32F, RGBA8, ...) → "" +static std::string isf_glsl_type_prefix(const std::string& format) +{ + if(format.empty()) + return ""; + std::string fmt = format; + for(auto& c : fmt) c = (char)toupper(c); + if(fmt.find("UI") != std::string::npos) + return "u"; + if(fmt.size() >= 2 && fmt.back() == 'I' && fmt[fmt.size() - 2] != 'U') + return "i"; + return ""; +} + +// Returns true when the visibility string indicates this resource should be +// declared in a graphics pipeline (vertex or fragment stage). +static bool is_graphics_visibility(std::string_view vis) +{ + return vis == "fragment" || vis == "vertex" || vis == "vertex+fragment" + || vis == "both" || vis == "graphics"; +} + +// Emit GLSL `struct { };` declarations from the TYPES +// section. Must be injected BEFORE any SSBO/UBO body that references the +// struct, in BOTH vertex and fragment stages — otherwise scene shaders that +// declare e.g. `Light` and use `readonly buffer { Light entries[]; }` fail +// VS compilation when the SSBO leaks into a vertex pipeline that never +// included the struct (the fragment-only TYPES emission was the long-standing +// bug here). 
The compute path has its own copy of this logic at +// parse_compute_shader; this helper is shared by parse_isf and +// parse_raw_raster_pipeline. +static std::string isf_emit_types_struct(const std::vector& types) +{ + if(types.empty()) + return {}; + + std::string out; + out += "// Struct definitions from TYPES section\n"; + for(const auto& type_def : types) + { + out += "struct " + type_def.name + " {\n"; + for(const auto& field : type_def.layout) + { + auto bracket = field.type.find('['); + if(bracket != std::string::npos) + out += " " + field.type.substr(0, bracket) + " " + field.name + + field.type.substr(bracket) + ";\n"; + else + out += " " + field.type + " " + field.name + ";\n"; + } + out += "};\n\n"; + } + return out; +} + +static std::string isf_emit_ssbo_decl( + int binding, std::string_view name, const storage_input& s, bool alias_prev) +{ + std::string out; + out += "layout(binding = "; + out += std::to_string(binding); + out += ", std430) "; + if(alias_prev || s.access == "read_only") + out += "readonly "; + else if(s.access == "write_only") + out += "writeonly "; + else + out += "restrict "; + out += "buffer "; + out += name; + out += "_buf {\n"; + for(const auto& field : s.layout) + { + auto bracket = field.type.find('['); + if(bracket != std::string::npos) + out += " " + field.type.substr(0, bracket) + " " + field.name + + field.type.substr(bracket) + ";\n"; + else + out += " " + field.type + " " + field.name + ";\n"; + } + out += "} "; + out += name; + out += ";\n\n"; + return out; +} + +static std::string isf_emit_ubo_decl( + int binding, std::string_view name, const uniform_input& u) +{ + std::string out; + out += "layout(binding = "; + out += std::to_string(binding); + out += ", std140) uniform "; + out += name; + out += "_t {\n"; + for(const auto& field : u.layout) + { + auto bracket = field.type.find('['); + if(bracket != std::string::npos) + out += " " + field.type.substr(0, bracket) + " " + field.name + + field.type.substr(bracket) + 
";\n"; + else + out += " " + field.type + " " + field.name + ";\n"; + } + out += "} "; + out += name; + out += ";\n\n"; + return out; +} + +static std::string isf_emit_image_decl( + int binding, std::string_view name, const csf_image_input& img, + bool alias_prev = false) +{ + std::string out; + out += "layout(binding = "; + out += std::to_string(binding); + std::string fmt = img.format.empty() ? "rgba8" : img.format; + boost::algorithm::to_lower(fmt); + out += ", "; + out += fmt; + out += ") "; + if(alias_prev || img.access == "read_only") + out += "readonly "; + else if(img.access == "write_only") + out += "writeonly "; + else + out += "restrict "; + auto prefix = isf_glsl_type_prefix(img.format); + out += "uniform "; + out += prefix; + // Shape dispatch must mirror the compute-stage emit at isf_emit_compute_- + // image_decl below: parser admits CUBEMAP / IS_ARRAY / 3D shapes; the + // bound texture's QRhi flags must agree with the GLSL declaration. + // Cube and array variants on graphics-stage csf_image_input were + // previously emitted as flat image2D, mismatching the cube/array texture + // bound by IsfBindingsBuilder's allocator and triggering Vulkan + // VUID-VkGraphicsPipelineCreateInfo-layout-07990. + // Priority: cubemap > 3D > array > 2D (matches the parser's own reject + // table at isf.cpp:1446-1463 which forbids cube+array and array+3D). + const char* shape = "image2D "; + if(img.isCube()) shape = "imageCube "; + else if(img.is3D()) shape = "image3D "; + else if(img.is_array) shape = "image2DArray "; + out += shape; + out += name; + out += ";\n"; + return out; +} + +// Emit declarations for storage_input / csf_image_input inputs for a graphics +// shader (ISF or RawRaster). Starts at `binding`, returns the next free binding. +// Also emits `name_prev` readonly declarations for persistent SSBOs. 
+static int isf_emit_graphics_storage( + std::string& out, int binding, const std::vector& inputs) +{ + for(const auto& inp : inputs) + { + if(auto* s = ossia::get_if(&inp.data)) + { + if(!is_graphics_visibility(s->visibility)) + continue; + // Indirect-draw buffers don't need shader visibility. + if(!s->buffer_usage.empty()) + continue; + out += isf_emit_ssbo_decl(binding, inp.name, *s, /*alias_prev=*/false); + binding++; + if(s->persistent) + { + out += isf_emit_ssbo_decl( + binding, inp.name + "_prev", *s, /*alias_prev=*/true); + binding++; + } + } + else if(auto* img = ossia::get_if(&inp.data)) + { + if(!is_graphics_visibility(img->visibility)) + continue; + out += isf_emit_image_decl(binding, inp.name, *img, /*alias_prev=*/false); + binding++; + if(img->persistent) + { + out += isf_emit_image_decl( + binding, inp.name + "_prev", *img, /*alias_prev=*/true); + binding++; + } + } + else if(auto* u = ossia::get_if(&inp.data)) + { + if(!is_graphics_visibility(u->visibility)) + continue; + out += isf_emit_ubo_decl(binding, inp.name, *u); + binding++; + } + } + return binding; +} + +// The #extension pragma must come BEFORE any declarations — emit it separately +// so it can be prepended right after #version. +static std::string isf_emit_multiview_extension(int view_count) +{ + std::string out; + out += "#extension GL_EXT_multiview : require\n"; + out += "#define VIEW_INDEX gl_ViewIndex\n"; + out += "#define NUM_VIEWS "; + out += std::to_string(view_count); + out += "\n"; + return out; +} + +// User-declared EXTENSIONS from the descriptor. Emitted alongside the +// multiview extension, each as `#extension : require`. Advanced +// effects (subgroup ops, atomic floats, ray queries, …) go through here. 
+static std::string isf_emit_user_extensions(const std::vector& exts) +{ + std::string out; + for(const auto& e : exts) + { + if(e.empty()) + continue; + out += "#extension "; + out += e; + out += " : require\n"; + } + return out; +} + +// Emit the multiview view-projection UBO. +static std::string isf_emit_multiview_ubo(int binding, int view_count) +{ + std::string out; + out += "layout(std140, binding = "; + out += std::to_string(binding); + out += ") uniform multiview_t { mat4 viewProjection["; + out += std::to_string(view_count); + out += "]; } isf_mv;\n"; + return out; +} + void parser::parse_isf() { using namespace std::literals; @@ -1960,6 +3508,35 @@ void parser::parse_isf() m_desc.passes.push_back(isf::pass{}); } + // Fragment-mode ISF cannot drive PASSES that target a 3D / Z-sliced + // OUTPUT: that requires per-Z-slice color attachments / 3D image + // storage plumbing through the pass-target allocator and the + // beginPass site, which the RenderedISFNode renderer does not yet + // wire end-to-end. Authors should use a CSF compute shader + // (EXECUTION_MODEL: 3D_IMAGE) for true volumetric writes; refusing + // to load here is loud and prevents a silent 2D downgrade that + // would make every imageStore / fragment write target the wrong + // memory. + for(const auto& pass : m_desc.passes) + { + bool target_is_3d = false; + for(const auto& out : m_desc.outputs) + { + if(out.name == pass.target && out.depth > 1) + { + target_is_3d = true; + break; + } + } + if(!pass.z_expression.empty() || target_is_3d) + { + throw invalid_file{ + "fragment-mode ISF with PASSES targeting Z / 3D OUTPUTS is not " + "yet supported in this engine — use CSF compute " + "(EXECUTION_MODEL: 3D_IMAGE) for volumetric writes."}; + } + } + auto& d = m_desc; // We start from empty strings. @@ -1972,9 +3549,17 @@ void parser::parse_isf() switch(m_version) { case 450: { + // Extensions pragma block — must come right after #version, before + // any layout/uniform/in/out declarations. 
+ std::string extensions_prelude; + if(d.multiview_count >= 2) + extensions_prelude += isf_emit_multiview_extension(d.multiview_count); + extensions_prelude += isf_emit_user_extensions(d.extensions); + // Setup vertex shader { m_vertex = GLSL45.versionPrelude; + m_vertex += extensions_prelude; if(m_sourceVertex.empty()) { @@ -1990,6 +3575,18 @@ void parser::parse_isf() { // Setup fragment shader m_fragment = GLSL45.versionPrelude; + m_fragment += extensions_prelude; + + // LAYER_INDEX for layered / multi-layer outputs: the vertex shader writes + // to gl_Layer and the fragment shader receives it via a flat varying. + bool has_layered_output = (d.multiview_count >= 2); + for(const auto& out : d.outputs) + if(out.layers > 1) + has_layered_output = true; + if(has_layered_output) + { + m_fragment += "#define LAYER_INDEX gl_Layer\n"; + } if(d.outputs.empty()) { @@ -2027,11 +3624,34 @@ void parser::parse_isf() } } } + + // Conservative-depth qualifier on gl_FragDepth (ISF path). + if(!d.depth_layout.empty()) + { + std::string dl = d.depth_layout; + for(auto& c : dl) c = (char)tolower(c); + const char* q = nullptr; + if(dl == "greater") q = "depth_greater"; + else if(dl == "less") q = "depth_less"; + else if(dl == "unchanged") q = "depth_unchanged"; + else if(dl == "any") q = "depth_any"; + if(q) + { + m_fragment += "layout("; + m_fragment += q; + m_fragment += ") out float gl_FragDepth;\n"; + } + } } // Setup the parameters UBOs std::string material_ubos = GLSL45.defaultUniforms; + // TYPES section structs must be visible in BOTH stages because SSBO + // declarations referencing them (e.g. `Light entries[]`) are appended + // to material_ubos, which is in turn injected into both VS and FS. 
+ material_ubos += isf_emit_types_struct(d.types); + int sampler_binding = 3; if(!d.inputs.empty() || !d.pass_targets.empty()) @@ -2043,6 +3663,14 @@ void parser::parse_isf() uniforms += "layout(std140, binding = 2) uniform material_t {\n"; for(const isf::input& val : d.inputs) { + // Storage buffers / storage images are declared separately after + // samplers — skip them here to avoid emitting invalid GLSL. + if(ossia::get_if(&val.data) + || ossia::get_if(&val.data) + || ossia::get_if(&val.data) + || ossia::get_if(&val.data)) + continue; + auto [type, isSampler] = ossia::visit(create_val_visitor_450{}, val.data); if(isSampler) @@ -2069,6 +3697,18 @@ void parser::parse_isf() sampler_binding++; } } + else if(auto* cube = ossia::get_if(&val.data)) + { + if(cube->depth) + { + samplers += "layout(binding = "; + samplers += std::to_string(sampler_binding); + samplers += ") uniform samplerCube "; + samplers += val.name; + samplers += "_depth;\n"; + sampler_binding++; + } + } } else { @@ -2088,8 +3728,25 @@ void parser::parse_isf() } } + // Pass targets are bound as sampler2D for cross-pass reads. Two + // independent dedup checks: + // 1) the same TARGET can appear in multiple PASSES entries (e.g. + // LAYERS where each layer is a pass writing to the same target) + // — we must only emit one sampler per distinct name. + // 2) a TARGET may also appear as a FRAGMENT_OUTPUT for the current + // pass (typical for OUTPUTS with LAYERS) — those collide with + // the `out vec4 ;` declaration emitted above and would + // cause "redefinition" at GLSL compile time. 
+ std::set output_names; + for(const auto& out : d.outputs) + output_names.insert(out.name); + std::set emitted_targets; for(const std::string& target : d.pass_targets) { + if(output_names.count(target)) + continue; + if(!emitted_targets.insert(target).second) + continue; samplers += "layout(binding = "; samplers += std::to_string(sampler_binding); samplers += ") uniform sampler2D "; @@ -2110,6 +3767,21 @@ void parser::parse_isf() } material_ubos += samplers; + + // Storage buffers (SSBOs) and storage images visible to the graphics + // pipeline. Bindings continue after samplers. + sampler_binding = isf_emit_graphics_storage( + material_ubos, sampler_binding, d.inputs); + + // Multiview UBO: injected when MULTIVIEW >= 2 in the descriptor. + // Only the UBO here — the #extension pragma must come right after + // #version, so it's emitted separately below. + if(d.multiview_count >= 2) + { + material_ubos += isf_emit_multiview_ubo( + sampler_binding, d.multiview_count); + sampler_binding++; + } } m_vertex += material_ubos; @@ -2159,6 +3831,17 @@ void parser::parse_raw_raster_pipeline() m_desc.mode = isf::descriptor::RawRaster; + // If FRAGMENT_OUTPUTS declares multiple outputs but OUTPUTS was not + // explicitly provided, auto-populate desc.outputs so the node graph + // creates the right number of output ports (one per attachment). + if(m_desc.outputs.empty() && m_desc.fragment_outputs.size() > 1) + { + for(const auto& fo : m_desc.fragment_outputs) + { + m_desc.outputs.push_back(output_declaration{.name = fo.name, .type = "color"}); + } + } + // Add the raw raster uniforms { static const auto default_ins = [] { @@ -2240,8 +3923,56 @@ void parser::parse_raw_raster_pipeline() m_vertex = GLSL45.versionPrelude; m_fragment = GLSL45.versionPrelude; + // Extensions pragma block — must come right after #version. + // GL_ARB_shader_draw_parameters exposes gl_BaseInstance / gl_BaseVertex / + // gl_DrawIDARB in the vertex shader. 
Required by MDI shaders that index + // per-draw data (per_draws[gl_BaseInstance], etc.). Harmless when unused. + m_vertex += "#extension GL_ARB_shader_draw_parameters : require\n"; + + if(m_desc.multiview_count >= 2) + { + std::string ext = isf_emit_multiview_extension(m_desc.multiview_count); + m_vertex += ext; + m_fragment += ext; + } + + { + std::string user_ext = isf_emit_user_extensions(m_desc.extensions); + m_vertex += user_ext; + m_fragment += user_ext; + } + + // LAYER_INDEX for layered outputs. + { + bool has_layered_output = (m_desc.multiview_count >= 2); + for(const auto& out : m_desc.outputs) + if(out.layers > 1) + has_layered_output = true; + if(has_layered_output) + m_fragment += "#define LAYER_INDEX gl_Layer\n"; + } + // Write down the inputs / outputs { + // Integer / boolean types require the `flat` interpolation qualifier on + // varyings (VERTEX_OUTPUTS → FRAGMENT_INPUTS). Without it, Vulkan GLSL + // compilation fails: "'uint' : must be qualified as flat in". + auto needs_flat = [](attribute_type t) { + return (t >= attribute_type::Int && t <= attribute_type::Uint4) + || (t >= attribute_type::Bool && t <= attribute_type::Bool4); + }; + + // Interpolation qualifier for a varying: user-specified (if valid) wins + // over the auto "flat" promotion for integer/bool types. + auto interp_qualifier = [&](const vertex_attribute& a) -> const char* { + if(a.interpolation == "flat") return "flat"; + if(a.interpolation == "noperspective") return "noperspective"; + if(a.interpolation == "centroid") return "centroid"; + if(a.interpolation == "sample") return "sample"; + if(a.interpolation == "smooth") return ""; // default, no keyword needed + return needs_flat(a.type) ? 
"flat" : ""; + }; + // Vertex for(auto& attr : m_desc.vertex_inputs) m_vertex += fmt::format( @@ -2249,22 +3980,56 @@ void parser::parse_raw_raster_pipeline() attribute_type_map.at((int)attr.type), attr.name); for(auto& attr : m_desc.vertex_outputs) m_vertex += fmt::format( - "layout(location = {}) out {} {};\n", attr.location, + "layout(location = {}) {} out {} {};\n", attr.location, + interp_qualifier(attr), attribute_type_map.at((int)attr.type), attr.name); for(auto& attr : m_desc.fragment_inputs) m_fragment += fmt::format( - "layout(location = {}) in {} {};\n", attr.location, + "layout(location = {}) {} in {} {};\n", attr.location, + interp_qualifier(attr), attribute_type_map.at((int)attr.type), attr.name); for(auto& attr : m_desc.fragment_outputs) m_fragment += fmt::format( "layout(location = {}) out {} {};\n", attr.location, attribute_type_map.at((int)attr.type), attr.name); + + // Clip / cull distances: user-declared count controls the size of the + // gl_ClipDistance / gl_CullDistance arrays. Required on some GLSL + // profiles; always explicit on Vulkan GLSL. + if(m_desc.clip_distances > 0) + m_vertex += fmt::format( + "out float gl_ClipDistance[{}];\n", m_desc.clip_distances); + if(m_desc.cull_distances > 0) + m_vertex += fmt::format( + "out float gl_CullDistance[{}];\n", m_desc.cull_distances); + + // Conservative-depth qualifier on gl_FragDepth. Allowed values map to + // GLSL layout qualifiers: greater/less/unchanged/any. 
+ if(!m_desc.depth_layout.empty()) + { + std::string dl = m_desc.depth_layout; + for(auto& c : dl) c = (char)tolower(c); + const char* q = nullptr; + if(dl == "greater") q = "depth_greater"; + else if(dl == "less") q = "depth_less"; + else if(dl == "unchanged") q = "depth_unchanged"; + else if(dl == "any") q = "depth_any"; + if(q) + m_fragment += fmt::format( + "layout({}) out float gl_FragDepth;\n", q); + } } { // Setup the parameters UBOs std::string material_ubos = GLSL45.defaultUniforms; + // TYPES section structs visible in BOTH stages — see the matching emit + // in parse_isf for the rationale (SSBO bodies referencing user structs + // leak into VS via material_ubos and previously failed to compile when + // VISIBILITY was fragment-only). + material_ubos += isf_emit_types_struct(d.types); + int sampler_binding = 3; if(!d.inputs.empty()) @@ -2276,6 +4041,44 @@ void parser::parse_raw_raster_pipeline() uniforms += "layout(std140, binding = 2) uniform material_t {\n"; for(const isf::input& val : d.inputs) { + // Storage buffers / storage images / geometry inputs / UBOs are declared + // separately after samplers. BUT their synthesized host-side size ints + // (storage flex-array size, geometry $USER counts) ARE packed into this + // material blob, so they must be declared here too — otherwise every + // uniform after them reads shifted. Mirrors the CSF Params block. + if(auto* storage = ossia::get_if(&val.data)) + { + if(storage->access.find("write") != std::string::npos + && !storage->layout.empty() + && storage->layout.back().type.find("[]") != std::string::npos) + { + num_uniform++; + uniforms += "int " + val.name + "_size;\n"; + globalvars += "int " + val.name + "_size = isf_material_uniforms." + + val.name + "_size;\n"; + } + continue; + } + if(auto* geo = ossia::get_if(&val.data)) + { + auto emit_synth_int = [&](const std::string& nm) { + num_uniform++; + uniforms += "int " + nm + ";\n"; + globalvars += "int " + nm + " = isf_material_uniforms." 
+ nm + ";\n"; + }; + if(geo->vertex_count.find("$USER") != std::string::npos) + emit_synth_int(val.name + "_vertex_count"); + if(geo->instance_count.find("$USER") != std::string::npos) + emit_synth_int(val.name + "_instance_count"); + for(const auto& aux : geo->auxiliary) + if(aux.size.find("$USER") != std::string::npos) + emit_synth_int(val.name + "_" + aux.name + "_size"); + continue; + } + if(ossia::get_if(&val.data) + || ossia::get_if(&val.data)) + continue; + auto [type, isSampler] = ossia::visit(create_val_visitor_450{}, val.data); if(isSampler) @@ -2302,6 +4105,18 @@ void parser::parse_raw_raster_pipeline() sampler_binding++; } } + else if(auto* cube = ossia::get_if(&val.data)) + { + if(cube->depth) + { + samplers += "layout(binding = "; + samplers += std::to_string(sampler_binding); + samplers += ") uniform samplerCube "; + samplers += val.name; + samplers += "_depth;\n"; + sampler_binding++; + } + } } else { @@ -2337,39 +4152,153 @@ void parser::parse_raw_raster_pipeline() material_ubos += samplers; } + // Storage buffers (SSBOs) and storage images declared via INPUTS with + // TYPE=storage or TYPE=image (visible to graphics stages). + sampler_binding = isf_emit_graphics_storage( + material_ubos, sampler_binding, d.inputs); + // Auxiliary SSBOs (from top-level AUXILIARY key) std::string ssbo_decls; - for(const auto& aux : d.auxiliary) - { - ssbo_decls += "layout(binding = " + std::to_string(sampler_binding) + ", std430) "; - if(aux.access == "read_only") - ssbo_decls += "readonly "; - else if(aux.access == "write_only") - ssbo_decls += "writeonly "; + // Emit a single buffer block for an auxiliary. `qualifier` is the std430 + // access qualifier ("readonly" / "writeonly" / "restrict") and `var` is + // the variable name (differs from `aux.name` for the _prev ping-pong + // slot). 
+ auto emit_aux_block + = [&](const geometry_input::auxiliary_request& aux, int binding, + const char* qualifier, const std::string& var) { + if(aux.is_uniform) + { + // std140 UBO: no access qualifier (UBOs are inherently read-only + // from GLSL), `uniform` instead of `buffer`. + ssbo_decls += "layout(std140, binding = " + std::to_string(binding) + ") uniform "; + } else - ssbo_decls += "restrict "; - - ssbo_decls += "buffer " + aux.name + "_buf {\n"; + { + ssbo_decls += "layout(binding = " + std::to_string(binding) + ", std430) "; + ssbo_decls += qualifier; + ssbo_decls += " buffer "; + } + ssbo_decls += var; + ssbo_decls += "_buf {\n"; for(const auto& field : aux.layout) { - // Handle array types: "vec4[512]" → "vec4 entries[512];" auto bracket = field.type.find('['); if(bracket != std::string::npos) - { ssbo_decls += " " + field.type.substr(0, bracket) + " " + field.name + field.type.substr(bracket) + ";\n"; - } else - { ssbo_decls += " " + field.type + " " + field.name + ";\n"; + } + ssbo_decls += "} "; + ssbo_decls += var; + ssbo_decls += ";\n\n"; + }; + + for(const auto& aux : d.auxiliary) + { + const char* access_qualifier + = (aux.access == "read_only") ? "readonly" + : (aux.access == "write_only") ? "writeonly" + : "restrict"; + + // Persistent ping-pong only makes sense for writable SSBOs. UBOs + // declared persistent silently fall back to a single-block decl + // (the flag is ignored by the runtime allocator on the UBO path). + if(aux.persistent && !aux.is_uniform) + { + // Ping-pong pair: _prev is the previous frame's read-only snapshot, + // is the current frame's writable buffer. Runtime swaps + // the two buffer pointers each frame. 
+ emit_aux_block(aux, sampler_binding, "readonly", aux.name + "_prev"); + sampler_binding++; + emit_aux_block(aux, sampler_binding, access_qualifier, aux.name); + sampler_binding++; + } + else + { + emit_aux_block(aux, sampler_binding, access_qualifier, aux.name); + sampler_binding++; + } + } + material_ubos += ssbo_decls; + + // Auxiliary textures (from top-level AUXILIARY with TYPE: image / + // texture / cubemap / image_cube / storage_*). No input port; the + // renderer resolves them from ossia::geometry::auxiliary_textures + // by name. Sampled textures emit `sampler*` decls with texture() + // semantics; storage images emit `image*` decls with imageLoad / + // imageStore semantics. + std::string aux_tex_decls; + for(const auto& atx : d.auxiliary_textures) + { + if(atx.is_storage) + { + // Storage image: imageLoad/Store target. FORMAT layout qualifier + // is mandatory on writable images; defaults to rgba8. + // Cube-arrays are parser-rejected so no imageCubeArray branch. + const char* image_type = "image2D"; + if(atx.is_cubemap) image_type = "imageCube"; + else if(atx.dimensions == 3) image_type = "image3D"; + else if(atx.is_array) image_type = "image2DArray"; + + const char* access_q = + (atx.access == "read_only") ? "readonly " : + (atx.access == "write_only") ? "writeonly " : ""; + + // Integer formats (r32ui, r32i, rgba32ui, …) require the + // `uimage*` / `iimage*` GLSL variants — the bare `image*` type + // paired with an integer layout qualifier is a compile error. + // Reuses the same prefix helper csf_image_input declarations + // already use, so float / int / uint emission stays consistent + // across the rasterizer-aux and csf-input code paths. 
+ std::string scalar_prefix = isf_glsl_type_prefix(atx.format); + + aux_tex_decls += "layout(binding = " + std::to_string(sampler_binding) + + ", " + atx.format + ") uniform " + access_q + + scalar_prefix + image_type + " " + + atx.name + ";\n"; + sampler_binding++; + } + else + { + const bool cmp = isf_is_comparison_sampler(atx.sampler); + const char* sampler_type = "sampler2D"; + // Precedence: cubemap > 3D > array > 2D. sampler3D does not nest + // with array in core GLSL, so is_array is ignored when dimensions==3. + // Cube-arrays (samplerCubeArray) are parser-rejected — no backend + // plumbs CubeMap|TextureArray views correctly. + if(atx.is_cubemap) + sampler_type = cmp ? "samplerCubeShadow" : "samplerCube"; + else if(atx.dimensions == 3) + sampler_type = "sampler3D"; + else if(atx.is_array) + sampler_type = cmp ? "sampler2DArrayShadow" : "sampler2DArray"; + else + sampler_type = cmp ? "sampler2DShadow" : "sampler2D"; + + aux_tex_decls += "layout(binding = " + std::to_string(sampler_binding) + + ") uniform " + sampler_type + " " + atx.name + ";\n"; + sampler_binding++; + + // Paired depth sampler when DEPTH:true on a plain 2D tex. + if(atx.is_depth && !atx.is_cubemap && atx.dimensions != 3 && !atx.is_array) + { + aux_tex_decls += "layout(binding = " + std::to_string(sampler_binding) + + ") uniform sampler2D " + atx.name + "_depth;\n"; + sampler_binding++; } } - ssbo_decls += "} " + aux.name + ";\n\n"; + } + material_ubos += aux_tex_decls; + // Multiview UBO: injected when MULTIVIEW >= 2. 
+ if(m_desc.multiview_count >= 2) + { + material_ubos += isf_emit_multiview_ubo( + sampler_binding, m_desc.multiview_count); sampler_binding++; } - material_ubos += ssbo_decls; int model_ubo_binding = sampler_binding; material_ubos += fmt::format( @@ -2385,6 +4314,18 @@ void parser::parse_raw_raster_pipeline() m_fragment += material_ubos; } + // The raw-raster path replaces gl_FragCoord → isf_FragCoord for the + // same Y-flip behaviour as fullscreen ISF, but unlike ISF the raw-raster + // FS prelude didn't define the macro — causing "isf_FragCoord : + // undeclared identifier" for any shader using gl_FragCoord. + m_fragment += R"_( +#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL) +#define isf_FragCoord vec4(gl_FragCoord.x, RENDERSIZE.y - gl_FragCoord.y, gl_FragCoord.z, gl_FragCoord.w) +#else +#define isf_FragCoord gl_FragCoord +#endif +)_"; + // Add the actual vert / frag code m_vertex += m_sourceVertex; m_fragment += fragWithoutISF; @@ -2392,6 +4333,9 @@ void parser::parse_raw_raster_pipeline() // Replace the special ISF stuff boost::replace_all(m_fragment, "gl_FragColor", "isf_FragColor"); boost::replace_all(m_fragment, "vv_Frag", "isf_Frag"); + + // Sanity-check ATTRIBUTES.TYPE references — see helper above. + validate_attribute_types(m_desc); } void parser::parse_shadertoy() @@ -2866,6 +4810,46 @@ void main(void) } // Helper function to escape JSON strings +// Serialize a sampler_config's non-empty fields as JSON key/value pairs +// onto `oss`, each prefixed with `", "`. Mirrors parse_sampler_config +// exactly so the JSON round-trip is lossless. Writes nothing when every +// field is at its default (empty strings, unset optionals). 
+static void emit_sampler_config(std::ostream& oss, const isf::sampler_config& s) +{ + auto esc = [](const std::string& x) { + std::string out; + out.reserve(x.size()); + for(char c : x) + { + if(c == '"' || c == '\\') { out += '\\'; out += c; } + else out += c; + } + return out; + }; + auto str_field = [&](const char* key, const std::string& val) { + if(!val.empty()) + oss << ", \"" << key << "\": \"" << esc(val) << "\""; + }; + auto float_field = [&](const char* key, const std::optional& val) { + if(val) oss << ", \"" << key << "\": " << *val; + }; + + str_field("WRAP", s.wrap); + str_field("WRAP_S", s.wrap_s); + str_field("WRAP_T", s.wrap_t); + str_field("WRAP_R", s.wrap_r); + str_field("FILTER", s.filter); + str_field("MIN_FILTER", s.min_filter); + str_field("MAG_FILTER", s.mag_filter); + str_field("MIPMAP_MODE", s.mipmap_mode); + str_field("BORDER_COLOR", s.border_color); + str_field("COMPARE", s.compare); + float_field("ANISOTROPY", s.anisotropy); + float_field("LOD_BIAS", s.lod_bias); + float_field("MIN_LOD", s.min_lod); + float_field("MAX_LOD", s.max_lod); +} + static auto escape_json(const std::string& str) -> std::string { std::string result; @@ -2926,6 +4910,24 @@ std::string parser::write_isf() const oss << "\n"; } oss << " ]"; + if(!m_desc.inputs.empty() || !m_desc.passes.empty() + || !m_desc.extensions.empty()) + oss << ","; + oss << "\n"; + } + + // Add extensions if present + if(!m_desc.extensions.empty()) + { + oss << " \"EXTENSIONS\": [\n"; + for(size_t i = 0; i < m_desc.extensions.size(); ++i) + { + oss << " \"" << escape_json(m_desc.extensions[i]) << "\""; + if(i + 1 < m_desc.extensions.size()) + oss << ","; + oss << "\n"; + } + oss << " ]"; if(!m_desc.inputs.empty() || !m_desc.passes.empty()) oss << ","; oss << "\n"; @@ -3037,6 +5039,8 @@ std::string parser::write_isf() const oss << ",\n \"DEFAULT\": [" << (*p.def)[0] << ", " << (*p.def)[1] << ", " << (*p.def)[2] << "]"; } + if(p.as_color) + oss << ",\n \"AS_COLOR\": true"; oss << "\n"; } @@ 
-3065,17 +5069,29 @@ std::string parser::write_isf() const oss << " \"TYPE\": \"image\""; if(img.depth) oss << ",\n \"DEPTH\": true"; + if(img.is_array) + oss << ",\n \"IS_ARRAY\": true"; + if(img.dimensions != 2) + oss << ",\n \"DIMENSIONS\": " << img.dimensions; + oss << "\n"; + } + void operator()(const cubemap_input& c) + { + oss << " \"TYPE\": \"cubemap\""; + if(c.depth) + oss << ",\n \"DEPTH\": true"; oss << "\n"; } - void operator()(const cubemap_input&) { oss << " \"TYPE\": \"cubemap\"\n"; } void operator()(const audio_input& a) { oss << " \"TYPE\": \"audio\""; if(a.max > 0) - { oss << ",\n \"MAX\": " << a.max; - } + if(!a.sampler.filter.empty()) + oss << ",\n \"FILTER\": \"" << escape_json(a.sampler.filter) << "\""; + if(!a.sampler.wrap.empty()) + oss << ",\n \"WRAP\": \"" << escape_json(a.sampler.wrap) << "\""; oss << "\n"; } @@ -3083,9 +5099,11 @@ std::string parser::write_isf() const { oss << " \"TYPE\": \"audioFFT\""; if(a.max > 0) - { oss << ",\n \"MAX\": " << a.max; - } + if(!a.sampler.filter.empty()) + oss << ",\n \"FILTER\": \"" << escape_json(a.sampler.filter) << "\""; + if(!a.sampler.wrap.empty()) + oss << ",\n \"WRAP\": \"" << escape_json(a.sampler.wrap) << "\""; oss << "\n"; } @@ -3093,9 +5111,11 @@ std::string parser::write_isf() const { oss << " \"TYPE\": \"audioHistogram\""; if(a.max > 0) - { oss << ",\n \"MAX\": " << a.max; - } + if(!a.sampler.filter.empty()) + oss << ",\n \"FILTER\": \"" << escape_json(a.sampler.filter) << "\""; + if(!a.sampler.wrap.empty()) + oss << ",\n \"WRAP\": \"" << escape_json(a.sampler.wrap) << "\""; oss << "\n"; } @@ -3104,6 +5124,12 @@ std::string parser::write_isf() const { oss << " \"TYPE\": \"storage\",\n"; oss << " \"ACCESS\": \"" << s.access << "\""; + if(!s.buffer_usage.empty()) + oss << ",\n \"BUFFER_USAGE\": \"" << escape_json(s.buffer_usage) << "\""; + if(s.persistent) + oss << ",\n \"PERSISTENT\": true"; + if(!s.visibility.empty() && s.visibility != "fragment") + oss << ",\n \"VISIBILITY\": \"" << 
escape_json(s.visibility) << "\""; if(!s.layout.empty()) { oss << ",\n \"LAYOUT\": [\n"; @@ -3121,13 +5147,41 @@ std::string parser::write_isf() const oss << "\n"; } + void operator()(const uniform_input& u) + { + oss << " \"TYPE\": \"uniform\",\n"; + oss << " \"LAYOUT\": [\n"; + for(std::size_t k = 0; k < u.layout.size(); ++k) + { + const auto& f = u.layout[k]; + oss << " { \"NAME\": \"" << escape_json(f.name) + << "\", \"TYPE\": \"" << escape_json(f.type) << "\" }"; + if(k + 1 < u.layout.size()) + oss << ","; + oss << "\n"; + } + oss << " ]"; + if(!u.visibility.empty() && u.visibility != "vertex+fragment") + oss << ",\n \"VISIBILITY\": \"" << escape_json(u.visibility) << "\""; + oss << "\n"; + } + void operator()(const texture_input&) { oss << " \"TYPE\": \"texture\"\n"; } void operator()(const csf_image_input& img) { oss << " \"TYPE\": \"image\",\n"; oss << " \"ACCESS\": \"" << img.access << "\",\n"; - oss << " \"FORMAT\": \"" << img.format << "\"\n"; + oss << " \"FORMAT\": \"" << img.format << "\""; + if(!img.visibility.empty() && img.visibility != "compute") + oss << ",\n \"VISIBILITY\": \"" << escape_json(img.visibility) << "\""; + if(img.persistent) + oss << ",\n \"PERSISTENT\": true"; + if(img.is_array) + oss << ",\n \"IS_ARRAY\": true"; + if(!img.layers_expression.empty()) + oss << ",\n \"LAYERS\": \"" << escape_json(img.layers_expression) << "\""; + oss << "\n"; } void operator()(const geometry_input& geo) @@ -3144,6 +5198,8 @@ std::string parser::write_isf() const try { std::stoi(geo.instance_count); oss << ",\n \"INSTANCE_COUNT\": " << geo.instance_count; } catch(...) 
{ oss << ",\n \"INSTANCE_COUNT\": \"" << escape_json(geo.instance_count) << "\""; } } + if(!geo.format_id.empty()) + oss << ",\n \"FORMAT_ID\": \"" << escape_json(geo.format_id) << "\""; if(!geo.attributes.empty()) { oss << ",\n \"ATTRIBUTES\": [\n"; @@ -3168,14 +5224,20 @@ std::string parser::write_isf() const } oss << " ]"; } - if(!geo.auxiliary.empty()) + if(!geo.auxiliary.empty() || !geo.auxiliary_textures.empty()) { oss << ",\n \"AUXILIARY\": [\n"; - for(size_t i = 0; i < geo.auxiliary.size(); ++i) + const size_t nb = geo.auxiliary.size(); + const size_t nt = geo.auxiliary_textures.size(); + for(size_t i = 0; i < nb; ++i) { const auto& aux = geo.auxiliary[i]; oss << " {\"NAME\": \"" << escape_json(aux.name) << "\""; - if(!aux.access.empty()) + // TYPE: "uniform" for UBO-kind aux. SSBO kind omits TYPE — + // default parse dispatch lands there. + if(aux.is_uniform) + oss << ", \"TYPE\": \"uniform\""; + if(!aux.access.empty() && !aux.is_uniform) oss << ", \"ACCESS\": \"" << escape_json(aux.access) << "\""; if(!aux.size.empty()) { @@ -3196,7 +5258,53 @@ std::string parser::write_isf() const oss << "]"; } oss << "}"; - if(i < geo.auxiliary.size() - 1) + if(i < nb - 1 || nt > 0) + oss << ","; + oss << "\n"; + } + // Texture auxiliaries — identifying TYPE field so parse round- + // trips via aux_entry_is_texture. Full sampler_config fields + // are emitted via emit_sampler_config so WRAP/FILTER/COMPARE + // etc. round-trip losslessly. + for(size_t i = 0; i < nt; ++i) + { + const auto& atx = geo.auxiliary_textures[i]; + oss << " {\"NAME\": \"" << escape_json(atx.name) << "\""; + // TYPE field — reuse the specific storage_* variants so + // parse dispatch and re-emit stay symmetric. 
+ if(atx.is_storage) + { + if(atx.is_cubemap && atx.is_array) + oss << ", \"TYPE\": \"storage_cube\""; // Note: no array-cube storage variant in current vocabulary + else if(atx.is_cubemap) + oss << ", \"TYPE\": \"storage_cube\""; + else if(atx.dimensions == 3) + oss << ", \"TYPE\": \"storage_3d\""; + else if(atx.is_array) + oss << ", \"TYPE\": \"storage_image_array\""; + else + oss << ", \"TYPE\": \"storage_image\""; + } + else if(atx.is_cubemap) + oss << ", \"TYPE\": \"cubemap\""; + else + oss << ", \"TYPE\": \"image\""; + if(atx.is_array && !atx.is_storage) + oss << ", \"IS_ARRAY\": true"; + if(atx.dimensions != 2 && !atx.is_storage) + oss << ", \"DIMENSIONS\": " << atx.dimensions; + if(atx.is_depth) + oss << ", \"DEPTH\": true"; + if(atx.is_storage) + { + if(!atx.format.empty() && atx.format != "rgba8") + oss << ", \"FORMAT\": \"" << escape_json(atx.format) << "\""; + if(!atx.access.empty() && atx.access != "read_write") + oss << ", \"ACCESS\": \"" << escape_json(atx.access) << "\""; + } + emit_sampler_config(oss, atx.sampler); + oss << "}"; + if(i < nt - 1) oss << ","; oss << "\n"; } @@ -3274,14 +5382,32 @@ std::string parser::write_isf() const try { std::stod(pass.height_expression); - oss << " \"HEIGHT\": " << pass.height_expression; + oss << " \"HEIGHT\": " << pass.height_expression << ",\n"; } catch(...) { - oss << " \"HEIGHT\": \"" << escape_json(pass.height_expression) << "\""; + oss << " \"HEIGHT\": \"" << escape_json(pass.height_expression) << "\",\n"; } } + if(!pass.z_expression.empty()) + { + try + { + std::stod(pass.z_expression); + oss << " \"Z\": " << pass.z_expression << ",\n"; + } + catch(...) 
+ { + oss << " \"Z\": \"" << escape_json(pass.z_expression) << "\",\n"; + } + } + + if(!pass.format.empty()) + { + oss << " \"FORMAT\": \"" << escape_json(pass.format) << "\",\n"; + } + // Remove trailing comma if last property auto str = oss.str(); if(str.size() > 2 && str[str.size() - 2] == ',') @@ -3435,6 +5561,18 @@ void parser::parse_vsa() sampler_binding++; } } + else if(auto* cube = ossia::get_if(&val.data)) + { + if(cube->depth) + { + samplers += "layout(binding = "; + samplers += std::to_string(sampler_binding); + samplers += ") uniform samplerCube "; + samplers += val.name; + samplers += "_depth;\n"; + sampler_binding++; + } + } } else { @@ -3517,6 +5655,9 @@ void parser::parse_csf() // Add version m_fragment += "#version 450\n\n"; + // User-declared GLSL EXTENSIONS must come right after #version. + m_fragment += isf_emit_user_extensions(m_desc.extensions); + // Add standard ProcessUBO uniforms (same as ISF/VSA) m_fragment += GLSL45.defaultUniforms; m_fragment += "\n"; @@ -3527,34 +5668,37 @@ void parser::parse_csf() ", local_size_y = ISF_LOCAL_SIZE_Y" ", local_size_z = ISF_LOCAL_SIZE_Z) in;\n\n"; - // Generate struct definitions from TYPES section + // Generate struct definitions from TYPES section. + // + // No auto-padding: GLSL+std430 handles alignment based on actual member + // types (vec4 16B-aligned, float/uint 4B-aligned, struct rounds to its + // largest member). The previous "(4 - field_count % 4) % 4 trailing + // floats" heuristic was based on the field count modulo 4, completely + // unrelated to real alignment, and silently grew the struct stride + // when field_count wasn't a multiple of 4. 
RawLight (7 fields) became + // 68B → 80B std430-stride here while every rasterizer (graphics-path + // TYPES emitter has no such heuristic) and ScenePreprocessor's + // RawLight arena both use 64B stride — pack_lights_from_points writes + // landed at 80B intervals while the consumer rasterizer read at 64B + // intervals, garbling every slot past index 0 (the user's symptom: + // procedural light positions acting like colours, all lights piled up + // at the constant light_color value). Mirror the graphics-path + // emitter (isf_emit_types_struct) verbatim instead. if(!m_desc.types.empty()) { m_fragment += "// Struct definitions from TYPES section\n"; for(const auto& type_def : m_desc.types) { - m_fragment += "struct " + type_def.name + " \n{\n"; - + m_fragment += "struct " + type_def.name + " {\n"; for(const auto& field : type_def.layout) { auto bracket = field.type.find('['); if(bracket != std::string::npos) - m_fragment += " " + field.type.substr(0, bracket) + " " + field.name + m_fragment += " " + field.type.substr(0, bracket) + " " + field.name + field.type.substr(bracket) + ";\n"; else - m_fragment += " " + field.type + " " + field.name + ";\n"; - } - - // Add padding calculation for struct alignment - // This is a simplified approach - proper padding would require more complex size calculations - int field_count = type_def.layout.size(); - int padding_needed - = (4 - (field_count % 4)) % 4; // Simple 16-byte alignment padding - for(int i = 0; i < padding_needed; i++) - { - m_fragment += " float pad" + std::to_string(i) + ";\n"; + m_fragment += " " + field.type + " " + field.name + ";\n"; } - m_fragment += "};\n\n"; } } @@ -3678,6 +5822,20 @@ void parser::parse_csf() } } } + else if(auto* storage = ossia::get_if(&inp.data)) + { + // A writable storage buffer whose LAYOUT ends in a flexible-array + // member gets a synthesized host-side size int (see ISFVisitors / + // RenderedCSFNode). 
Declare it here so this std140 block matches the + // packed material blob; otherwise every uniform after it reads shifted. + if(storage->access.find("write") != std::string::npos + && !storage->layout.empty() + && storage->layout.back().type.find("[]") != std::string::npos) + { + k++; + material_block += " int " + inp.name + "_size;\n"; + } + } } material_block += "};\n\n"; @@ -3736,6 +5894,7 @@ void parser::parse_csf() // Generate resource bindings m_fragment += "// From RESOURCES - bindings assigned automatically\n"; + bool emitted_indirect_struct = false; for(const auto& inp : m_desc.inputs) { if(auto* storage_ptr = ossia::get_if(&inp.data)) @@ -3772,34 +5931,50 @@ void parser::parse_csf() { const auto& img = *img_ptr; - m_fragment += "layout(binding = " + std::to_string(binding); + // Emit the primary image binding, then — if persistent — emit a + // readonly `_prev` alias at the following slot. The runtime + // ping-pongs between two textures and swaps pointers each frame so + // the shader sees current-frame writes on `` and the previous + // frame's state on `_prev`. 
+ auto emit_image = [&](int b, const std::string& decl_name, bool alias_prev) { + m_fragment += "layout(binding = " + std::to_string(b); - // Add format qualifier - if(!img.format.empty()) - { - std::string format = img.format; - boost::algorithm::to_lower(format); - m_fragment += ", " + format; - } - else - { - m_fragment += ", rgba8"; // Default format - } + if(!img.format.empty()) + { + std::string format = img.format; + boost::algorithm::to_lower(format); + m_fragment += ", " + format; + } + else + { + m_fragment += ", rgba8"; // Default format + } - m_fragment += ") "; + m_fragment += ") "; - // Add access qualifiers - if(img.access == "read_only") - m_fragment += "readonly "; - else if(img.access == "write_only") - m_fragment += "writeonly "; - else - m_fragment += "restrict "; + if(alias_prev || img.access == "read_only") + m_fragment += "readonly "; + else if(img.access == "write_only") + m_fragment += "writeonly "; + else + m_fragment += "restrict "; - auto prefix = glsl_type_prefix(img.format); - m_fragment += "uniform " + prefix + (img.is3D() ? 
"image3D " : "image2D "); - m_fragment += inp.name + ";\n"; + auto prefix = glsl_type_prefix(img.format); + const char* shape = "image2D"; + if(img.isCube()) shape = "imageCube"; + else if(img.is3D()) shape = "image3D"; + else if(img.is_array) shape = "image2DArray"; + m_fragment += "uniform " + prefix + shape + " "; + m_fragment += decl_name + ";\n"; + }; + + emit_image(binding, inp.name, /*alias_prev=*/false); binding++; + if(img.persistent) + { + emit_image(binding, inp.name + "_prev", /*alias_prev=*/true); + binding++; + } } else if(auto* tex_ptr = ossia::get_if(&inp.data)) { @@ -3809,6 +5984,11 @@ void parser::parse_csf() m_fragment += inp.name + ";\n"; binding++; } + else if(auto* uni_ptr = ossia::get_if(&inp.data)) + { + m_fragment += isf_emit_ubo_decl(binding, inp.name, *uni_ptr); + binding++; + } else if(auto* geo_ptr = ossia::get_if(&inp.data)) { const auto& geo = *geo_ptr; @@ -3816,6 +5996,26 @@ void parser::parse_csf() m_fragment += "// Geometry input \"" + inp.name + "\" — SoA: one SSBO per attribute\n"; m_fragment += "#define ISF_READ(geo, attr) geo ## _ ## attr ## _in\n"; m_fragment += "#define ISF_WRITE(geo, attr) geo ## _ ## attr ## _out\n"; + // Nested-aux structured-SSBO/UBO instance access. Resolves to the + // instance name emitted by the SSBO/UBO block below — bare aux name + // when there's no cross-geometry collision, prefixed otherwise. + // Use this instead of writing `scene_cluster_aabbs.data[...]` by + // hand: the macro keeps shaders working if the same aux name later + // appears in another geometry input and forces a name collision + // (the SSBO emitter switches to the prefixed instance name then). + m_fragment += "#define ISF_AUX(geo, name) geo ## _ ## name\n"; + // Nested-aux image access (storage images: read_only / write_only / + // read_write). For images there's no _in / _out distinction at the + // GLSL level — the same identifier carries the full access mode + // determined by the layout qualifier. 
Same one-name-per-image + // contract applies via the alias #define emitted in the texture + // block below. + m_fragment += "#define ISF_IMG(geo, name) geo ## _ ## name\n"; + // Nested-aux sampler access (read-only sampled textures with + // texture()/textureLod()/etc.). Symmetric to ISF_IMG — separate + // macro because the GLSL type differs (samplerXY vs imageXY) and + // future shaders may want to grep for usage independently. + m_fragment += "#define ISF_TEX(geo, name) geo ## _ ## name\n"; for(const auto& attr : geo.attributes) { @@ -3873,16 +6073,24 @@ void parser::parse_csf() const bool collides = colliding_aux_names.count(aux.name) > 0; const std::string instance_name = collides ? aux_prefix : aux.name; - m_fragment += "layout(binding = " + std::to_string(binding) + ", std430) "; - - if(aux.access == "read_only") - m_fragment += "readonly "; - else if(aux.access == "write_only") - m_fragment += "writeonly "; + if(aux.is_uniform) + { + // std140 UBO: no access qualifier, `uniform` not `buffer`. + m_fragment += "layout(std140, binding = " + std::to_string(binding) + ") uniform "; + } else - m_fragment += "restrict "; + { + m_fragment += "layout(binding = " + std::to_string(binding) + ", std430) "; + if(aux.access == "read_only") + m_fragment += "readonly "; + else if(aux.access == "write_only") + m_fragment += "writeonly "; + else + m_fragment += "restrict "; + m_fragment += "buffer "; + } - m_fragment += "buffer " + aux_prefix + "_buf {\n"; + m_fragment += aux_prefix + "_buf {\n"; for(const auto& field : aux.layout) { // Handle array types: "vec4[512]" → "vec4 entries[512];" @@ -3899,14 +6107,17 @@ void parser::parse_csf() } m_fragment += "} " + instance_name + ";\n"; - // Generate ISF_READ/ISF_WRITE-compatible aliases - if(aux.access == "read_only") + // Generate ISF_READ/ISF_WRITE-compatible aliases. UBOs are always + // read-only from GLSL's perspective (the `access` field is ignored + // for UBO kind), so only the `_in` / unqualified aliases exist. 
+ const std::string eff_access = aux.is_uniform ? "read_only" : aux.access; + if(eff_access == "read_only") { m_fragment += "#define " + aux_prefix + "_in " + instance_name + "\n"; if(!collides) m_fragment += "#define " + aux_prefix + " " + instance_name + "\n"; } - else if(aux.access == "write_only") + else if(eff_access == "write_only") { m_fragment += "#define " + aux_prefix + "_out " + instance_name + "\n"; if(!collides) @@ -3916,12 +6127,110 @@ void parser::parse_csf() { m_fragment += "#define " + aux_prefix + "_in " + instance_name + "\n"; m_fragment += "#define " + aux_prefix + "_out " + instance_name + "\n"; + if(!collides) + m_fragment += "#define " + aux_prefix + " " + instance_name + "\n"; } m_fragment += "\n"; binding++; } + // Auxiliary textures (travel with the geometry; resolved by the + // renderer from ossia::geometry::auxiliary_textures by name). + // RenderedCSFNode binds them right after aux SSBOs in the compute + // SRB build loop — order here must match that order. + // + // Each texture is emitted under its bare aux name (e.g. + // `voxel_grid`) — same convention as the structured-SSBO/UBO block + // above when there's no name collision. A `#define + // _ ` alias is also emitted so author shaders can + // use either the prefixed form directly OR the ISF_IMG / + // ISF_TEX macros (which expand to `geo ## _ ## aux`). Keeps + // image-aux access symmetric with SSBO/UBO-aux access. + for(const auto& atx : geo.auxiliary_textures) + { + const std::string aux_prefix = inp.name + "_" + atx.name; + const bool aliased = (aux_prefix != atx.name); + + if(atx.is_storage) + { + // Cube-arrays are parser-rejected so no imageCubeArray branch. + const char* image_type = "image2D"; + if(atx.is_cubemap) image_type = "imageCube"; + else if(atx.dimensions == 3) image_type = "image3D"; + else if(atx.is_array) image_type = "image2DArray"; + + const char* access_q = + (atx.access == "read_only") ? "readonly " : + (atx.access == "write_only") ? 
"writeonly " : ""; + + // Integer formats (r32ui, r32i, …) require uimage*/iimage*. + std::string scalar_prefix = isf_glsl_type_prefix(atx.format); + + m_fragment += "layout(binding = " + std::to_string(binding) + + ", " + atx.format + ") uniform " + access_q + + scalar_prefix + image_type + " " + + atx.name + ";\n"; + if(aliased) + m_fragment += "#define " + aux_prefix + " " + atx.name + "\n"; + binding++; + } + else + { + const bool cmp = isf_is_comparison_sampler(atx.sampler); + const char* sampler_type = "sampler2D"; + // Cube-arrays (samplerCubeArray) are parser-rejected — no QRhi + // backend plumbs CubeMap|TextureArray views correctly. + if(atx.is_cubemap) + sampler_type = cmp ? "samplerCubeShadow" : "samplerCube"; + else if(atx.dimensions == 3) + sampler_type = "sampler3D"; + else if(atx.is_array) + sampler_type = cmp ? "sampler2DArrayShadow" : "sampler2DArray"; + else + sampler_type = cmp ? "sampler2DShadow" : "sampler2D"; + + m_fragment += "layout(binding = " + std::to_string(binding) + + ") uniform " + sampler_type + " " + atx.name + ";\n"; + if(aliased) + m_fragment += "#define " + aux_prefix + " " + atx.name + "\n"; + binding++; + + if(atx.is_depth && !atx.is_cubemap && atx.dimensions != 3 && !atx.is_array) + { + m_fragment += "layout(binding = " + std::to_string(binding) + + ") uniform sampler2D " + atx.name + "_depth;\n"; + if(aliased) + m_fragment += "#define " + aux_prefix + "_depth " + + atx.name + "_depth\n"; + binding++; + } + } + } + + // Indirect draw command buffer (user-writable SSBO) + if(geo.indirect) + { + if(!emitted_indirect_struct) + { + m_fragment += "struct DrawIndirectCommand {\n" + " uint vertexCount;\n" + " uint instanceCount;\n" + " uint firstVertex;\n" + " int baseVertex;\n" + " uint firstInstance;\n" + "};\n\n"; + emitted_indirect_struct = true; + } + const std::string buf_name = inp.name + "_indirect"; + m_fragment += "layout(binding = " + std::to_string(binding) + ", std430) " + "restrict buffer " + buf_name + "_buf {\n" + " 
DrawIndirectCommand " + buf_name + "[];\n" + "};\n"; + m_fragment += "#define ISF_INDIRECT(" + inp.name + ") " + buf_name + "\n\n"; + binding++; + } + // Element count uniform (packed into the material UBO or standalone) m_fragment += "// Element count for geometry input \"" + inp.name + "\"\n"; m_fragment += "// (set by the renderer from ossia::geometry::vertices)\n"; @@ -3944,6 +6253,11 @@ void parser::parse_csf() // Add the user's compute shader code (without the JSON header) boost::algorithm::trim(compWithoutCSF); m_fragment += compWithoutCSF; + + // Sanity-check: every ATTRIBUTES.TYPE references a real GLSL built-in + // or a TYPES entry. Throws invalid_file with the offending name on + // miss — surfaces typos at parse time. + validate_attribute_types(m_desc); } descriptor::Mode parser::mode() const diff --git a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp index dd0ff5f4ec..6aae6b8ff9 100644 --- a/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp +++ b/src/plugins/score-plugin-gfx/3rdparty/libisf/src/isf.hpp @@ -34,13 +34,25 @@ struct long_input using has_minmax = std::true_type; std::vector> values; std::vector labels; - std::size_t def{}; // index of default value (enum mode) or default value (numeric mode) + + // Enum mode (values/labels non-empty): `def` is the INDEX into `values`. + // Numeric mode (values empty, min/max set): `def` is the default VALUE. + // + // The shader always receives the selected numeric VALUE from `values[i]` + // (for int/double entries) or the INDEX (for string-only VALUES, since + // GLSL can't consume strings). The renderer's UBO-init path resolves this + // index→value step so the initial shader state matches what arrives after + // any user interaction — see ISFNode.cpp / GeometryFilterNode.cpp long_input + // port visitors. 
+ std::size_t def{}; // Numeric mode: when values/labels are empty and min/max are set, - // create an IntSpinBox instead of a ComboBox. + // create an IntSpinBox instead of a ComboBox. In that mode `def` is the + // default value directly (not an index). std::optional min; std::optional max; }; + struct float_input { using value_type = double; @@ -66,6 +78,12 @@ struct point3d_input std::optional def{}; std::optional min{}; std::optional max{}; + + // AS_COLOR: hint to the UI that this vec3 should be shown as a color + // swatch (RGB picker) rather than three spin boxes. Useful for e.g. + // direction-as-RGB visualisations where editing components individually + // is awkward. Does not affect the GLSL type (still vec3). + bool as_color{false}; }; struct color_input @@ -77,29 +95,133 @@ struct color_input std::optional max{}; }; +// Sampler configuration fields shared by image/texture/cubemap inputs. +// All fields are optional: empty/unset string keeps the current default. +// Address modes accept: "repeat", "clamp_to_edge"/"clamp", "mirror"/"mirrored_repeat", +// "mirror_once"/"mirror_clamp_to_edge". +// Filter modes accept: "nearest", "linear" (and "none" for mipmap_mode). +// Border color accepts: "transparent_black"/"transparent", "opaque_black", "opaque_white". +// Compare op accepts: "never", "less", "less_equal"/"lequal", "equal", +// "greater", "greater_equal"/"gequal", "not_equal"/"neq", "always". +// When set (and not "never") a comparison sampler is created and +// the GLSL type becomes sampler*Shadow. Supported on 2D, +// 2D-array and cubemap samplers (image/texture/cubemap inputs +// and AUXILIARY textures); cube-arrays are parser-rejected. Silently dropped with a +// stderr warning on 3D inputs (sampler3DShadow is not a core +// GLSL type) — use a 2D / 2D-array / cube shadow instead. +// With the engine's reverse-Z convention, the typical +// compare op for a standard "shadowed if closer" test is +// "greater_equal" (not "less_equal"). 
+struct sampler_config +{ + std::string wrap; // Applied to all 3 axes if individual WRAP_S/T/R unset + std::string wrap_s; + std::string wrap_t; + std::string wrap_r; + std::string filter; // Applied to both min and mag if individual MIN/MAG_FILTER unset + std::string min_filter; + std::string mag_filter; + std::string mipmap_mode; + std::optional anisotropy; + std::string border_color; + std::optional lod_bias; + std::optional min_lod; + std::optional max_lod; + std::string compare; // empty / "never" = no comparison sampler +}; + struct image_input { - int dimensions{2}; // 2 or 3 - bool depth{false}; // true = shader wants sampleable depth on this input + int dimensions{2}; // 2 or 3 + bool depth{false}; // true = shader wants sampleable depth on this input + bool is_array{false}; // true = sampler2DArray rather than sampler2D + // STATIC: producer publishes a long-lived QRhiTexture that downstream binds + // directly; engine skips the consumer-side render-target allocation. Use for + // precomputed LUTs, IBL bakes, asset caches — anything where the upstream + // is a CPU producer (avnd gpu_texture_output, etc.) rather than an ISF / + // raster pass that draws into the consumer's RT each frame. Orthogonal to + // dimensions / is_array (cube + 3D + array inputs already grab from source + // implicitly because they can't be 2D color attachments anyway). + bool is_static{false}; + sampler_config sampler; }; struct cubemap_input { + // DEPTH: true = request a sampleable depth cube alongside the color cube. + // Mirrors image_input::depth: pairs the main `samplerCube` (or + // `samplerCubeShadow` under COMPARE) with a `samplerCube _depth` + // companion for raw depth reads. Useful for omni-directional scene probes + // where the upstream provides both a colour cube and its depth cube. 
+ // For plain shadow-cube sampling (HW PCF only) set COMPARE instead and + // leave DEPTH false — the texture already has to be depth-format for the + // compare sampler to return meaningful values. + // + // Note: cube-arrays (samplerCubeArray) are intentionally NOT exposed. No + // QRhi backend (Vulkan/D3D12/Metal/GL) constructs a cube-array view + // correctly from the CubeMap | TextureArray flag combination, so the + // shader-side type would always disagree with the bound resource. Bind N + // individual cubemap inputs instead, or decompose to a sampler2DArray + // with face math in the shader. + bool depth{false}; + sampler_config sampler; +}; + +// Sampler state accepted by all audio input flavours. Reuses the same +// string vocabulary as sampler_config (see above) — any unrecognised or +// empty string keeps the built-in default (linear / clamp_to_edge). Full +// sampler_config is overkill here: audio textures are 1-mip 2D samplers +// with no COMPARE / BORDER_COLOR / LOD semantics, so only FILTER and WRAP +// are honoured. Nearest filtering is the common ask for band-exact FFT +// reads where linear interpolation would smear adjacent bins. +struct audio_sampler_config +{ + std::string filter; // "nearest" or "linear" (default) + std::string wrap; // "repeat", "clamp_to_edge"/"clamp", "mirror"/"mirrored_repeat" }; struct audio_input { int max{}; + audio_sampler_config sampler; }; struct audioFFT_input { int max{}; + audio_sampler_config sampler; }; struct audioHist_input { int max{}; + audio_sampler_config sampler; +}; + +// UBO-style input declared in INPUTS as `"TYPE": "uniform"`. +// +// Emitted as `layout(std140, binding=N) uniform <name>_t { ... } <name>;` +// and bound via QRhiShaderResourceBinding::uniformBuffer (not bufferLoad). +// +// Use for small (≤ MaxUniformBufferRange, typically 16KB), read-only data +// like cameras, light/material counts, indexing constants. For larger or +// writable data, use `storage_input` (SSBO) instead. 
+struct uniform_input +{ + // Reuse storage_input's layout_field shape via full struct definition here + // to keep the type self-contained. + struct layout_field + { + std::string name; + std::string type; + }; + + std::vector layout; + + // VISIBILITY: which shader stage(s) see this binding in a graphics pipeline. + // Accepted values: "vertex+fragment"/"both" (default), "fragment", "vertex", + // "compute" (implicit for CSF). + std::string visibility{"vertex+fragment"}; }; // CSF-specific input types @@ -116,11 +238,22 @@ struct storage_input std::vector layout; std::string buffer_usage; // "", "indirect_draw", "indirect_draw_indexed" + + // PERSISTENT: creates a ping-pong pair of SSBOs swapped each frame. + // In GLSL, `name` is the current (read-write) buffer, `name_prev` is the + // previous frame's read-only buffer. + bool persistent{false}; + + // VISIBILITY: which shader stage(s) see this binding in a graphics pipeline. + // Accepted values: "fragment" (default), "vertex", "vertex+fragment"/"both", + // "compute" (implicit for CSF), "none" (no shader binding). + std::string visibility{"fragment"}; }; struct texture_input { int dimensions{2}; // 2 or 3 + sampler_config sampler; }; struct csf_image_input @@ -134,7 +267,45 @@ struct csf_image_input int dimensions{2}; // 2 or 3 (alternative to depth_expression for declaring 3D) + // Set internally when the RESOURCES entry uses TYPE: "image_cube". + // Writable cubemap (imageCube in GLSL, QRhiTexture::CubeMap | + // UsedWithLoadStore). Width must equal height (face edge length). Use for + // in-compute reflection-probe baking, environment IBL, etc. Read-only + // sampling of the same data is done via TYPE: "cubemap". + bool cubemap{false}; + + // IS_ARRAY: writable 2D texture array (image2DArray in GLSL, allocated + // via QRhi::newTextureArray + UsedWithLoadStore). Layer count comes from + // layers_expression (LAYERS: "$USER" / literal). 
Useful for shadow + // cascades, layered G-buffers, compute-written texture atlases. + // + // Cube-arrays (imageCubeArray) are intentionally NOT supported: no QRhi + // backend plumbs CubeMap | TextureArray views correctly, and the shader- + // side type would disagree with the bound resource. The parser rejects + // is_array + cubemap combinations with a stderr warning. + bool is_array{false}; + std::string layers_expression; // LAYERS: expression for arraySize, may contain $USER + + // VISIBILITY: which shader stage(s) see this binding. + // Accepted: "compute" (default), "fragment", "vertex", "vertex+fragment"/"both". + std::string visibility{"compute"}; + + // PERSISTENT: creates a ping-pong pair of images swapped each frame. + // In GLSL, `` is the current (write or read_write) image and + // `_prev` is the previous frame's read-only image — mirrors the + // storage_input convention. Works for both 2D and 3D images. + bool persistent{false}; + + // GENERATE_MIPS: when true, the runtime runs QRhi's generateMips() on + // this image after every frame's compute dispatches complete, so + // downstream samplers with MIPMAP_MODE: linear / nearest see a valid + // mip chain instead of zero-filled upper levels. Ignored for 3D images, + // cubemaps, and 2D arrays where generateMips semantics differ across + // QRhi backends (per-face / per-layer / per-slice). + bool generate_mips{false}; + bool is3D() const noexcept { return dimensions == 3 || !depth_expression.empty(); } + bool isCube() const noexcept { return cubemap; } }; // CSF geometry port input: SoA layout, one SSBO per attribute. @@ -164,27 +335,101 @@ struct geometry_input std::optional forward; }; - // Structured SSBOs that travel with the geometry (matched by name - // against ossia::geometry::auxiliary_buffer entries). + // Structured buffers that travel with the geometry (matched by name + // against ossia::geometry::auxiliary_buffer entries). 
Default kind is + // SSBO (`layout(std430) buffer`); set `is_uniform = true` to declare a + // std140 UBO instead (`layout(std140) uniform`). struct auxiliary_request { std::string name; std::string access; // "read_only", "write_only", "read_write" + // (meaningful for SSBO kind only; UBO is always read-only from GLSL) std::vector layout; std::string size; // expression for flexible array count, may contain $USER + // (SSBO only; UBOs require fixed-size layouts per std140) // If set, this auxiliary is forwarded from another geometry's upstream. std::optional forward; + + // Raw-raster only: when true the node owns a ping-pong pair of buffers + // (allocated from the LAYOUT + SIZE) that are swapped each frame, and + // the auxiliary is NOT resolved from upstream geometry. In GLSL, + // `<name>` is the current (writable) buffer, `<name>_prev` is the + // previous frame's read-only buffer. Useful for temporal accumulation + // / history buffers that live only in the rendering node. + // (SSBO only; persistent ping-pong makes no sense for read-only UBOs.) + bool persistent{false}; + + // When true, declare/bind this auxiliary as a std140 uniform block + // (`layout(std140, binding=N) uniform name_t { … } name;`) and bind + // with QRhiShaderResourceBinding::uniformBuffer. When false (default), + // it's an std430 SSBO. The upstream geometry's + // ossia::geometry::auxiliary_buffer is kind-agnostic — the shader's + // declaration alone determines how the buffer is bound. + bool is_uniform{false}; + }; + + // Texture variant of auxiliary: resolved from ossia::geometry::auxiliary_textures + // by name, no score input port. Declared in the top-level AUXILIARY array + // with TYPE: "image" / "texture" / "cubemap". Unlike regular INPUTS + // textures, does not create an input port — the texture handle travels + // bundled with the geometry (e.g. ScenePreprocessor ships `base_color_array` + // / `skybox` / `shadow_atlas`). 
+ struct auxiliary_texture_request + { + std::string name; + int dimensions{2}; // 2 or 3 + bool is_array{false}; // sampler2DArray when true + bool is_cubemap{false};// samplerCube when true + bool is_depth{false}; // sampleable depth (promotes comparison when cfg set) + // Storage-image kind: emit `image2D/3D/Cube/Array` with imageLoad/ + // imageStore semantics instead of `sampler2D/…` with texture(). Set + // by TYPE: "storage_image" in the AUXILIARY JSON. Paired with: + // - `format`: GLSL layout qualifier (e.g. "rgba8", "r32f", "rgba16f"). + // - `access`: "read_only" / "write_only" / "read_write", controlling + // imageLoad / imageStore / imageLoadStore binding type + the + // GLSL `readonly`/`writeonly` decoration. + bool is_storage{false}; + std::string format{"rgba8"}; // only meaningful when is_storage + std::string access{"read_write"}; // only meaningful when is_storage + + // Sizing expressions for write_only / read_write storage images. Same + // convention as csf_image_input (top-level INPUTS images): an integer + // literal or a `$variable` reference resolved against the shader's + // long/float input ports + the standard $WIDTH/$HEIGHT/$DEPTH/$LAYERS + // family. Empty → engine falls back to renderer state (renderSize for + // 2D, voxel-resolution heuristics for 3D). When the engine + // auto-allocates a writable nested-aux storage image, these strings + // drive its dimensions; for sampled (read-only) entries they're + // ignored — the texture comes from the upstream producer at whatever + // size that producer baked. 
+ std::string width_expression; + std::string height_expression; + std::string depth_expression; // 3rd dimension for 3D textures + std::string layers_expression; // array slice count for 2D arrays + + sampler_config sampler; }; std::vector attributes; std::vector auxiliary; + std::vector auxiliary_textures; std::string vertex_count; // expression string, may contain $USER std::string instance_count; // expression string, may contain $USER - bool indirect_draw{false}; // compute shader writes draw args to an indirect buffer - std::string indirect_draw_type; // "draw" (default) or "draw_indexed" + // Optional format identity stamped onto the consumer geometry's + // filter_tag (rapidhash truncated to 32 bits). Only meaningful on + // RESOURCES of TYPE: geometry used as outputs (geoOut). Empty leaves + // filter_tag at 0 (the "untagged" sentinel) — no routing change for + // CSFs that don't author an output format. + std::string format_id; + + struct indirect_request + { + std::string count; // expression string (same resolver as vertex_count) + }; + std::optional indirect; }; struct input @@ -193,7 +438,7 @@ struct input float_input, long_input, event_input, bool_input, color_input, point2d_input, point3d_input, image_input, cubemap_input, audio_input, audioFFT_input, audioHist_input, storage_input, texture_input, csf_image_input, - geometry_input>; + geometry_input, uniform_input>; std::string name; std::string label; @@ -290,10 +535,53 @@ struct vertex_attribute int location{}; attribute_type type{}; std::string name; + + // Optional explicit ossia attribute_semantic name ("position", "velocity", + // "texcoord0", ..., "custom"). Only meaningful on `vertex_input` (raw + // raster), where it controls how the runtime matches the declared input + // to an upstream geometry attribute — same lookup algorithm as CSF + // attribute_request. When empty, the parser implicitly uses `name` as the + // semantic key. 
Set to "custom" to force exact-name matching against + // custom attributes. + std::string semantic; + + // Interpolation qualifier (only applicable to vertex_output / fragment_input). + // Allowed: "smooth" (default), "flat", "noperspective", "centroid", "sample". + // "sample" forces per-sample fragment shading on this varying — the fragment + // shader runs once per MSAA sample for that coverage. Required when MSAA + // outputs need per-sample correct interpolation (specular highlights, + // normal-mapped surfaces). Empty string = default smooth. + std::string interpolation; }; struct vertex_input : vertex_attribute { + // When false, the raw-raster renderer tolerates an upstream geometry that + // does not carry a matching attribute: instead of failing the pipeline + // build, it synthesises a tiny PerInstance step_rate=1 buffer filled with + // a neutral "identity" value (zero for translation, white for color, 1 + // for roughness, etc.) and binds that in place of the missing upstream + // attribute. Lets a single shader cover both instanced and non-instanced + // upstreams without per-shape variants. + // + // When false AND `default_val` is set, those explicit numbers are used + // verbatim (after component-truncation / zero-padding against the + // declared TYPE). When false AND `default_val` is empty, the runtime + // looks the semantic up in a built-in whitelist (see + // score::gfx::vertexFallbackDefault) — non-whitelisted semantics without + // an explicit DEFAULT are rejected at pipeline-build time with a clear + // error to avoid silently-wrong rendering. + // + // When true (default), the upstream geometry MUST provide the attribute + // or the pipeline build fails — existing strict behaviour. + bool required{true}; + + // Explicit DEFAULT numbers from the JSON header. Stored as doubles for + // JSON fidelity; converted to the runtime format (float / int) at + // buffer-build time. Empty = use the whitelist neutral (see `required`). 
+ // Length is not pre-validated against TYPE here — the runtime truncates + // or zero-pads to match the declared GLSL type width. + std::vector default_val; }; struct vertex_output : vertex_attribute { @@ -305,6 +593,92 @@ struct fragment_output : vertex_attribute { }; +// --- Pipeline state control (PIPELINE_STATE descriptor key) --------------- +// +// All fields are optional (std::optional): missing = keep current/legacy +// default. Two instances live in `descriptor`: a global `default_state` +// (from PIPELINE_STATE), and a per-pass `override_state` that merges on top. + +struct blend_attachment +{ + bool enable{false}; + std::string src_color{"src_alpha"}; + std::string dst_color{"one_minus_src_alpha"}; + std::string op_color{"add"}; + std::string src_alpha{"one"}; + std::string dst_alpha{"one_minus_src_alpha"}; + std::string op_alpha{"add"}; + std::string color_write{"rgba"}; // "rgba", "rgb", "r", ... +}; + +struct stencil_op_state +{ + std::string fail_op{"keep"}; + std::string depth_fail_op{"keep"}; + std::string pass_op{"keep"}; + std::string compare_op{"always"}; +}; + +struct pipeline_state +{ + std::optional depth_test; + std::optional depth_write; + std::optional depth_compare; // "less", "less_equal", "greater", ... + std::optional depth_bias; + std::optional slope_scaled_depth_bias; + + std::optional cull_mode; // "none", "front", "back" + std::optional front_face; // "ccw", "cw" + std::optional polygon_mode;// "fill", "line" + std::optional line_width; + + // Procedural-draw override (Vertex Shader Art style). When + // `vertex_count` is set, the renderer issues a single + // cb.draw(vertex_count, instance_count, 0, 0) and ignores the + // incoming geometry's index / indirect buffers entirely. The vertex + // shader drives positions purely from gl_VertexIndex + + // gl_InstanceIndex. Use cases: + // - Fullscreen passes: VERTEX_COUNT=3, TOPOLOGY=triangles (skybox). + // - VSA-style plasma / curves: VERTEX_COUNT=10000, + // TOPOLOGY=line_strip. 
+ // - Procedural particle grids: VERTEX_COUNT=65536, TOPOLOGY=points. + // + // Safety: if VERTEX_INPUTS is non-empty (the shader declares vertex + // attribute reads), the renderer clamps vertex_count to the incoming + // geometry's vertex_count to avoid reading past buffer ends. Shaders + // that rely purely on gl_VertexIndex should declare an empty + // `VERTEX_INPUTS: []` so the pipeline is built with no vertex + // bindings and the draw count is used verbatim. + std::optional vertex_count; + std::optional instance_count; + // Topology override. When unset, the incoming geometry's topology is + // used. Values: "triangles", "triangle_strip", "triangle_fan", + // "lines", "line_strip", "points". + std::optional topology; + + // Blending: either a single state applied to all color attachments, or a + // per-attachment vector. If both are present the per-attachment wins. + std::optional blend_all; + std::vector blend_per_attachment; + + // Stencil (optional) + std::optional stencil_test; + std::optional stencil_read_mask; + std::optional stencil_write_mask; + std::optional stencil_front; + std::optional stencil_back; + + // Variable-rate shading (VRS). + // "SHADING_RATE": [w, h] — per-draw shading rate where w,h ∈ {1, 2, 4}. + // [1,1] = 1×1 (full rate, default). + // [2,2] = 1 invocation per 2×2 pixel block. + // [4,4] = 1 per 4×4 block. + // Combined with a shading-rate map (set on the render target) the actual + // rate is the per-draw rate combined with the per-tile rate via the chosen + // combiner op. Requires QRhi::Feature::VariableRateShading (Vulkan, D3D12). + std::optional> shading_rate; +}; + struct pass { std::string target; @@ -313,12 +687,85 @@ struct pass bool nearest_filter{}; std::string width_expression{}; std::string height_expression{}; + + // Render to a specific layer of a texture-array output (-1 = layer 0). + int layer{-1}; + + // Render to a specific Z-slice of a 3D output. 
Expression string so the + // slice can be computed from inputs (e.g. "$USER_slice"). Empty = slice 0 + // when the target is 3D, or irrelevant when 2D. + std::string z_expression{}; + + // Optional format override for the intermediate render target of this + // pass (e.g. "rgba16f" for precision-sensitive blur stages). Empty = use + // FLOAT: true mapping (rgba32f / rgba8) as before. + std::string format{}; + + // Per-pass pipeline state overrides (merged with descriptor.default_state). + pipeline_state override_state; }; struct output_declaration { std::string name; // User-chosen name (e.g. "color", "sceneDepth") std::string type; // "color" (default) or "depth" + + // LAYERS: >1 allocates a texture array with this many layers. + int layers{1}; + + // DEPTH: >1 allocates a 3D texture of this depth. Mutually exclusive with + // LAYERS (a ThreeDimensional texture is not a TextureArray). A fragment + // PASSES entry with Z renders into a single Z-slice via a color attachment + // with setLayer(z). + int depth{1}; + + // FORMAT: optional explicit texture format ("rgba8", "rgba16f", "r32f", "d32f", ...). + // Empty = use the default (RGBA8 for color, D32F for depth). + std::string format; + + // SAMPLES: MSAA sample count (1, 2, 4, 8, 16, 32, 64). 1 = no MSAA (default). + // The renderer allocates an MSAA texture and inserts an automatic resolve + // pass when downstream consumers expect a non-MSAA input. Each declared + // OUTPUT can have its own sample count; the depth attachment for a colour + // OUTPUT inherits the same sample count. + int samples{1}; + + // CUBEMAP: when true the output is allocated with the QRhi cubemap flag + // so downstream consumers can bind it as a samplerCube. Implies + // `layers == 6` on allocation even when the shader didn't set LAYERS + // explicitly. Used by the IBL precompute path (irradiance_convolve, + // prefilter_ggx) together with MULTIVIEW:6. 
+ bool is_cubemap{false}; + + // GENERATE_MIPS: when true the runtime calls generateMips() on this + // output's texture after the render pass completes, auto-averaging + // the base level into a full mip chain. Implies the QRhi + // `MipMapped` + `UsedWithGenerateMips` flags on allocation. Use this + // for "source-data" targets whose base level is authored by the + // fragment shader and whose sub-mips should be GPU-filtered (skybox + // converter, base color textures, SSAO LUTs…). NOT for the + // prefilter-style case where each mip has distinct shader-authored + // content — use EXECUTION_MODEL: PER_MIP instead. + bool generate_mips{false}; + + // WIDTH / HEIGHT: explicit target size for offscreen outputs. Set + // by the shader author when the intrinsic size of the algorithm + // isn't tied to the window / swap-chain (IBL precompute, shadow + // atlases, post-process LUTs, …). Zero → fall back to the + // renderer's render-size (classic behaviour). Integer literal or + // string expression; the expression is evaluated once at init + // against the same variable surface as CSF dispatch expressions + // ($WIDTH_ / $HEIGHT_ / scalar input values). + // + // All colour OUTPUTs of a single RAW_RASTER_PIPELINE shader share + // a render pass and must therefore resolve to the same final size; + // the runtime uses the first colour OUTPUT's resolved size as the + // RT size and allocates every attachment at that size. Cubemaps + // are additionally clamped to square via min(w, h) (QRhi contract). + int width{0}; + int height{0}; + std::string width_expression; + std::string height_expression; }; struct descriptor @@ -374,6 +821,91 @@ struct descriptor // Auxiliary SSBOs expected from upstream geometry (matched by name). // Populated from top-level AUXILIARY key in RAW_RASTER_PIPELINE mode. std::vector auxiliary; + + // Auxiliary textures travelling with the geometry (matched by name + // against ossia::geometry::auxiliary_textures). 
Populated from the same + // top-level AUXILIARY array when entries have TYPE: "image" / "texture" + // / "cubemap". Unlike INPUTS-declared textures they don't consume a + // score input port — the renderer looks them up on the geometry every + // frame. + std::vector auxiliary_textures; + + // PIPELINE_STATE: global pipeline state (depth, blend, cull, stencil, ...). + // Applies to every output pass; may be overridden per-pass via pass::override_state. + pipeline_state default_state; + + // MULTIVIEW: render to N layers of a texture array in a single draw. + // 0 or 1 = disabled. N>=2 = enabled (requires QRhi::MultiView capability). + int multiview_count{0}; + + // EXECUTION_MODEL (RAW_RASTER_PIPELINE only — silently ignored in other + // modes). Drives the invocation count of the single raster pass: + // + // "SINGLE" (default) — one invocation per frame, RT bound at + // mip 0. + // "PER_MIP" — N invocations, RT bound at mip `i` on iteration + // `i`. N is derived from the `target` texture's + // mip chain (floor(log2(min(w, h))) + 1). + // ProcessUBO.passIndex carries the mip index. + // "PER_CUBE_FACE" — 6 invocations, RT bound at cube layer `i` + // (face order +X, -X, +Y, -Y, +Z, -Z). Target + // OUTPUT must be CUBEMAP: true. Mutually + // exclusive with MULTIVIEW (which already + // amplifies one draw to 6 faces). + // "PER_LAYER" — N invocations, RT bound at array layer `i`. N + // comes from the target OUTPUT's `layers` + // declaration. Works on either colour TextureArray + // targets (setLayer attachment) or depth + // TextureArray targets (rendered to a scratch + // and copied into the array layer post-pass — + // QRhi 6.11 has no per-layer depth attachment + // API). ProcessUBO.passIndex carries the layer + // index. Drives shadow_cascades.frag. 
+ // "MANUAL" — N invocations, same RT each time, where N is + // evaluated from the `count` expression string + // via the math_expression parser every frame + // (same variable bindings as CSF's stride / + // image-size expressions: $WIDTH, $HEIGHT, + // $, ...). + struct raster_execution_model + { + std::string type; // "SINGLE" / "PER_MIP" / "PER_CUBE_FACE" / "PER_LAYER" / "MANUAL" + std::string target; // PER_MIP / PER_CUBE_FACE / PER_LAYER: OUTPUT name to iterate + std::string count_expression; // MANUAL: integer-valued expression + }; + raster_execution_model execution_model; + + // User-declared GLSL extension names, emitted as `#extension NAME : require` + // immediately after `#version` in every generated stage. Examples: + // "GL_KHR_shader_subgroup_arithmetic", "GL_EXT_shader_atomic_float". + std::vector extensions{ + "GL_GOOGLE_include_directive", "GL_GOOGLE_cpp_style_line_directive"}; + + // CLIP_DISTANCES: number of gl_ClipDistance[N] outputs the vertex shader + // writes (1..8 typical). When > 0 the parser injects + // `out float gl_ClipDistance[N];` in the vertex stage so user code can + // assign without writing the declaration. Each declared distance enables + // one user-defined clipping plane: fragments where gl_ClipDistance[i] < 0 + // are discarded. + int clip_distances{0}; + + // CULL_DISTANCES: like clip distances but per-primitive: a primitive whose + // every vertex has all gl_CullDistance[i] < 0 is fully culled before + // rasterisation. Useful for cheap frustum-/occlusion-style culling. + int cull_distances{0}; + + // DEPTH_LAYOUT: conservative-depth qualifier on gl_FragDepth. Allowed: + // "any" — driver default (no guarantee, disables early-Z when + // gl_FragDepth is written). + // "greater" — promise the value written is >= the value rasterisation + // would have produced. Lets the HW keep early-Z reject + // for fragments already deeper than the depth buffer. + // "less" — symmetric promise in the other direction. 
+ // "unchanged" — promise the written value equals the rasterised value + // (mostly for documentation; same fast path as "greater" + // on hardware where reverse-Z applies). + // Empty = no qualifier emitted. + std::string depth_layout; }; class SCORE_PLUGIN_GFX_EXPORT parser diff --git a/src/plugins/score-plugin-gfx/CMakeLists.txt b/src/plugins/score-plugin-gfx/CMakeLists.txt index 175758b842..c43a169e60 100644 --- a/src/plugins/score-plugin-gfx/CMakeLists.txt +++ b/src/plugins/score-plugin-gfx/CMakeLists.txt @@ -136,6 +136,8 @@ set(HDRS Gfx/Filter/Library.hpp Gfx/Filter/PreviewWidget.hpp + Gfx/Widgets/RhiPreviewWidget.hpp + Gfx/GeometryFilter/Executor.hpp Gfx/GeometryFilter/Metadata.hpp Gfx/GeometryFilter/Process.hpp @@ -173,11 +175,12 @@ set(HDRS Gfx/Graph/BackgroundNode.hpp Gfx/Graph/CommonUBOs.hpp + Gfx/Graph/PhongNode.hpp Gfx/Graph/CustomMesh.hpp - Gfx/Graph/DepthNode.hpp Gfx/Graph/GeometryFilterNode.hpp Gfx/Graph/GeometryFilterNodeRenderer.hpp Gfx/Graph/RhiComputeBarrier.hpp + Gfx/Graph/RhiClearBuffer.hpp Gfx/Graph/GPUBufferScatter.hpp Gfx/Graph/RenderedCSFNode.hpp Gfx/Graph/Graph.hpp @@ -188,8 +191,18 @@ set(HDRS Gfx/Graph/Node.hpp Gfx/Graph/NodeRenderer.hpp Gfx/Graph/OutputNode.hpp - Gfx/Graph/PhongNode.hpp Gfx/Graph/PreviewNode.hpp + Gfx/Graph/SceneGPUState.hpp + Gfx/Graph/GpuResourceRegistry.hpp + Gfx/Graph/VertexFallbackDefaults.hpp + Gfx/Graph/VertexFallbackPlan.hpp + Gfx/Graph/VertexFallbackPool.hpp + Gfx/Graph/GpuTiming.hpp + Gfx/Graph/ScenePreprocessorNode.hpp + Gfx/Graph/CameraMath.hpp + Gfx/Graph/SceneFilterNode.hpp + Gfx/Graph/FlattenedSceneFilterNode.hpp + Gfx/Graph/MergeGeometriesNode.hpp Gfx/Graph/RenderList.hpp Gfx/Graph/RenderState.hpp Gfx/Graph/RenderedISFNode.hpp @@ -203,6 +216,7 @@ set(HDRS Gfx/Graph/SimpleRenderedISFNode.hpp Gfx/Graph/TexgenNode.hpp Gfx/Graph/TextNode.hpp + Gfx/Graph/TextureLoader.hpp Gfx/Graph/Uniforms.hpp Gfx/Graph/Utils.hpp Gfx/Graph/VideoNode.hpp @@ -268,10 +282,14 @@ set(HDRS Gfx/Settings/View.hpp 
Gfx/Settings/Factory.hpp + Gfx/AssetTable.hpp + Gfx/FormatRegistry.hpp + Gfx/Hashes.hpp Gfx/Window/BackgroundDevice.hpp Gfx/Window/CollapsibleSection.hpp Gfx/Window/DesktopLayout.hpp Gfx/Window/MultiWindowDevice.hpp + Gfx/Window/OffscreenDevice.hpp Gfx/Window/OutputMapping.hpp Gfx/Window/OutputPreview.hpp Gfx/Window/TestCard.hpp @@ -321,6 +339,8 @@ set(SRCS Gfx/Filter/Process.cpp Gfx/Filter/PreviewWidget.cpp + Gfx/Widgets/RhiPreviewWidget.cpp + Gfx/GeometryFilter/Executor.cpp Gfx/GeometryFilter/Process.cpp Gfx/GeometryFilter/Library.cpp @@ -353,9 +373,11 @@ set(SRCS Gfx/Graph/decoders/HAP.cpp Gfx/Graph/BackgroundNode.cpp Gfx/Graph/CustomMesh.cpp + Gfx/Graph/PhongNode.cpp Gfx/Graph/GeometryFilterNode.cpp Gfx/Graph/GeometryFilterNodeRenderer.cpp Gfx/Graph/RhiComputeBarrier.cpp + Gfx/Graph/RhiClearBuffer.cpp Gfx/Graph/GPUBufferScatter.cpp Gfx/Graph/RenderedCSFNode.cpp Gfx/Graph/Graph.cpp @@ -366,16 +388,30 @@ set(SRCS Gfx/Graph/Node.cpp Gfx/Graph/NodeRenderer.cpp Gfx/Graph/OutputNode.cpp - Gfx/Graph/PhongNode.cpp Gfx/Graph/PreviewNode.cpp + Gfx/Graph/SceneGPUState.cpp + Gfx/Graph/GpuResourceRegistry.cpp + Gfx/Graph/VertexFallbackDefaults.cpp + Gfx/Graph/VertexFallbackPool.cpp + Gfx/Graph/GpuTiming.cpp + Gfx/Graph/ScenePreprocessorNode.cpp + Gfx/Graph/CameraMath.cpp + Gfx/Graph/SceneFilterNode.cpp + Gfx/Graph/FlattenedSceneFilterNode.cpp + Gfx/Graph/MergeGeometriesNode.cpp Gfx/Graph/RenderList.cpp Gfx/Graph/RenderedISFNode.cpp Gfx/Graph/RenderedRawRasterPipelineNode.cpp Gfx/Graph/RenderedVSANode.cpp + Gfx/Graph/PipelineStateHelpers.hpp + Gfx/Graph/PipelineStateHelpers.cpp + Gfx/Graph/IsfBindingsBuilder.hpp + Gfx/Graph/IsfBindingsBuilder.cpp Gfx/Graph/ScreenNode.cpp Gfx/Graph/ShaderCache.cpp Gfx/Graph/SimpleRenderedISFNode.cpp Gfx/Graph/TextNode.cpp + Gfx/Graph/TextureLoader.cpp Gfx/Graph/Utils.cpp Gfx/Graph/VideoNode.cpp Gfx/Graph/VideoNodeRenderer.cpp @@ -383,6 +419,8 @@ set(SRCS Gfx/Graph/DirectVideoNodeRenderer.cpp Gfx/Graph/Window.cpp + Gfx/AssetTable.cpp + 
Gfx/FormatRegistry.cpp Gfx/GfxApplicationPlugin.cpp Gfx/GfxExecNode.cpp Gfx/GfxExecutionAction.cpp @@ -429,13 +467,17 @@ set_source_files_properties( "${3RDPARTY_FOLDER}/glsl-parser/glsl.parser.c" "${3RDPARTY_FOLDER}/glsl-parser/glsl.lexer.c" "${3RDPARTY_FOLDER}/dxv/dxv.c" + "${3RDPARTY_FOLDER}/OffsetAllocator/offsetAllocator.cpp" PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON ) # Creation of the library -add_library(${PROJECT_NAME} ${SRCS} ${HDRS}) +add_library(${PROJECT_NAME} ${SRCS} ${HDRS} + "${3RDPARTY_FOLDER}/OffsetAllocator/offsetAllocator.cpp" + "${3RDPARTY_FOLDER}/OffsetAllocator/offsetAllocator.hpp" +) # Code generation score_generate_command_list_file(${PROJECT_NAME} "${HDRS}") @@ -443,6 +485,7 @@ score_generate_command_list_file(${PROJECT_NAME} "${HDRS}") target_include_directories(${PROJECT_NAME} PUBLIC 3rdparty/libisf/src + "${3RDPARTY_FOLDER}/OffsetAllocator" PRIVATE "${3RDPARTY_FOLDER}/dxv" ) @@ -582,11 +625,13 @@ elseif(APPLE) target_sources(${PROJECT_NAME} PRIVATE Gfx/CameraDevice.avf.mm Gfx/Graph/RhiBufferCopyMetal.mm + Gfx/Graph/RhiClearBufferMetal.mm ) set_source_files_properties( Gfx/CameraDevice.avf.mm Gfx/Graph/RhiBufferCopyMetal.mm + Gfx/Graph/RhiClearBufferMetal.mm PROPERTIES SKIP_UNITY_BUILD_INCLUSION 1 ) diff --git a/src/plugins/score-plugin-gfx/Gfx/AssetTable.cpp b/src/plugins/score-plugin-gfx/Gfx/AssetTable.cpp new file mode 100644 index 0000000000..12d3a65b78 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/AssetTable.cpp @@ -0,0 +1,187 @@ +#include + +#include +#include + +namespace Gfx +{ + +namespace +{ +std::size_t estimateSize(const AssetTable::DecodedAsset& a) noexcept +{ + std::size_t total = 0; + if(!a.image.isNull()) + total += static_cast(a.image.sizeInBytes()); + if(a.bytes) + total += a.bytes->size(); + return total; +} +} + +void AssetTable::stage(uint64_t content_hash, QImage image) +{ + std::lock_guard lock{m_mutex}; + auto it = m_entries.find(content_hash); + if(it != m_entries.end()) + return; 
// Hash contract: same hash = same bytes. Idempotent stage. + + auto e = std::make_shared(); + e->image = std::move(image); + e->byte_size = estimateSize(*e); + m_total_bytes += e->byte_size; + + Slot s; + s.asset = std::move(e); + m_entries.emplace(content_hash, std::move(s)); +} + +void AssetTable::stage( + uint64_t content_hash, + std::shared_ptr> bytes, + std::string mime_type) +{ + std::lock_guard lock{m_mutex}; + auto it = m_entries.find(content_hash); + if(it != m_entries.end()) + return; + + auto e = std::make_shared(); + e->bytes = std::move(bytes); + e->mime_type = std::move(mime_type); + e->byte_size = estimateSize(*e); + m_total_bytes += e->byte_size; + + Slot s; + s.asset = std::move(e); + m_entries.emplace(content_hash, std::move(s)); +} + +std::shared_ptr +AssetTable::acquire(uint64_t content_hash) +{ + std::lock_guard lock{m_mutex}; + auto it = m_entries.find(content_hash); + if(it == m_entries.end()) + return {}; + auto& slot = it->second; + + // Resurrect from LRU if cold. + if(slot.in_lru) + { + m_lru.erase(slot.lru_it); + slot.in_lru = false; + m_cold_bytes -= slot.asset->byte_size; + } + + ++slot.asset->refcount; + return slot.asset; +} + +std::shared_ptr +AssetTable::peek(uint64_t content_hash) const +{ + std::lock_guard lock{m_mutex}; + auto it = m_entries.find(content_hash); + if(it == m_entries.end()) + return {}; + // Intentionally does NOT move out of LRU nor bump refcount — the + // caller just wants a read-through. If the entry is cold it stays + // cold (still evictable next trim). shared_ptr semantics keep the + // DecodedAsset alive as long as the caller holds the returned ptr, + // even if eviction happens concurrently on another thread. 
+ return it->second.asset; +} + +void AssetTable::release(uint64_t content_hash) +{ + std::lock_guard lock{m_mutex}; + auto it = m_entries.find(content_hash); + if(it == m_entries.end()) + return; + auto& slot = it->second; + if(slot.asset->refcount > 0) + --slot.asset->refcount; + if(slot.asset->refcount == 0 && !slot.in_lru) + { + // Newest-first: push_front, tail is oldest. trim() pops from tail. + m_lru.push_front(content_hash); + slot.lru_it = m_lru.begin(); + slot.in_lru = true; + m_cold_bytes += slot.asset->byte_size; + } +} + +void AssetTable::evictOne() noexcept +{ + // Caller holds m_mutex. + if(m_lru.empty()) + return; + const uint64_t hash = m_lru.back(); + m_lru.pop_back(); + + auto it = m_entries.find(hash); + if(it == m_entries.end()) + return; + + const std::size_t sz = it->second.asset->byte_size; + m_total_bytes -= sz; + m_cold_bytes -= sz; + m_entries.erase(it); +} + +std::size_t AssetTable::trim(std::size_t max_bytes_budget) +{ + std::lock_guard lock{m_mutex}; + std::size_t evicted = 0; + // Only evict from cold pool — hot entries stay regardless of budget. + while(m_cold_bytes > max_bytes_budget && !m_lru.empty()) + { + const std::size_t before_total = m_total_bytes; + evictOne(); + evicted += (before_total - m_total_bytes); + } + return evicted; +} + +void AssetTable::maybeAutoTrim( + float utilization, float high_watermark, float target) +{ + if(utilization < high_watermark) + return; + + std::lock_guard lock{m_mutex}; + if(m_cold_bytes == 0) + return; + + // Convert target utilization to a cold-pool budget. Heuristic: + // scale the current cold pool by (target / utilization). At + // util=0.85, target=0.60 → trim to ~70% of current cold total. + // Not a proper memory-pressure solver — a low-cost knob that + // kicks in on sustained overload. 
+ const float scale = target / utilization; + const auto budget + = static_cast(static_cast(m_cold_bytes) * scale); + while(m_cold_bytes > budget && !m_lru.empty()) + evictOne(); +} + +std::size_t AssetTable::size() const noexcept +{ + std::lock_guard lock{m_mutex}; + return m_entries.size(); +} + +std::size_t AssetTable::totalBytes() const noexcept +{ + std::lock_guard lock{m_mutex}; + return m_total_bytes; +} + +std::size_t AssetTable::coldCount() const noexcept +{ + std::lock_guard lock{m_mutex}; + return m_lru.size(); +} + +} // namespace Gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/AssetTable.hpp b/src/plugins/score-plugin-gfx/Gfx/AssetTable.hpp new file mode 100644 index 0000000000..5b91d7fb98 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/AssetTable.hpp @@ -0,0 +1,169 @@ +#pragma once + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace Gfx +{ + +/** + * @brief Cross-RenderList content-hash dedup for decoded asset bytes. + * + * Lives on GfxContext, shared across all RenderLists in the session. + * Keyed by `content_hash` (64-bit stable hash of the source bytes — + * the canonical primitive is `ossia::hash_bytes` from + * `ossia/detail/hash.hpp`, which dispatches to rapidhash; parsers and + * the preprocessor produce content_hash values through that helper). + * + * Purpose: one decode per asset across the whole session. When two + * glTF files reference the same `baseColor.jpg`, we decode it once + * and reuse. Per-RenderList GpuResourceRegistries upload from the + * cached QImage independently (Plan 09 §4.2: one decode, N uploads). + * + * Not the GPU-resource owner — GpuResourceRegistry does that per + * QRhi. AssetTable only holds CPU-side bytes + format metadata + * during the window between decode and eviction. + * + * # Lifecycle (Plan 09 S1) + * + * Three states per entry: + * + * - **hot** (refcount > 0): actively held by at least one consumer. + * Never evicted. 
+ * - **cold** (refcount == 0, still referenced in the LRU list): + * eviction candidate. `acquire()` resurrects it at zero cost. + * - **evicted**: dropped from the map. Next `acquire()` misses; + * the caller re-decodes and calls `stage()` again. + * + * Transitions: + * - `stage()` inserts at refcount 0, outside the LRU list (no-op if already present). + * - `acquire()` bumps refcount and (if resurrecting) splices out + * of the LRU list. + * - `release()` decrements; at 0 the entry moves to the LRU head. + * - `trim(max_bytes)` pops from the LRU tail until under budget. + * - `maybeAutoTrim()` called periodically: reads a supplied + * utilization ratio and trims when above a threshold. + * + * Byte accounting is approximate — `estimateSize(DecodedAsset)` adds + * QImage::sizeInBytes and the raw bytes vector size. Good enough + * for budget bookkeeping without a full allocator hook. + * + * # Thread safety + * + * All public methods take `m_mutex`. Fine for the access pattern + * (parser worker threads stage, render threads acquire/release, + * GUI tick trims) — the mutex is held for microseconds at a time. + */ +class SCORE_PLUGIN_GFX_EXPORT AssetTable +{ +public: + /// Decoded image or raw byte payload. `image` is preferred for 2D + /// textures (carries QImage's format metadata); `bytes` for generic + /// buffer assets (vertex/index streams etc.). + struct DecodedAsset + { + QImage image; + std::shared_ptr> bytes; + std::string mime_type; + int64_t refcount{0}; + // Approximate storage cost. Computed at stage() time; the + // allocator may report a different value but this is the number + // the LRU trim budgets against. + std::size_t byte_size{0}; + }; + + // For byte-range hashing use `ossia::hash_bytes` from + // `ossia/detail/hash.hpp` — it's the canonical rapidhash-tiered + // dispatcher that produces stable `content_hash` values across + // the codebase. Parsers call it directly when stamping + // `texture_source::content_hash` / `buffer_resource::content_hash`. 
+ + AssetTable() = default; + AssetTable(const AssetTable&) = delete; + AssetTable& operator=(const AssetTable&) = delete; + ~AssetTable() = default; + + /// Publish a decoded asset under its content hash. Idempotent — + /// a second stage() with the same hash is a no-op (hash contract: + /// same hash = same bytes). + void stage(uint64_t content_hash, QImage image); + void stage( + uint64_t content_hash, std::shared_ptr> bytes, + std::string mime_type = {}); + + /// Return a shared pointer to the decoded asset, bumping its + /// refcount. Null when not staged. O(1) average. + std::shared_ptr acquire(uint64_t content_hash); + + /// Read-through without refcount bump. The returned shared_ptr + /// keeps the DecodedAsset alive on the caller's side even if the + /// AssetTable evicts the entry — but does NOT prevent eviction. + /// Suitable for the "upload once to GPU, then done" path where + /// the consumer doesn't care if the CPU-side bytes live on. + std::shared_ptr peek(uint64_t content_hash) const; + + /// Decrement refcount. At 0 the entry moves to the LRU head and + /// is eligible for eviction on the next trim. + void release(uint64_t content_hash); + + /// Force eviction until the cold-pool byte total is at or below + /// @p max_bytes_budget. Called explicitly by UI ("unload unused"); + /// maybeAutoTrim() evicts from the same LRU tail under its own budget. + /// @return bytes evicted. + std::size_t trim(std::size_t max_bytes_budget); + + /// Called on a cadence (e.g. from the Gfx thread idle tick) to + /// pressure-trim when the supplied utilization ratio reaches or exceeds + /// @p high_watermark. Cost: O(n) in the LRU list when a trim + /// fires; constant otherwise. + /// + /// @p utilization in [0, 1]. Compute externally from + /// QRhiStats::usedBytes / (usedBytes + unusedBytes), or from a + /// hard OS-level memory query. + /// @p high_watermark default 0.80. @p target default 0.60. + void maybeAutoTrim( + float utilization, float high_watermark = 0.80f, + float target = 0.60f); + + /// Debug / inspector. 
+ std::size_t size() const noexcept; + /// Approx total bytes held in cold pool + hot pool. + std::size_t totalBytes() const noexcept; + /// Number of cold entries eligible for eviction. + std::size_t coldCount() const noexcept; + +private: + struct Slot; // forward + + // Linked list of cold entries, newest at head. std::list so that each + // Slot::lru_it stays valid while other entries are inserted or erased. + using LruList = std::list; + + struct Slot + { + std::shared_ptr asset; + LruList::iterator lru_it; // valid only while in_lru is true + bool in_lru{false}; + }; + + void evictOne() noexcept; // Pops the LRU tail. Caller holds m_mutex. + + mutable std::mutex m_mutex; + ossia::hash_map m_entries; + LruList m_lru; // cold entries, newest at front + std::size_t m_total_bytes{0}; + std::size_t m_cold_bytes{0}; +}; + +} // namespace Gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp b/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp index de27d2090b..3ee61ae569 100644 --- a/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/CSF/Library.cpp @@ -13,7 +13,7 @@ namespace Gfx::CSF QSet LibraryHandler::acceptedFiles() const noexcept { - return {"cs", "comp"}; + return {"cs", "comp", "csf"}; } void LibraryHandler::setup( @@ -62,7 +62,7 @@ QWidget* LibraryHandler::previewWidget( QSet DropHandler::fileExtensions() const noexcept { - return {"cs", "comp"}; + return {"cs", "comp", "csf"}; } void DropHandler::dropPath( diff --git a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp index 95d1b063d3..20de4e309b 100644 --- a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.cpp @@ -6,10 +6,13 @@ #include #include +#include #include #include +#include #include +#include #include @@ -78,7 +81,10 @@ Model::Model( QFile f{init}; if(f.open(QIODevice::ReadOnly)) + { + m_scriptPath = init; (void)setCompute(f.readAll()); + } } Model::~Model() { } @@ -87,8 +93,18 @@ bool 
Model::validate(const QString& txt) const noexcept { try { + // Expand #include directives against the model's origin dir + the + // global search paths before handing the source to the ISF parser. + auto [resolved, err] + = Gfx::preprocessShaderIncludes(txt.toUtf8(), m_scriptPath); + if(!err.isEmpty()) + { + this->errorMessage(0, err); + return false; + } + // Parse the CSF shader to extract metadata - std::string str = txt.toStdString(); + std::string str(resolved.constData(), resolved.size()); isf::parser p{str, isf::parser::ShaderType::CSF}; // Check if it's a valid CSF shader @@ -144,15 +160,25 @@ Process::ScriptChangeResult Model::setScript(const QString& f) { m_compute = f; - QString processed = m_compute; - auto inls = score::clearAndDeleteLater(m_inlets); auto outls = score::clearAndDeleteLater(m_outlets); try { + // Expand #include directives against the model's origin dir before + // feeding the source to the ISF parser. + auto [resolved, err] + = Gfx::preprocessShaderIncludes(m_compute.toUtf8(), m_scriptPath); + if(!err.isEmpty()) + { + this->errorMessage(0, err); + return {.valid = false, .inlets = std::move(inls), .outlets = std::move(outls)}; + } + // Parse CSF shader - isf::parser p{processed.toStdString(), isf::parser::ShaderType::CSF}; + isf::parser p{ + std::string(resolved.constData(), resolved.size()), + isf::parser::ShaderType::CSF}; m_processedProgram.descriptor = p.data(); m_processedProgram.fragment = QString::fromStdString(p.compute_shader()); m_processedProgram.type = isf::parser::ShaderType::CSF; @@ -310,8 +336,19 @@ void Model::setupCSF(const isf::descriptor& desc) alternatives.emplace_back("2", 2); } + // ComboBox::init is a VALUE that should match one of the alternatives' + // values — NOT an index. libisf stores `v.def` as the INDEX into + // values (see isf.hpp comment on long_input::def). 
Passing the raw + // index made the ComboBox fail to match any alternative and silently + // default to alternatives[0], which is why DEFAULT: 32 in + // VALUES: [16, 32, 64] showed up as 16 in the UI. Look up the + // alternative at v.def and pass its second (the value). + const std::size_t def_idx + = std::min(v.def, alternatives.size() - 1); + const ossia::value& init_value = alternatives[def_idx].second; + auto port = new Process::ComboBox( - std::move(alternatives), (int)v.def, QString::fromStdString(input.name), + std::move(alternatives), init_value, QString::fromStdString(input.name), Id(input_i++), &self); self.m_inlets.push_back(port); @@ -448,18 +485,34 @@ void Model::setupCSF(const isf::descriptor& desc) QString::fromStdString(input.name), Id(output_i++), &self); self.m_outlets.push_back(port); - auto size_inl = new Process::IntSpinBox{ - 1, - 536870911, - 1024, - QString::fromStdString(input.name) + " size", - Id(input_i++), - &self}; - self.m_inlets.push_back(size_inl); - self.controlAdded(size_inl->id()); + // Only writable buffers whose layout ends in a flexible-array member + // get a synthesized "size" inlet — this MUST match the renderer + // (isf_input_port_count_vis / isf_input_port_vis) and the generated + // GLSL, or every later control routes to the wrong port. + if(!v.layout.empty() + && v.layout.back().type.find("[]") != std::string::npos) + { + auto size_inl = new Process::IntSpinBox{ + 1, + 536870911, + 1024, + QString::fromStdString(input.name) + " size", + Id(input_i++), + &self}; + self.m_inlets.push_back(size_inl); + self.controlAdded(size_inl->id()); + } } } + void operator()(const uniform_input& v) + { + // UBO inputs sourced from upstream Buffer ports (read-only). 
+ auto port = new Gfx::TextureInlet( + QString::fromStdString(input.name), Id(input_i++), &self); + self.m_inlets.push_back(port); + } + void operator()(const texture_input& v) { auto port = new Gfx::TextureInlet( @@ -606,7 +659,17 @@ Process::Descriptor ProcessFactory::descriptor(QString) const noexcept template <> void DataStreamReader::read(const Gfx::CSF::Model& proc) { - m_stream << proc.m_compute; + // documentContext() SCORE_ASSERTs when the model isn't in a document + // (e.g. saving a template / copy). Only relativize against the document + // when there's an actual script path to relativize — mirrors the + // JSON/load guards. The empty case writes an empty path verbatim. + QString relativeScriptPath; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + relativeScriptPath = score::relativizeFilePath(proc.m_scriptPath, ctx); + } + m_stream << proc.m_compute << relativeScriptPath; readPorts(*this, proc.m_inlets, proc.m_outlets); insertDelimiter(); @@ -616,7 +679,12 @@ template <> void DataStreamWriter::write(Gfx::CSF::Model& proc) { QString s; - m_stream >> s; + m_stream >> s >> proc.m_scriptPath; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } (void)proc.setScript(s); writePorts( *this, components.interfaces(), proc.m_inlets, @@ -629,6 +697,11 @@ template <> void JSONReader::read(const Gfx::CSF::Model& proc) { obj["Compute"] = proc.script(); + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx); + } readPorts(*this, proc.m_inlets, proc.m_outlets); } @@ -636,6 +709,15 @@ template <> void JSONWriter::write(Gfx::CSF::Model& proc) { QString s = obj["Compute"].toString(); + if(auto r = obj.tryGet("Root")) + { + proc.m_scriptPath <<= *r; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = 
score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } + } (void)proc.setScript(s); writePorts( *this, components.interfaces(), proc.m_inlets, diff --git a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp index a0c6580885..1ac120ddf7 100644 --- a/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/CSF/Process.hpp @@ -75,6 +75,11 @@ class Model final : public Process::ProcessModel void errorMessage(int line, const QString& err) const W_SIGNAL(errorMessage, line, err); + // Absolute path of the shader file this model was loaded from. Used as + // the base for quoted #include resolution in ProgramCache::get. Empty + // when the shader source is in-memory. Mirrors JS::ProcessModel::m_root. + QString rootPath() const noexcept { return m_scriptPath; } + private: void loadPreset(const Process::Preset& preset) override; Process::Preset savePreset() const noexcept override; @@ -84,6 +89,7 @@ class Model final : public Process::ProcessModel QString m_compute; ProcessedProgram m_processedProgram; + QString m_scriptPath; }; struct ProcessFactory final : Process::ProcessFactory_T diff --git a/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp b/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp index 725a13bf26..6f53be1ce5 100644 --- a/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/CameraDevice.win32.cpp @@ -8,14 +8,19 @@ extern "C" { #include } -// ! +// clang-format off +// Order-sensitive — do NOT let clang-format sort these: +// - must precede / so the DirectShow GUIDs get +// a real definition (not just an extern declaration); +// - the Windows system headers must come before /. #include -// ! 
Needs to be present before, to ensure uuids get enumerated +#include #include #include -#include #include +#include +// clang-format on namespace Gfx { diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp index bd1a1f1185..c733244f04 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -163,7 +164,7 @@ void DropHandler::dropPath( void DropHandler::dropCustom( std::vector& vec, const QMimeData& mime, - const score::DocumentContext& ctx) const noexcept + const score::DocumentContext& ctx) const { // FIXME handle multipass / multibuffer for(const auto& uri : mime.urls()) @@ -186,28 +187,40 @@ void DropHandler::dropCustom( { continue; } - isf::parser parser("", shader_json, 450, isf::parser::ShaderType::ShaderToy); - auto isf = parser.write_isf(); - auto spec = parser.data(); - if(isf.empty()) + // The ISF parser throws invalid_file on malformed Shadertoy + // JSON (empty body, non-JSON response, missing fields, parse- + // time validation failures like non-numeric LOCATION). Catch + // per URL so one bad URL doesn't abort the whole drop batch. 
+ try + { + isf::parser parser("", shader_json, 450, isf::parser::ShaderType::ShaderToy); + auto isf = parser.write_isf(); + auto spec = parser.data(); + if(isf.empty()) + { + continue; + } + // For immediate feedback, add a placeholder + Process::ProcessDropHandler::ProcessDrop p; + p.creation.key = Metadata::get(); + p.creation.prettyName = "Shadertoy " + shaderId; + p.setup = [isf](Process::ProcessModel& p, score::Dispatcher& d) { + auto& filter = (Gfx::Filter::Model&)p; + Gfx::ShaderSource source; + source.vertex = ""; + source.fragment = QString::fromStdString(isf); + auto cmd = new Gfx::ChangeShader{ + filter, source, score::IDocument::documentContext(p)}; + d.submit(cmd); + }; + + vec.push_back(std::move(p)); + } + catch(const std::exception& e) { + qWarning() << "Shadertoy drop failed for" << shaderId << ":" << e.what(); continue; } - // For immediate feedback, add a placeholder - Process::ProcessDropHandler::ProcessDrop p; - p.creation.key = Metadata::get(); - p.creation.prettyName = "Shadertoy " + shaderId; - p.setup = [isf](Process::ProcessModel& p, score::Dispatcher& d) { - auto& filter = (Gfx::Filter::Model&)p; - Gfx::ShaderSource source; - source.vertex = ""; - source.fragment = QString::fromStdString(isf); - auto cmd = new Gfx::ChangeShader{ - filter, source, score::IDocument::documentContext(p)}; - d.submit(cmd); - }; - - vec.push_back(std::move(p)); } } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp index 53a41e75d7..152e3f8aba 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Library.hpp @@ -44,7 +44,7 @@ class DropHandler final : public Process::ProcessDropHandler void dropCustom( std::vector& drops, const QMimeData& mime, - const score::DocumentContext& ctx) const noexcept override; + const score::DocumentContext& ctx) const override; }; struct VideoTextureDropHandler : public Process::ProcessDropHandler diff 
--git a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp index 7175e95344..93df20d895 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.cpp @@ -1,12 +1,15 @@ #include #include +#include #include -#include +#include #include +#include #include +#include #include #include @@ -137,7 +140,8 @@ struct PreviewInputVisitor // CSF-specific input handlers score::gfx::NodeModel* operator()(const isf::storage_input& v) { return nullptr; } - + score::gfx::NodeModel* operator()(const isf::uniform_input& v) { return nullptr; } + score::gfx::NodeModel* operator()(const isf::texture_input& v) { static std::array images{ @@ -175,61 +179,80 @@ struct PreviewPresetVisitor { score::gfx::ISFNode& node; ossia::flat_map& controls; + // Descriptor-input index: matches both the saved preset control keys + // (model inlet id == desc.inputs index, see setupISFModelPorts) and the + // controls flat_map key. int i{}; + // Render-port index: index into node.input[], advanced via + // walk_descriptor_inputs (an input may create 0 or 2 ports, so this + // drifts from the descriptor index). + int port{}; + + // Guarded material pointer for the current render port: nullptr if the + // port index is out of range or the port carries no material storage. + float* portValue() const noexcept + { + if(port < 0 || port >= (int)node.input.size()) + return nullptr; + return reinterpret_cast(node.input[port]->value); + // NB: for scalar/vector inputs value always points into the material + // UBO blob; image/audio inputs never reach this (their visitors no-op). 
+ } + void operator()(const isf::float_input& v) { - if(float* v = controls[i].target()) - { - (*(float*)node.input[i]->value) = *v; - } + if(float* dst = portValue(); dst) + if(float* val = controls[i].target()) + *dst = *val; } void operator()(const isf::long_input& v) { - if(int* v = controls[i].target()) - { - (*(int*)node.input[i]->value) = *v; - } + if(float* dst = portValue(); dst) + if(int* val = controls[i].target()) + *reinterpret_cast(dst) = *val; } void operator()(const isf::event_input& v) { } void operator()(const isf::bool_input& v) { - if(bool* v = controls[i].target()) - { - (*(int*)node.input[i]->value) = *v ? 1 : 0; - } + if(float* dst = portValue(); dst) + if(bool* val = controls[i].target()) + *reinterpret_cast(dst) = *val ? 1 : 0; } void operator()(const isf::point2d_input& v) { - if(ossia::vec2f* v = controls[i].target()) - { - (*(float*)node.input[i]->value) = (*v)[0]; - (*((float*)node.input[i]->value + 1)) = (*v)[1]; - } + if(float* dst = portValue(); dst) + if(ossia::vec2f* val = controls[i].target()) + { + dst[0] = (*val)[0]; + dst[1] = (*val)[1]; + } } void operator()(const isf::point3d_input& v) { - if(ossia::vec3f* v = controls[i].target()) - { - (*(float*)node.input[i]->value) = (*v)[0]; - (*((float*)node.input[i]->value + 1)) = (*v)[1]; - (*((float*)node.input[i]->value + 2)) = (*v)[2]; - } + if(float* dst = portValue(); dst) + if(ossia::vec3f* val = controls[i].target()) + { + dst[0] = (*val)[0]; + dst[1] = (*val)[1]; + dst[2] = (*val)[2]; + } } void operator()(const isf::color_input& v) { - if(ossia::vec4f* v = controls[i].target()) - { - (*(float*)node.input[i]->value) = (*v)[0]; - (*((float*)node.input[i]->value + 1)) = (*v)[1]; - (*((float*)node.input[i]->value + 2)) = (*v)[2]; - (*((float*)node.input[i]->value + 3)) = (*v)[3]; - } + if(float* dst = portValue(); dst) + if(ossia::vec4f* val = controls[i].target()) + { + dst[0] = (*val)[0]; + dst[1] = (*val)[1]; + dst[2] = (*val)[2]; + dst[3] = (*val)[3]; + } } void 
operator()(const isf::image_input& v) { } @@ -244,6 +267,7 @@ struct PreviewPresetVisitor // CSF-specific input handlers void operator()(const isf::storage_input& v) { } + void operator()(const isf::uniform_input& v) { } void operator()(const isf::texture_input& v) { } @@ -256,18 +280,17 @@ struct PreviewPresetVisitor ShaderPreviewManager* g_shaderPreview{}; bool g_shaderPreviewScheduledForDeletion{}; -// Creating and destroying QRhi is fairly expensive, so -// we keep one around when we are showing ISF previews +// Holds the source ISF + image nodes shared across hover previews. +// The output side is owned by individual ShaderPreviewWidget / +// RhiPreviewWidget instances: each contributes a score::gfx::PreviewNode +// targeting its own QRhiWidget render target. Multiple previews can be +// attached at once (e.g. library hover + live texture-port preview). class ShaderPreviewManager : public QObject { public: ShaderPreviewManager() : QObject{qApp} { - score::gfx::OutputNode::Configuration conf{}; - m_screen = std::make_unique(conf, true); - m_graph.addNode(m_screen.get()); - connect(qApp, &QCoreApplication::aboutToQuit, this, [] { delete g_shaderPreview; g_shaderPreviewScheduledForDeletion = false; @@ -288,7 +311,8 @@ class ShaderPreviewManager : public QObject if(path.contains(".vs") || path.contains(".vert")) program = programFromVSAVertexShaderPath(path, {}); - if(const auto& [processed, error] = ProgramCache::instance().get(program); + if(const auto& [processed, error] + = ProgramCache::instance().get(program, path); bool(processed)) { m_program = *processed; @@ -311,6 +335,8 @@ class ShaderPreviewManager : public QObject auto vert = obj["Vertex"].GetString(); ShaderSource program{type, vert, frag}; + // Preset-loaded source has no origin file; includes resolve against + // global search paths only. 
if(const auto& [processed, error] = ProgramCache::instance().get(program); bool(processed)) { @@ -324,21 +350,49 @@ class ShaderPreviewManager : public QObject controls[arr[0].GetInt()] = JsonValue{arr[1]}.to(); } + // controls is keyed by descriptor-input index (== model inlet id); + // node.input[] is keyed by render-port index. walk_descriptor_inputs + // gives the render-port index (cur.inlets) for each descriptor entry, + // which drifts from the descriptor index for 0-/2-port inputs. int i = 0; - for(const isf::input& input : m_program.descriptor.inputs) - { - ossia::visit(PreviewPresetVisitor{*m_isf, controls, i}, input.data); - i++; - } + score::gfx::walk_descriptor_inputs( + m_program.descriptor, + [&](const isf::input& input, const score::gfx::port_counts& cur, + const score::gfx::port_counts&) { + ossia::visit( + PreviewPresetVisitor{*m_isf, controls, i, cur.inlets}, + input.data); + i++; + }); } } } - std::shared_ptr getWindow() + score::gfx::Graph& graph() noexcept { return m_graph; } + + // True while at least one preview widget is still attached to the shared + // graph. The deferred manager deletion must NOT fire while this holds, or + // a surviving widget's RhiPreviewWidget::m_graph would dangle (UAF on its + // detach()). 
+ bool hasPreviews() const noexcept { return !m_previews.empty(); } + + void attachPreview(score::gfx::BackgroundNode& node) + { + m_previews.push_back(&node); + if(m_isf) + { + m_graph.addEdge( + m_isf->output[0], node.input[0], Process::CableType::ImmediateGlutton); + const auto& settings = score::AppContext().settings(); + m_graph.createAllRenderLists(settings.graphicsApiEnum()); + } + } + + void detachPreview(score::gfx::BackgroundNode& node) { - if(m_screen && m_screen.get()) - return m_screen.get()->window(); - return {}; + ossia::remove_erase(m_previews, &node); + if(m_isf) + m_graph.removeEdge(m_isf->output[0], node.input[0]); } std::vector> m_previewEdges; @@ -346,7 +400,7 @@ class ShaderPreviewManager : public QObject void setup() { const auto& settings = score::AppContext().settings(); - // Create our graph + // Tear down the previous set of source nodes. for(auto [a, b] : m_previewEdges) m_graph.removeEdge(a, b); m_previewEdges.clear(); @@ -359,48 +413,63 @@ class ShaderPreviewManager : public QObject if(m_isf) { - m_graph.removeEdge(m_isf->output[0], m_screen->input[0]); + for(auto* p : m_previews) + m_graph.removeEdge(m_isf->output[0], p->input[0]); m_graph.removeNode(m_isf.get()); } - m_graph.removeNode(m_screen.get()); - // Clear the graph, renderers etc. 
m_graph.createAllRenderLists(settings.graphicsApiEnum()); m_isf.reset(); m_textures.clear(); - // Recreate what we need - m_graph.addNode(m_screen.get()); - // FIXME add an error image if the shader did not parse m_isf = std::make_unique( m_program.descriptor, m_program.vertex, m_program.fragment); m_graph.addNode(m_isf.get()); - // Edge from filter to output - m_graph.addEdge( - m_isf->output[0], m_screen->input[0], Process::CableType::ImmediateGlutton); - // Edges from image nodes to image inputs - int image_i = 0; - int i = 0; - for(const isf::input& input : m_program.descriptor.inputs) - { - auto node = ossia::visit(PreviewInputVisitor{image_i}, input.data); - if(node) - { - m_graph.addNode(node); + // Wire ISF output to every currently-attached preview. + for(auto* p : m_previews) + m_graph.addEdge( + m_isf->output[0], p->input[0], Process::CableType::ImmediateGlutton); - m_graph.addEdge( - node->output[0], m_isf->input[i], Process::CableType::ImmediateGlutton); - m_previewEdges.emplace_back(node->output[0], m_isf->input[i]); - - m_textures.push_back(std::unique_ptr(node)); - } - i++; - } + // Edges from image nodes to image inputs. The render-port index of an + // input (cur.inlets, via walk_descriptor_inputs) drifts from the + // descriptor index for inputs that create 0 or 2 ports, so we must not + // equate them. PreviewInputVisitor only yields a node for image-like + // inputs, each of which creates exactly one input port at cur.inlets. 
+ int image_i = 0; + score::gfx::walk_descriptor_inputs( + m_program.descriptor, + [&](const isf::input& input, const score::gfx::port_counts& cur, + const score::gfx::port_counts& delta) { + auto node = ossia::visit(PreviewInputVisitor{image_i}, input.data); + if(node) + { + const int port_idx = cur.inlets; + // Only wire when this input actually creates an input port: + // write-access csf_image_input yields a node but 0 inlets, and + // the render-port index must come from cur.inlets (not the + // descriptor index, which drifts for 0-/2-port inputs). + if(delta.inlets < 1 || port_idx < 0 + || port_idx >= (int)m_isf->input.size()) + { + delete node; + return; + } + + m_graph.addNode(node); + + m_graph.addEdge( + node->output[0], m_isf->input[port_idx], + Process::CableType::ImmediateGlutton); + m_previewEdges.emplace_back(node->output[0], m_isf->input[port_idx]); + + m_textures.push_back(std::unique_ptr(node)); + } + }); m_graph.createAllRenderLists(settings.graphicsApiEnum()); } @@ -463,10 +532,10 @@ class ShaderPreviewManager : public QObject } } - std::unique_ptr m_screen{}; private: std::unique_ptr m_isf{}; std::vector> m_textures; + std::vector m_previews; score::gfx::Graph m_graph{}; ProcessedProgram m_program; }; @@ -497,45 +566,59 @@ ShaderPreviewWidget::ShaderPreviewWidget(const Process::Preset& preset, QWidget* ShaderPreviewWidget::~ShaderPreviewWidget() { + // Tearing down the RhiPreviewWidget triggers detachPreview() on the + // manager, which removes the producer→preview edge. Do this before + // scheduling manager deletion so the deferred delete sees a clean + // graph. + delete m_rhi; + m_rhi = nullptr; + g_shaderPreviewScheduledForDeletion = true; QTimer::singleShot(std::chrono::seconds(5), qApp, []() { - if(g_shaderPreviewScheduledForDeletion) + // Multi-client safety: several ShaderPreviewWidgets can share the same + // manager (library hover + live texture-port preview). 
Destroying one + // schedules this deletion, but another may still be attached — its + // RhiPreviewWidget holds a raw pointer into g_shaderPreview->graph(). + // Only tear the manager down once no preview remains attached, otherwise + // the surviving widget would dereference a freed Graph on its own + // destruction (use-after-free). + if(g_shaderPreviewScheduledForDeletion && g_shaderPreview + && !g_shaderPreview->hasPreviews()) { delete g_shaderPreview; g_shaderPreview = nullptr; g_shaderPreviewScheduledForDeletion = false; } }); - - if(m_window) - m_window->setParent(nullptr); } void ShaderPreviewWidget::setup() { // UI setup auto lay = new QHBoxLayout(this); - if((m_window = g_shaderPreview->getWindow())) - { - auto widg = createWindowContainer(m_window.get(), this); - widg->setMinimumWidth(300); - widg->setMaximumWidth(300); - widg->setMinimumHeight(200); - widg->setMaximumHeight(200); - lay->addWidget(widg); - } - // FIXME else { display error widget } - - // so anyways, I started blasting... + m_rhi = new RhiPreviewWidget(this); + m_rhi->setMinimumSize(300, 200); + m_rhi->setMaximumSize(300, 200); + m_rhi->useGraph( + &g_shaderPreview->graph(), + [](score::gfx::BackgroundNode& n) { + if(g_shaderPreview) + g_shaderPreview->attachPreview(n); + }, + [](score::gfx::BackgroundNode& n) { + if(g_shaderPreview) + g_shaderPreview->detachPreview(n); + }); + lay->addWidget(m_rhi); + + // Drives ISF time/progress uniforms. Frame submission is owned by + // the QRhiWidget (it calls update() each render). 
startTimer(16); } void ShaderPreviewWidget::timerEvent(QTimerEvent* event) { if(g_shaderPreview) - { g_shaderPreview->updateControls(); - g_shaderPreview->m_screen->render(); - } } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp index e58e7ded5a..76318f8189 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Filter/PreviewWidget.hpp @@ -3,11 +3,10 @@ #include #include #include -#include #include -#include #include +#include namespace score::gfx { class ISFNode; @@ -18,6 +17,7 @@ struct Preset; } namespace Gfx { +class RhiPreviewWidget; class ShaderPreviewManager; class ShaderPreviewWidget : public QWidget { @@ -30,7 +30,7 @@ class ShaderPreviewWidget : public QWidget void setup(); void timerEvent(QTimerEvent* event) override; - std::shared_ptr m_window; + RhiPreviewWidget* m_rhi{}; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp index b6a900ae26..3499e835f3 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.cpp @@ -11,8 +11,10 @@ #include #include +#include #include #include +#include #include @@ -71,10 +73,12 @@ Model::Model( if(init.endsWith("fs") || init.endsWith("frag")) { + m_scriptPath = init; (void)setProgram(programFromISFFragmentShaderPath(init, {})); } else if(init.endsWith("vs") || init.endsWith("vert")) { + m_scriptPath = init; (void)setProgram(programFromVSAVertexShaderPath(init, {})); } } @@ -83,7 +87,7 @@ Model::~Model() { } bool Model::validate(const ShaderSource& txt) const noexcept { - const auto& [_, error] = ProgramCache::instance().get(txt); + const auto& [_, error] = ProgramCache::instance().get(txt, m_scriptPath); if(!error.isEmpty()) { this->errorMessage(error); @@ -116,7 +120,9 @@ Process::ScriptChangeResult Model::setProgram(const ShaderSource& f) { 
setVertex(f.vertex); setFragment(f.fragment); - if(const auto& [processed, error] = ProgramCache::instance().get(f); bool(processed)) + if(const auto& [processed, error] + = ProgramCache::instance().get(f, m_scriptPath); + bool(processed)) { ossia::flat_map previous_values; for(auto inl : m_inlets) @@ -203,7 +209,17 @@ void DataStreamWriter::write(Gfx::ShaderSource& p) template <> void DataStreamReader::read(const Gfx::Filter::Model& proc) { - m_stream << proc.m_program; + // documentContext() SCORE_ASSERTs when the model isn't in a document + // (e.g. saving a template / copy). Only relativize against the document + // when there's an actual script path to relativize — mirrors the + // JSON/load guards. The empty case writes an empty path verbatim. + QString relativeScriptPath; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + relativeScriptPath = score::relativizeFilePath(proc.m_scriptPath, ctx); + } + m_stream << proc.m_program << relativeScriptPath; readPorts(*this, proc.m_inlets, proc.m_outlets); @@ -214,7 +230,12 @@ template <> void DataStreamWriter::write(Gfx::Filter::Model& proc) { Gfx::ShaderSource s; - m_stream >> s; + m_stream >> s >> proc.m_scriptPath; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } s.type = isf::parser::ShaderType::ISF; (void)proc.setProgram(s); @@ -230,6 +251,11 @@ void JSONReader::read(const Gfx::Filter::Model& proc) { obj["Vertex"] = proc.vertex(); obj["Fragment"] = proc.fragment(); + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx); + } readPorts(*this, proc.m_inlets, proc.m_outlets); } @@ -241,6 +267,15 @@ void JSONWriter::write(Gfx::Filter::Model& proc) s.vertex = obj["Vertex"].toString(); s.fragment = obj["Fragment"].toString(); s.type = 
isf::parser::ShaderType::ISF; + if(auto r = obj.tryGet("Root")) + { + proc.m_scriptPath <<= *r; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } + } (void)proc.setProgram(s); writePorts( diff --git a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp index a6e04b48c2..b8fd28005b 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Filter/Process.hpp @@ -64,6 +64,12 @@ class Model final : public Process::ProcessModel return m_processedProgram; } + // Absolute path of the shader file this model was loaded from. Used as + // the base for quoted #include resolution in ProgramCache::get. Empty + // when the shader source is in-memory (default preset, pasted text). + // Mirrors JS::ProcessModel::m_root. + QString rootPath() const noexcept { return m_scriptPath; } + void errorMessage(const QString& arg_2) const W_SIGNAL(errorMessage, arg_2); private: @@ -73,6 +79,7 @@ class Model final : public Process::ProcessModel ShaderSource m_program; ProcessedProgram m_processedProgram; + QString m_scriptPath; }; struct ProcessFactory final : Process::ProcessFactory_T diff --git a/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.cpp b/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.hpp b/src/plugins/score-plugin-gfx/Gfx/FormatRegistry.hpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp index 756db583a4..b54882a415 100644 --- a/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/GStreamer/GStreamerOutputDevice.cpp @@ -85,6 +85,7 @@ struct 
GStreamerOutputNode : score::gfx::OutputNode GstElement* m_audio_src{}; GStreamerSettings m_settings; bool m_started{}; + uint64_t m_video_max_bytes{}; // appsrc queue cap; 0 = disabled std::unique_ptr m_encoder[2]; int m_encoderIdx{}; // ping-pong index for double-buffered encoder QString m_detectedFormat; // UYVY, NV12, I420, or empty for RGBA @@ -125,6 +126,14 @@ struct GStreamerOutputNode : score::gfx::OutputNode qDebug() << "GStreamer output parse error:" << err->message; if(gst.g_error_free) gst.g_error_free(err); + // gst_parse_launch (non-_full) can return a non-NULL *partial* pipeline + // with *error set. Such a pipeline is broken (e.g. missing appsrcs) — + // unref it so we don't leak/retain it and never set it PLAYING. + if(m_pipeline) + { + gst.object_unref(m_pipeline); + m_pipeline = nullptr; + } return false; } if(!m_pipeline) @@ -177,10 +186,29 @@ struct GStreamerOutputNode : score::gfx::OutputNode gst.object_set_property(elem, prop, &gv); gst.value_unset(&gv); }; + auto setUInt64 = [&](GstElement* elem, const char* prop, uint64_t val) { + if(!gst.value_set_uint64) + return; + GValue gv{}; + gst.value_init(&gv, G_TYPE_UINT64); + gst.value_set_uint64(&gv, val); + gst.object_set_property(elem, prop, &gv); + gst.value_unset(&gv); + }; setBool(m_video_src, "is-live", true); setBool(m_video_src, "do-timestamp", true); setInt(m_video_src, "format", 3); // GST_FORMAT_TIME + + // Backpressure: the appsrc default max-bytes is 200000, far below a + // single 1080p RGBA frame (~8 MB). Bound the queue to a few frames so + // RSS can't grow without limit when downstream stalls. We additionally + // drop frames ourselves (see push_video_frame_*) by polling + // current-level-bytes, which gives downstream-leaky behaviour without + // depending on the leaky-type enum GType (not introspectable here) and + // without blocking the render thread. 
+ m_video_max_bytes = (uint64_t)16 * 1024 * 1024; // ~2 frames @1080p RGBA + setUInt64(m_video_src, "max-bytes", m_video_max_bytes); } } @@ -279,6 +307,36 @@ struct GStreamerOutputNode : score::gfx::OutputNode m_started = true; } + // Non-blocking bus poll: surfaces otherwise-silent encoder/filesink/muxer + // errors. Called once per rendered frame; logs the first error then stops + // pushing (m_started=false) so we don't spam or feed a dead pipeline. + void poll_bus_errors() + { + if(!m_pipeline || !m_started) + return; + + auto& gst = libgstreamer::instance(); + if(!gst.element_get_bus || !gst.bus_timed_pop_filtered) + return; + + GstBus* bus = gst.element_get_bus(m_pipeline); + if(!bus) + return; + + // timeout==0 => return immediately if no matching message is queued. + while(GstMessage* msg = gst.bus_timed_pop_filtered( + bus, 0, (GstMessageType)(GST_MESSAGE_ERROR | GST_MESSAGE_WARNING))) + { + qWarning() << "GStreamer output: pipeline error/warning on the bus"; + if(gst.message_unref) + gst.message_unref(msg); + // An ERROR aborts the pipeline; stop feeding it. + m_started = false; + break; + } + gst.object_unref(bus); + } + void stop_pipeline() { if(!m_pipeline || !m_started) @@ -292,6 +350,33 @@ struct GStreamerOutputNode : score::gfx::OutputNode if(m_audio_src && gst.app_src_end_of_stream) gst.app_src_end_of_stream(m_audio_src); + // appsrc EOS is ASYNC: it travels through the pipeline as a buffer would, + // and muxers (mp4mux/matroskamux/...) only finalize the file once EOS + // reaches them. Setting the pipeline to NULL immediately would truncate + // the moov atom / cluster index, producing unplayable files. Wait for the + // EOS (or ERROR) message on the bus, with a bounded timeout so we never + // hang the UI thread on a stuck pipeline. 
+ if(gst.element_get_bus && gst.bus_timed_pop_filtered) + { + if(GstBus* bus = gst.element_get_bus(m_pipeline)) + { + GstMessage* msg = gst.bus_timed_pop_filtered( + bus, 5 * GST_SECOND, + (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR)); + if(msg) + { + if(gst.message_unref) + gst.message_unref(msg); + } + else + { + qWarning() << "GStreamer output: timed out waiting for EOS; " + "output file may be truncated"; + } + gst.object_unref(bus); + } + } + gst.element_set_state(m_pipeline, GST_STATE_NULL); m_started = false; } @@ -309,12 +394,35 @@ struct GStreamerOutputNode : score::gfx::OutputNode } } + // Downstream-leaky backpressure: if appsrc's queued bytes already exceed the + // configured budget, drop this frame instead of growing RSS without bound. + // Reading current-level-bytes (guint64) is cheap and lock-free in appsrc. + bool video_queue_full() const + { + if(m_video_max_bytes == 0 || !m_video_src) + return false; + + auto& gst = libgstreamer::instance(); + if(!gst.object_get_property || !gst.value_init || !gst.value_unset + || !gst.value_get_uint64) + return false; + + GValue gv{}; + gst.value_init(&gv, G_TYPE_UINT64); + gst.object_get_property(m_video_src, "current-level-bytes", &gv); + uint64_t level = gst.value_get_uint64(&gv); + gst.value_unset(&gv); + return level >= m_video_max_bytes; + } + // Zero-copy push: takes a shallow copy of the QByteArray. // The QByteArray's refcount keeps the data alive until GStreamer is done. void push_video_frame_zerocopy(QByteArray data) { if(!m_video_src || !m_started) return; + if(video_queue_full()) + return; // drop: downstream can't keep up auto& gst = libgstreamer::instance(); if(!gst.buffer_new_wrapped_full) @@ -405,6 +513,9 @@ struct GStreamerOutputNode : score::gfx::OutputNode if(!renderer || !m_renderState) return; + // Surface any silent pipeline errors (encoder/filesink/muxer failures). 
+ poll_bus_errors(); + auto rhi = m_renderState->rhi; QRhiCommandBuffer* cb{}; if(rhi->beginOffscreenFrame(&cb) != QRhi::FrameOpSuccess) @@ -492,18 +603,15 @@ struct GStreamerOutputNode : score::gfx::OutputNode void createOutput(score::gfx::OutputConfiguration conf) override { - m_renderState = std::make_shared(); - - m_renderState->surface = QRhiGles2InitParams::newFallbackSurface(); - QRhiGles2InitParams params; - params.fallbackSurface = m_renderState->surface; - score::GLCapabilities caps; - caps.setupFormat(params.format); - m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); + m_renderState = score::gfx::createRenderState( + conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr); + if(!m_renderState || !m_renderState->rhi) + { + qWarning() << "GStreamerOutputNode: failed to create QRhi"; + m_renderState.reset(); + return; + } m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::OpenGL; - m_renderState->version = caps.qShaderVersion; auto rhi = m_renderState->rhi; m_texture = rhi->newTexture( @@ -517,10 +625,12 @@ struct GStreamerOutputNode : score::gfx::OutputNode m_renderState->renderPassDescriptor); m_renderTarget->create(); - init_pipeline(); + const bool pipeline_ok = init_pipeline(); + if(!pipeline_ok) + qWarning() << "GStreamerOutputNode: pipeline init failed; output disabled"; // Create GPU encoder if a YUV target format was detected - if(!m_detectedFormat.isEmpty() && rhi) + if(pipeline_ok && !m_detectedFormat.isEmpty() && rhi) { auto makeEncoder = [&]() -> std::unique_ptr { if(m_detectedFormat == "UYVY" || m_detectedFormat == "YUY2") @@ -538,6 +648,24 @@ struct GStreamerOutputNode : score::gfx::OutputNode if(m_encoder[0] && m_encoder[1]) { + // Stride alignment: QRhi reads textures back with TIGHTLY packed rows, + // but GStreamer's default GstVideoInfo strides are GST_ROUND_UP_4. 
For + // the planar/semi-planar YUV formats the two only agree when each plane + // row is already a multiple of 4: + // I420: Y stride = width, chroma stride = width/2 -> need width%8==0 + // NV12: Y stride = width, UV stride = width -> need width%4==0 + // UYVY: stride = width*2 (4:2:2 macropixels) -> need width%2==0 + // height must be even for 4:2:0 vertical subsampling. We round DOWN so + // we never sample past the rendered texture, and feed the SAME aligned + // dimensions to both the encoder and the negotiated caps so the tight + // readback matches GStreamer's expected (now no-op ROUND_UP_4) strides. + const int enc_w = std::max(8, m_settings.width & ~7); // mult of 8 (covers 4 & 2) + const int enc_h = std::max(2, m_settings.height & ~1); // mult of 2 + if(enc_w != m_settings.width || enc_h != m_settings.height) + qDebug() << "GStreamer output: aligning" << m_detectedFormat + << "from" << m_settings.width << "x" << m_settings.height + << "to" << enc_w << "x" << enc_h << "for packed strides"; + auto input_trc = static_cast(m_settings.input_transfer); auto colorShader = colorShaderFromColorimetry(m_detectedColorimetry, input_trc); qDebug() << "GStreamer output: GPU encoder" @@ -546,9 +674,9 @@ struct GStreamerOutputNode : score::gfx::OutputNode << "inputTrc=" << m_settings.input_transfer << "shaderLen=" << colorShader.size(); m_encoder[0]->init(*rhi, *m_renderState, m_texture, - m_settings.width, m_settings.height, colorShader); + enc_w, enc_h, colorShader); m_encoder[1]->init(*rhi, *m_renderState, m_texture, - m_settings.width, m_settings.height, colorShader); + enc_w, enc_h, colorShader); // Update appsrc caps to match the encoder's output format if(auto& gst = libgstreamer::instance(); @@ -556,8 +684,8 @@ struct GStreamerOutputNode : score::gfx::OutputNode { auto capsStr = QString("video/x-raw,format=%1,width=%2,height=%3,framerate=%4/1") .arg(m_detectedFormat) - .arg(m_settings.width) - .arg(m_settings.height) + .arg(enc_w) + .arg(enc_h) 
.arg(m_settings.rate); if(auto* caps = gst.caps_from_string(capsStr.toStdString().c_str())) { @@ -582,6 +710,38 @@ struct GStreamerOutputNode : score::gfx::OutputNode } } cleanup_pipeline(); + + // Reset per-instance frame/encoder state so a subsequent createOutput() + // (re-create on settings change) starts clean instead of reusing a stale + // readback, ping-pong index, detected format or dangling renderer pointer. + m_currentReadback = &m_readback[0]; + m_readback[0] = {}; + m_readback[1] = {}; + m_encoderIdx = 0; + m_detectedFormat.clear(); + m_detectedColorimetry.clear(); + m_inv_y_renderer = nullptr; + m_video_max_bytes = 0; + + if(!m_renderState) + return; + + // Persist-across-rebuild contract: registry survives RL teardown, + // so we tear down its QRhi resources here BEFORE + // RenderState::destroy() (called below) frees the device. + releaseRegistry(); + + delete m_renderTarget; + m_renderTarget = nullptr; + + delete m_renderState->renderPassDescriptor; + m_renderState->renderPassDescriptor = nullptr; + + delete m_texture; + m_texture = nullptr; + + m_renderState->destroy(); + m_renderState.reset(); } std::shared_ptr renderState() const override diff --git a/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp index 4624286cd9..001e1ac064 100644 --- a/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/GeometryFilter/Process.cpp @@ -324,8 +324,17 @@ void Model::setupIsf(const isf::descriptor& desc) alternatives.emplace_back("2", 2); } + // ComboBox::init expects the VALUE that should be initially selected, + // not an index. libisf stores `v.def` as the INDEX into values. + // Pass the alternative's value at v.def so the widget initialises + // to the author-intended entry instead of falling back to + // alternatives[0]. Same fix as CSF/Process.cpp. 
+ const std::size_t def_idx + = std::min(v.def, alternatives.size() - 1); + const ossia::value& init_value = alternatives[def_idx].second; + auto port = new Process::ComboBox( - std::move(alternatives), (int)v.def, QString::fromStdString(input.name), + std::move(alternatives), init_value, QString::fromStdString(input.name), Id(i), &self); self.m_inlets.push_back(port); @@ -456,7 +465,9 @@ void Model::setupIsf(const isf::descriptor& desc) // They're managed by the system, so we don't create a UI control return nullptr; } - + + Process::Inlet* operator()(const uniform_input& v) { return nullptr; } + Process::Inlet* operator()(const texture_input& v) { auto port = new Gfx::TextureInlet( diff --git a/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp b/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp index 5fc416fffe..86c728aeb5 100644 --- a/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/GfxContext.cpp @@ -10,6 +10,8 @@ #include #include + +#include #include #include @@ -36,6 +38,10 @@ GfxContext::GfxContext(const score::DocumentContext& ctx) &GfxContext::recompute_graph); m_graph = new score::gfx::Graph; + // Hand the session-wide AssetTable down to the Graph so every + // RenderList it creates can participate in content-hash decode + // dedup. Plan 09 S1: one decode per asset per session, N uploads. + m_graph->setAssetTable(&m_assets); double rate = m_context.app.settings().getRate(); rate = qBound(1.0, rate, 1000.); @@ -61,6 +67,14 @@ GfxContext::~GfxContext() m_thread.wait(); #endif + // Stop all timers before destroying the graph and nodes, + // to prevent timer callbacks from accessing stale pointers. 
+ m_manualTimers.clear(); + m_no_vsync_timer = nullptr; + m_watchdog_timer = nullptr; + std::destroy_at(&m_timers); + std::construct_at(&m_timers); + delete m_graph; } @@ -122,63 +136,79 @@ void GfxContext::disconnect_preview_node(EdgeSpec e) void GfxContext::add_edge(EdgeSpec edge) { auto source_node_it = this->nodes.find(edge.first.node); - if(source_node_it != this->nodes.end()) - { - auto sink_node_it = this->nodes.find(edge.second.node); - if(sink_node_it != this->nodes.end()) - { - assert(source_node_it->second); - assert(sink_node_it->second); - - auto& source_ports = source_node_it->second->output; - auto& sink_ports = sink_node_it->second->input; - - SCORE_ASSERT(source_ports.size() > 0); - SCORE_ASSERT(sink_ports.size() > 0); - SCORE_ASSERT(source_ports.size() > edge.first.port); - SCORE_ASSERT(sink_ports.size() > edge.second.port); - auto source_port = source_ports[edge.first.port]; - auto sink_port = sink_ports[edge.second.port]; - - m_graph->addEdge(source_port, sink_port, edge.type); - } - } + if(source_node_it == this->nodes.end()) + return; + auto sink_node_it = this->nodes.find(edge.second.node); + if(sink_node_it == this->nodes.end()) + return; + if(!source_node_it->second || !sink_node_it->second) + return; + + auto& source_ports = source_node_it->second->output; + auto& sink_ports = sink_node_it->second->input; + + // Silently drop malformed edges. A live-coded or half-wired patch can + // produce an edge whose declared port index doesn't exist on either side + // (e.g. a shader that parses to zero input ports but the script still + // issued a `connect(..., 0, consumer, 0)`). Aborting the whole renderer + // on a script-level wiring mistake is not an option — drop the edge and + // keep rendering. 
+ if(edge.first.port >= source_ports.size() + || edge.second.port >= sink_ports.size()) + return; + + m_graph->addEdge(source_ports[edge.first.port], sink_ports[edge.second.port], + edge.type); } void GfxContext::remove_edge(EdgeSpec edge) { auto source_node_it = this->nodes.find(edge.first.node); - if(source_node_it != this->nodes.end()) - { - auto sink_node_it = this->nodes.find(edge.second.node); - if(sink_node_it != this->nodes.end()) - { - assert(source_node_it->second); - assert(sink_node_it->second); - - auto source_port = source_node_it->second->output[edge.first.port]; - auto sink_port = sink_node_it->second->input[edge.second.port]; - - m_graph->removeEdge(source_port, sink_port); - } - } + if(source_node_it == this->nodes.end()) + return; + auto sink_node_it = this->nodes.find(edge.second.node); + if(sink_node_it == this->nodes.end()) + return; + if(!source_node_it->second || !sink_node_it->second) + return; + + auto& source_ports = source_node_it->second->output; + auto& sink_ports = sink_node_it->second->input; + if(edge.first.port >= source_ports.size() + || edge.second.port >= sink_ports.size()) + return; + + m_graph->removeEdge(source_ports[edge.first.port], + sink_ports[edge.second.port]); } void GfxContext::recompute_edges() { m_graph->clearEdges(); - for(auto edge : edges) + // Snapshot under lock: writer in updateGraph reassigns `edges` under + // edges_lock on the render-driving thread, while this can be invoked from + // settings-change signals on the UI thread. Iterating the live container + // would race with that reassignment. 
+ ossia::flat_set edges_snapshot; + ossia::flat_set preview_snapshot; + { + std::lock_guard l{edges_lock}; + edges_snapshot = edges; + preview_snapshot = preview_edges; + } + + for(auto edge : edges_snapshot) { add_edge(edge); } - for(auto edge : preview_edges) + for(auto edge : preview_snapshot) { add_edge(edge); } } -void GfxContext::recompute_graph() +void GfxContext::recomputeTimers() { // Clear previous timers std::destroy_at(&m_timers); @@ -195,15 +225,10 @@ void GfxContext::recompute_graph() output->setVSyncCallback({}); } - // Recreate the graph - recompute_edges(); - auto& settings = m_context.app.settings(); - const double settings_rate = m_context.app.settings().getRate(); + const double settings_rate = settings.getRate(); const auto api = settings.graphicsApiEnum(); - m_graph->createAllRenderLists(api); - // Recreate new timers const bool vsync = settings.getVSync() && m_graph->canDoVSync(); @@ -274,6 +299,24 @@ void GfxContext::recompute_graph() } } +void GfxContext::recomputeGraphTopology() +{ + recompute_edges(); + + auto& settings = m_context.app.settings(); + const auto api = settings.graphicsApiEnum(); + + m_graph->createAllRenderLists(api); +} + +void GfxContext::recompute_graph() +{ + // Topology first: refreshes m_graph->outputs() which recomputeTimers reads. + // Must run before timers because recomputeTimers iterates outputs(). 
+ recomputeGraphTopology(); + recomputeTimers(); +} + void GfxContext::add_preview_output(score::gfx::OutputNode& node) { auto& settings = m_context.app.settings(); @@ -296,12 +339,131 @@ void GfxContext::add_preview_output(score::gfx::OutputNode& node) void GfxContext::recompute_connections() { recompute_graph(); - // FIXME for more performance - /* - recompute_edges(); - // m_graph->setupOutputs(m_api); - m_graph->relinkGraph(); - */ +} + +void GfxContext::incrementalEdgeUpdate( + const ossia::flat_set& old_edges, + const ossia::flat_set& cur_edges) +{ + // Compute diff + std::vector removed; + std::vector added; + + std::set_difference( + old_edges.begin(), old_edges.end(), + cur_edges.begin(), cur_edges.end(), + std::back_inserter(removed)); + + std::set_difference( + cur_edges.begin(), cur_edges.end(), + old_edges.begin(), old_edges.end(), + std::back_inserter(added)); + + // Pre-compute the set of sink ports that will be fed by an incoming edge + // in this same batch. Handing that set to onEdgeRemoved prevents the + // "remove A→B, add F→B" sequence from destroying B's input RT in the + // gap between the two, which was pure churn when the old and new feeds + // share a sink port (classic filter insertion). Reconcile reallocates + // RTs only when the slot is empty, so preserving the existing RT lets + // the new pass slot straight into place. Source: Graph.cpp + // createPassForEdgeIfMissing already treats a present RT as valid + // regardless of the edge that produced it. + ossia::hash_set preserveSinks; + preserveSinks.reserve(added.size()); + for(auto& spec : added) + { + auto sink_it = nodes.find(spec.second.node); + if(sink_it == nodes.end()) + continue; + // EdgeSpecs are script-supplied: guard against null nodes and + // out-of-range port indices before indexing, exactly as + // add_edge/remove_edge do. An OOB std::vector access is UB, not a + // catchable exception, so the try/catch around the caller cannot + // save us here. 
+ if(!sink_it->second) + continue; + auto& sink_ports = sink_it->second->input; + if(spec.second.port >= sink_ports.size()) + continue; + preserveSinks.insert(sink_ports[spec.second.port]); + } + + // Process removals first (while edge objects still exist). + for(auto& spec : removed) + { + auto source_it = nodes.find(spec.first.node); + auto sink_it = nodes.find(spec.second.node); + if(source_it == nodes.end() || sink_it == nodes.end()) + continue; + if(!source_it->second || !sink_it->second) + continue; + + auto& source_ports = source_it->second->output; + auto& sink_ports = sink_it->second->input; + if(spec.first.port >= source_ports.size() + || spec.second.port >= sink_ports.size()) + continue; + + auto* source_port = source_ports[spec.first.port]; + auto* sink_port = sink_ports[spec.second.port]; + + // Find the actual Edge object + score::gfx::Edge* edge = nullptr; + for(auto* e : source_port->edges) + { + if(e->sink == sink_port) + { + edge = e; + break; + } + } + + if(edge) + { + // Notify graph BEFORE destroying the edge + m_graph->onEdgeRemoved(*edge, &preserveSinks); + m_graph->removeEdge(source_port, sink_port); + } + } + + // Process additions: first create all edge objects in the graph, + // then reconcile render lists in one pass. Processing edges one + // at a time doesn't work because edge ordering creates dependencies + // (e.g. edge A->B is skipped because B isn't in the RL yet, then + // edge B->C brings B into the RL, but A never gets a renderer). 
+ for(auto& spec : added) + { + auto source_it = nodes.find(spec.first.node); + auto sink_it = nodes.find(spec.second.node); + if(source_it == nodes.end() || sink_it == nodes.end()) + continue; + if(!source_it->second || !sink_it->second) + continue; + + auto& source_ports = source_it->second->output; + auto& sink_ports = sink_it->second->input; + if(spec.first.port >= source_ports.size() + || spec.second.port >= sink_ports.size()) + continue; + + auto* source_port = source_ports[spec.first.port]; + auto* sink_port = sink_ports[spec.second.port]; + + m_graph->addEdge(source_port, sink_port, spec.type); + } + + // Reconcile: ensure all reachable nodes have renderers and passes. + // This handles NEW nodes (creates renderers + passes for all their edges). + if(!added.empty() || !removed.empty()) + m_graph->reconcileAllRenderLists(); + + // Create missing passes and update samplers for ALL edges in the graph, + // not just the newly-added ones. When a node becomes reachable through a + // new edge (e.g. filter→Grid makes filter reachable), pre-existing edges + // TO that node (e.g. A→filter) also need passes created. Checking only + // the diff misses these. + m_graph->createAllMissingPasses(); + m_graph->updateAllSinkSamplers(); } void GfxContext::update_inputs() @@ -328,13 +490,17 @@ void GfxContext::update_inputs() void GfxContext::remove_node( std::vector>& nursery, int32_t index) { - // Remove all edges involving that node - for(auto it = this->edges.begin(); it != this->edges.end();) + // Remove all edges involving that node. recompute_edges snapshots + // `edges` under edges_lock, so take it here too while mutating. 
{ - if(it->first.node == index || it->second.node == index) - it = this->edges.erase(it); - else - ++it; + std::lock_guard l{edges_lock}; + for(auto it = this->edges.begin(); it != this->edges.end();) + { + if(it->first.node == index || it->second.node == index) + it = this->edges.erase(it); + else + ++it; + } } if(auto node_it = nodes.find(index); node_it != nodes.end()) @@ -392,7 +558,11 @@ void GfxContext::run_commands() case NodeCommand::ADD_NODE: { m_graph->addNode(cmd.node.get()); nodes[cmd.index] = {std::move(cmd.node)}; - recompute = true; + // Only output nodes require a full rebuild (new window/timer). + // Non-output nodes just wait for edges — the incremental + // reconciliation path creates their renderers when connected. + if(dynamic_cast(nodes[cmd.index].get())) + recompute = true; break; } case NodeCommand::REMOVE_PREVIEW_NODE: { @@ -400,13 +570,27 @@ void GfxContext::run_commands() auto n = dynamic_cast(node.get()); SCORE_ASSERT(n); { - auto it = ossia::find_if(this->preview_edges, [idx = cmd.index](EdgeSpec e) { - return e.second.node == idx; - }); - if(it != this->preview_edges.end()) + // recompute_edges snapshots preview_edges under edges_lock, + // so guard reads/mutations of it here too. remove_edge only + // touches m_graph, so keep it outside the lock. 
+ EdgeSpec to_remove; + bool found = false; + { + std::lock_guard l{edges_lock}; + auto it = ossia::find_if(this->preview_edges, [idx = cmd.index](EdgeSpec e) { + return e.second.node == idx; + }); + if(it != this->preview_edges.end()) + { + to_remove = *it; + found = true; + } + } + if(found) { - this->remove_edge(*it); - this->preview_edges.erase(*it); + this->remove_edge(to_remove); + std::lock_guard l{edges_lock}; + this->preview_edges.erase(to_remove); } } m_graph->destroyOutputRenderList(*n); @@ -414,8 +598,27 @@ void GfxContext::run_commands() break; } case NodeCommand::REMOVE_NODE: { - remove_node(nursery, cmd.index); - recompute = true; + if(auto node_it = nodes.find(cmd.index); node_it != nodes.end()) + { + bool is_output = dynamic_cast(node_it->second.get()); + if(!is_output) + { + // Incremental removal: clean up edges, renderers, retopo sort. + // Must happen BEFORE remove_node deletes the node. + m_graph->removeNodeAndEdges(node_it->second.get()); + } + remove_node(nursery, cmd.index); + if(is_output) + { + // Recompute immediately so subsequent commands in this tick + // see a consistent graph state. Deferring until the end of + // the loop leaves the graph half-broken (node gone from + // m_nodes but renderer/output still wired) for any further + // commands or render frames that fire in this window. 
+ recompute_graph(); + m_fullRebuildThisFrame = true; + } + } break; } case NodeCommand::RELINK: { @@ -430,12 +633,18 @@ void GfxContext::run_commands() switch(cmd.cmd) { case EdgeCommand::CONNECT_PREVIEW_NODE: { - this->preview_edges.emplace(cmd.edge); + { + std::lock_guard l{edges_lock}; + this->preview_edges.emplace(cmd.edge); + } add_edge(cmd.edge); break; } case EdgeCommand::DISCONNECT_PREVIEW_NODE: { - this->preview_edges.erase(cmd.edge); + { + std::lock_guard l{edges_lock}; + this->preview_edges.erase(cmd.edge); + } remove_edge(cmd.edge); break; } @@ -452,6 +661,11 @@ void GfxContext::run_commands() if(recompute) { recompute_graph(); + // Signal to updateGraph() that a full rebuild happened this frame. + // The incremental edge path should NOT run after a full rebuild, + // because the graph was just rebuilt with the old edge set and + // applying an incremental diff would result in a half-built state. + m_fullRebuildThisFrame = true; } // This will force the nodes to be deleted in the main thread a bit later @@ -470,14 +684,49 @@ void GfxContext::updateGraph() update_inputs(); - if(edges_changed) + // Clear the flag BEFORE copying new_edges so a producer that publishes a + // fresh edge set after our copy (and re-sets the flag) cannot have its + // signal lost: the worst case is one redundant reprocess next tick, never + // a dropped update. Clearing it after the copy (the previous behaviour) + // could clobber a set-after-copy and, with prev_edges dedup on the + // producer side, that update would never be re-sent. + if(edges_changed.exchange(false)) { + ossia::flat_set old_edges; + ossia::flat_set cur_edges; { std::lock_guard l{edges_lock}; - std::swap(edges, new_edges); + old_edges = edges; + edges = new_edges; + cur_edges = edges; + } + + // If a full rebuild happened this frame (nodes added/removed), + // use the nuclear path for edges too. 
The incremental path + // doesn't work correctly after a full rebuild because the graph + // was rebuilt with the old edge set. + if(m_fullRebuildThisFrame) + { + m_fullRebuildThisFrame = false; + recompute_connections(); + return; + } + // Incremental edge update: apply the diff between old and new edges. + try + { + incrementalEdgeUpdate(old_edges, cur_edges); + } + catch(const std::exception& e) + { + qWarning("Incremental edge update failed (%s), falling back to full rebuild", + e.what()); + recompute_connections(); + } + catch(...) + { + qWarning("Incremental edge update failed, falling back to full rebuild"); + recompute_connections(); } - recompute_connections(); - edges_changed = false; } } @@ -497,7 +746,8 @@ void GfxContext::on_manual_timer(score::HighResolutionTimer* self) if(auto ptr = m_manualTimers.find(self); ptr != m_manualTimers.end()) { for(auto output : ptr->second) { - output->render(); + if(output && output->canRender()) + output->render(); } } } diff --git a/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp b/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp index 7422bd1212..d2d104b795 100644 --- a/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/GfxContext.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -75,6 +76,11 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject void recompute_edges(); void recompute_graph(); void recompute_connections(); + void recomputeTimers(); + void recomputeGraphTopology(); + void incrementalEdgeUpdate( + const ossia::flat_set& old_edges, + const ossia::flat_set& cur_edges); void update_inputs(); void updateGraph(); @@ -84,6 +90,18 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject tick_messages.enqueue(std::move(msg)); } + /** + * @brief Session-wide content-hash decode cache. + * + * Shared across all RenderLists in this GfxContext. 
Loaders stage + * decoded bytes here on their worker thread; downstream consumers + * (texture upload, mesh VB/IB assembly) acquire by content hash, + * avoiding re-decoding the same source asset across multiple outputs + * or reloads. See Gfx/AssetTable.hpp. + */ + AssetTable& assets() noexcept { return m_assets; } + const AssetTable& assets() const noexcept { return m_assets; } + private: void run_commands(); void add_preview_output(score::gfx::OutputNode& out); @@ -132,9 +150,10 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject std::mutex edges_lock; ossia::flat_set new_edges TS_GUARDED_BY(edges_lock); - ossia::flat_set edges; - ossia::flat_set preview_edges; + ossia::flat_set edges TS_GUARDED_BY(edges_lock); + ossia::flat_set preview_edges TS_GUARDED_BY(edges_lock); std::atomic_bool edges_changed{}; + bool m_fullRebuildThisFrame{}; score::HighResolutionTimer* m_no_vsync_timer{}; score::HighResolutionTimer* m_watchdog_timer{}; @@ -143,6 +162,8 @@ class SCORE_PLUGIN_GFX_EXPORT GfxContext : public QObject ossia::object_pool> m_buffers; + AssetTable m_assets; + score::Timers m_timers; }; diff --git a/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp index 8a152e0a8c..744dbdec13 100644 --- a/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/GfxDevice.cpp @@ -2,6 +2,7 @@ #include "GfxParameter.hpp" +#include #include #include diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp index 24dd9c4675..51b9f9e787 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/BackgroundNode.hpp @@ -5,6 +5,10 @@ #include #include #include +#include + +#include +#include namespace score::gfx { @@ -21,7 +25,7 @@ struct BackgroundNode : OutputNode m_conf = {.manualRenderingRate = 1000. 
/ settings_rate}; } - virtual ~BackgroundNode() { } + virtual ~BackgroundNode() { destroyOutput(); } void startRendering() override { } void render() override @@ -56,6 +60,12 @@ struct BackgroundNode : OutputNode void createOutput(score::gfx::OutputConfiguration conf) override { m_onResize = conf.onResize; + // Cache the requested graphics API so setSwapchainFormat can rebuild + // through createOutput when the format actually changes (live HDR↔SDR + // toggle). Without this the format setter was inert: m_swapchainFormat + // was updated but the underlying QRhiTexture stayed in its original + // format, silently downgrading HDR to SDR. + m_lastGraphicsApi = conf.graphicsApi; QSize newSz = m_renderSize; if(newSz.width() <= 0 || newSz.height() <= 0) @@ -64,22 +74,38 @@ struct BackgroundNode : OutputNode newSz = QSize{1024, 1024}; m_renderState = score::gfx::createRenderState(conf.graphicsApi, newSz, nullptr); + if(!m_renderState || !m_renderState->rhi) + { + qWarning() << "BackgroundNode: failed to create QRhi"; + m_renderState.reset(); + return; + } m_renderState->outputSize = m_renderState->renderSize; + m_renderState->renderFormat + = (m_swapchainFormat != Gfx::SwapchainFormat::SDR) + ? QRhiTexture::RGBA32F + : QRhiTexture::RGBA8; auto rhi = m_renderState->rhi; m_texture = rhi->newTexture( - QRhiTexture::RGBA8, m_renderState->renderSize, 1, + m_renderState->renderFormat, m_renderState->renderSize, 1, QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource); m_texture->create(); - m_depthBuffer = rhi->newRenderBuffer( - QRhiRenderBuffer::DepthStencil, m_renderState->renderSize, 1); - m_depthBuffer->create(); + // Reverse-Z project rule: depth attachment is D32F (float). Fixed-point + // D24 combined with reverse-Z gives strictly worse precision than + // standard-Z, so we must allocate a float texture here. RenderTarget + // flag is required for attaching as a depth target. 
+ m_depthTexture = rhi->newTexture( + QRhiTexture::D32F, m_renderState->renderSize, 1, + QRhiTexture::RenderTarget); + m_depthTexture->setName("BackgroundNode::m_depthTexture"); + m_depthTexture->create(); QRhiTextureRenderTargetDescription desc; desc.setColorAttachments({QRhiColorAttachment(m_texture)}); - desc.setDepthStencilBuffer(m_depthBuffer); + desc.setDepthTexture(m_depthTexture); m_renderTarget = rhi->newTextureRenderTarget(desc); m_renderState->renderPassDescriptor = m_renderTarget->newCompatibleRenderPassDescriptor(); @@ -93,11 +119,33 @@ struct BackgroundNode : OutputNode { if(m_renderState) { + // Drain the GPU before tearing resources down. Same rationale as + // ScreenNode::destroyOutput: when setSwapchainFormat invokes + // destroyOutput synchronously (C-16 / commit e2afe7874), an + // unfinished cbWrapper from a prior offscreen frame can still be + // referenced by ScenePreprocessor's per-frame copyBuffer + // (C-01 / commit fe146c8de). Recording into that CB after we've + // freed the rhi triggers VUID-vkCmdCopyBuffer-commandBuffer- + // recording and a device loss. Mirrors MultiWindowNode.cpp:1068. + if(m_renderState->rhi) + { + // Pre-condition: destroyOutput must not be called inside a + // frame. Mirrors ScreenNode::destroyOutput. + SCORE_ASSERT(!m_renderState->rhi->isRecordingFrame()); + m_renderState->rhi->finish(); + } + + // Persist-across-rebuild contract: the registry survives RL + // teardown, so we must release its QRhi resources here BEFORE + // RenderState::destroy() tears down the QRhi. destroyOwned() + // `delete`s the wrappers directly while the device is alive. 
+ releaseRegistry(); + delete m_renderTarget; m_renderTarget = nullptr; - delete m_depthBuffer; - m_depthBuffer = nullptr; + delete m_depthTexture; + m_depthTexture = nullptr; delete m_texture; m_texture = nullptr; @@ -109,7 +157,39 @@ struct BackgroundNode : OutputNode m_renderState.reset(); } } - void updateGraphicsAPI(GraphicsApi) override { } + void updateGraphicsAPI(GraphicsApi api) override + { + if(!m_renderState) + return; + if(m_renderState->api != api) + destroyOutput(); + } + + void setSwapchainFormat(Gfx::SwapchainFormat format) + { + if(m_swapchainFormat == format) + return; + m_swapchainFormat = format; + + // Live format change while rendering: the existing m_texture was + // allocated at createOutput-time with the prior format. setFormat alone + // wouldn't re-allocate the GPU memory backing — only setPixelSize + + // recreate-via-resize does. Re-route through destroyOutput + + // createOutput so the renderTarget / RPD / depth tex / colour tex all + // come back in matching format. Skipped before any output exists + // (m_renderState null) — createOutput will pick up the new format + // naturally via m_swapchainFormat. + if(m_renderState) + { + score::gfx::OutputConfiguration conf; + conf.graphicsApi = m_lastGraphicsApi; + conf.onResize = m_onResize; + destroyOutput(); + createOutput(std::move(conf)); + if(m_onResize) + m_onResize(); + } + } void setSize(QSize newSz) { @@ -143,24 +223,38 @@ struct BackgroundNode : OutputNode auto rhi = m_renderState->rhi; + // Drain the GPU before destroying m_renderTarget / m_texture / + // m_depthTexture. Same anti-pattern that destroyOutput already + // avoids via FIX-A: the current frame's offscreen CB (or a + // queued one) may still reference these resources, and Qt's + // setPixelSize+create dance below does not internally drain. + // Without this, validation fires on the next vkCmd*-recording + // (-recording / -commandBuffer-recording / -in-use) and may + // device-loss. 
+ rhi->finish(); + m_renderTarget->destroy(); m_texture->destroy(); m_texture->setPixelSize(newSz); m_texture->create(); - if(m_depthBuffer) - m_depthBuffer->destroy(); + if(m_depthTexture) + m_depthTexture->destroy(); else - m_depthBuffer = rhi->newRenderBuffer(QRhiRenderBuffer::DepthStencil, newSz); - m_depthBuffer->setPixelSize(newSz); - m_depthBuffer->create(); - - delete m_renderTarget; - delete m_renderState->renderPassDescriptor; + m_depthTexture = rhi->newTexture( + QRhiTexture::D32F, newSz, 1, QRhiTexture::RenderTarget); + m_depthTexture->setPixelSize(newSz); + m_depthTexture->create(); + + m_renderTarget->deleteLater(); + if(auto* rpd = m_renderState->renderPassDescriptor) + rpd->deleteLater(); + m_renderState->renderPassDescriptor = nullptr; + m_renderTarget = nullptr; QRhiTextureRenderTargetDescription desc; desc.setColorAttachments({QRhiColorAttachment(m_texture)}); - desc.setDepthStencilBuffer(m_depthBuffer); + desc.setDepthTexture(m_depthTexture); m_renderTarget = rhi->newTextureRenderTarget(desc); m_renderState->renderPassDescriptor = m_renderTarget->newCompatibleRenderPassDescriptor(); @@ -179,7 +273,8 @@ struct BackgroundNode : OutputNode score::gfx::TextureRenderTarget rt{ .texture = m_texture, .renderPass = m_renderState->renderPassDescriptor, - .renderTarget = m_renderTarget}; + .renderTarget = m_renderTarget, + .depthTexture = m_depthTexture}; return new Gfx::InvertYRenderer{ *this, rt, const_cast(*shared_readback)}; } @@ -193,12 +288,17 @@ struct BackgroundNode : OutputNode std::weak_ptr m_renderer{}; QRhiTexture* m_texture{}; - QRhiRenderBuffer* m_depthBuffer{}; + QRhiTexture* m_depthTexture{}; QRhiTextureRenderTarget* m_renderTarget{}; std::shared_ptr m_renderState{}; std::function m_onResize; QSize m_size{1024, 1024}; QSize m_renderSize{}; + Gfx::SwapchainFormat m_swapchainFormat{}; + // Cached graphics API from the last createOutput so setSwapchainFormat + // can route a live format change through destroyOutput + createOutput + // without 
having to re-derive it from the host. + GraphicsApi m_lastGraphicsApi{}; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.cpp new file mode 100644 index 0000000000..fe9fc89c5e --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.cpp @@ -0,0 +1,48 @@ +#include + +#include + +namespace score::gfx +{ + +void packCameraUBO( + CameraUBOData& out, const ossia::camera_component& cam, + const QMatrix4x4& worldTransform, QSize renderSize, float timeSeconds, + float aspectOverride) +{ + const QVector3D eye = worldTransform.column(3).toVector3D(); + + QMatrix4x4 view = worldTransform.inverted(); + + const float fovYDeg = cam.yfov * (180.f / float(M_PI)); + float aspect = aspectOverride; + if(aspect <= 0.f) + { + aspect = (renderSize.height() > 0) + ? (float(renderSize.width()) / float(renderSize.height())) + : (cam.aspect_ratio > 0.f ? cam.aspect_ratio : 1.f); + } + + QMatrix4x4 proj; + setReverseZPerspective(proj, fovYDeg, aspect, cam.znear, cam.zfar); + + QMatrix4x4 vp = proj * view; + + writeMat4(out.view, view); + writeMat4(out.projection, proj); + writeMat4(out.viewProjection, vp); + out.cameraPosition[0] = eye.x(); + out.cameraPosition[1] = eye.y(); + out.cameraPosition[2] = eye.z(); + out.cameraPosition[3] = 0.f; + out.renderSize[0] = float(renderSize.width()); + out.renderSize[1] = float(renderSize.height()); + out.renderSize[2] = 0.f; + out.renderSize[3] = 0.f; + out.params[0] = timeSeconds; + out.params[1] = cam.znear; + out.params[2] = cam.zfar; + out.params[3] = 0.f; +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.hpp new file mode 100644 index 0000000000..5196c94107 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CameraMath.hpp @@ -0,0 +1,82 @@ +#pragma once +#include +#include +#include + +#include +#include +#include + +namespace ossia +{ +struct camera_component; +} + +namespace 
score::gfx +{ + +// std140 layout; must byte-for-byte match every shader's `uniform camera_t`. +// Packed into ScenePreprocessor's per-camera Camera UBO aux buffer (attached +// to Geometry Out and auto-bound in consuming shaders by name). +struct CameraUBOData +{ + float view[16]{}; + float projection[16]{}; + float viewProjection[16]{}; + float cameraPosition[4]{}; + float renderSize[4]{}; + float params[4]{}; +}; +static_assert(sizeof(CameraUBOData) == 240, "CameraUBO layout must match shader"); + +inline void writeMat4(float dst[16], const QMatrix4x4& src) +{ + std::memcpy(dst, src.constData(), 16 * sizeof(float)); +} + +// Reverse-Z perspective projection in OpenGL NDC convention. +// +// Standard OpenGL perspective: view_z ∈ [-far, -near] → NDC z ∈ [-1, +1]. +// Reverse-Z (this function): view_z ∈ [-far, -near] → NDC z ∈ [-1, +1] +// but INVERTED: near → +1, far → -1. +// +// QRhi's clipSpaceCorrMatrix on Vulkan/Metal/D3D remaps the output NDC z ∈ +// [-1, +1] down to the backend-native [0, 1] without further flipping: +// near → 1.0, far → 0.0 in the depth buffer. +// +// This is paired project-wide with a float (D32F) depth attachment, a +// GREATER depth compare and a clear-depth of 0.0. Mixing conventions on a +// single depth buffer produces garbage. +inline void setReverseZPerspective( + QMatrix4x4& out, float fovYDeg, float aspect, float nearPlane, + float farPlane) +{ + out.setToIdentity(); + if(nearPlane == farPlane || aspect == 0.f) + return; + + const float radians = (fovYDeg * 0.5f) * float(M_PI / 180.0); + const float sine = std::sin(radians); + if(sine == 0.f) + return; + const float cotan = std::cos(radians) / sine; + const float clip = farPlane - nearPlane; + + out(0, 0) = cotan / aspect; + out(1, 1) = cotan; + out(2, 2) = (farPlane + nearPlane) / clip; + out(2, 3) = (2.f * farPlane * nearPlane) / clip; + out(3, 2) = -1.f; + out(3, 3) = 0.f; +} + +// Pack a camera_component's view/projection/position into a CameraUBOData. 
+// `worldTransform` is the camera node's accumulated world matrix (its +// column 3 is the eye position and its inverse is the view matrix). +// `aspectOverride` of <= 0 falls back to `renderSize.width / renderSize.height`. +void packCameraUBO( + CameraUBOData& out, const ossia::camera_component& cam, + const QMatrix4x4& worldTransform, QSize renderSize, float timeSeconds, + float aspectOverride = -1.f); + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp index 45b121e877..420e464f7d 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CommonUBOs.hpp @@ -20,6 +20,15 @@ struct ProcessUBO float renderSize[2]{2048, 2048}; float date[4]{0.f, 0.f, 0.f, 0.f}; + + // Mirrors gl_NumWorkGroups for CSF compute shaders. Populated by + // RenderedCSFNode just before dispatch so the libisf-injected + // `#define gl_NumWorkGroups isf_process_uniforms.NUMWORKGROUPS_` + // resolves to real dispatch counts on every backend (especially D3D + // where SPIRV-Cross refuses to emit the built-in directly). + // std140 packs uvec3 into a vec4 slot — the trailing word is padding. + uint32_t numWorkgroups[3]{}; + uint32_t _numWorkgroups_pad{}; }; /** @@ -40,12 +49,18 @@ struct ModelCameraUBO float projection[16]{}; float modelNormal[9]{}; float padding[3]; // Needed as a mat3 needs a bit more space... - float fov = 90.; + float fov = 90.f; + // NB: must NOT be named `near`/`far` — those are legacy macros defined by + // ; naming members after them forces an #undef that then breaks + // any Windows system header (mmeapi.h, combaseapi.h) included afterwards. + float znear = 0.001f; //!< Used by non-matrix projections (fulldome, …) for reverse-Z depth. + float zfar = 10000.f; //!< idem. 
// clang-format on }; static_assert( - sizeof(ModelCameraUBO) == sizeof(float) * (16 + 16 + 16 + 16 + 16 + 9 + 3 + 1)); + sizeof(ModelCameraUBO) + == sizeof(float) * (16 + 16 + 16 + 16 + 16 + 9 + 3 + 1 + 1 + 1)); /** * @brief UBO shared across all entities shown on the same output. @@ -55,6 +70,13 @@ struct OutputUBO float clipSpaceCorrMatrix[16]{}; float renderSize[2]{}; + + // MSAA sample count of the bound output target. Mirrors + // RenderList::samples(); shaders need it because gl_NumSamples is + // stripped by glslang under SPIR-V. The trailing pad keeps the UBO + // aligned to a vec4 boundary (std140-friendly). + int32_t sampleCount{1}; + int32_t _pad0{0}; }; /** diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp index cfb926a829..53439f861c 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.cpp @@ -2,10 +2,25 @@ #include #include +#include + +#include + // TODO: extend MeshBufs to hold multiple buffers // TODO: check that rendering e.g. sponza still works namespace score::gfx{ +// [BUFTRACE] implementation — see CustomMesh.hpp. Turn off at runtime +// by setting SCORE_BUFTRACE=0. 
+bool buftrace_enabled() +{ + static const bool on = [] { + const char* v = std::getenv("SCORE_BUFTRACE"); + return !v || v[0] != '0'; + }(); + return on; +} + CustomMesh::CustomMesh(const ossia::mesh_list &g, const ossia::geometry_filter_list_ptr &f) { reload(g, f); @@ -19,7 +34,9 @@ QRhiBuffer *CustomMesh::init_vbo(const ossia::geometry::cpu_buffer &buf, QRhi &r QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, vtx_buf_size); mesh_buf->setName( - QString("Mesh::vtx_buf.%1").arg(idx.load(std::memory_order_relaxed)).toLatin1()); + QString("Mesh::vtx_buf.%1") + .arg(idx.fetch_add(1, std::memory_order_relaxed)) + .toLatin1()); mesh_buf->create(); return mesh_buf; @@ -32,11 +49,15 @@ QRhiBuffer *CustomMesh::init_vbo(const ossia::geometry::gpu_buffer &buf, QRhi &r QRhiBuffer *CustomMesh::init_index(const ossia::geometry::cpu_buffer &buf, QRhi &rhi) const noexcept { + static std::atomic_int idx = 0; QRhiBuffer* idx_buf{}; if(const auto idx_buf_size = buf.byte_size; idx_buf_size > 0) { idx_buf = rhi.newBuffer(QRhiBuffer::Static, QRhiBuffer::IndexBuffer, idx_buf_size); - idx_buf->setName("Mesh::idx_buf"); + idx_buf->setName( + QString("Mesh::idx_buf.%1") + .arg(idx.fetch_add(1, std::memory_order_relaxed)) + .toLatin1()); idx_buf->create(); } @@ -54,132 +75,232 @@ MeshBuffers CustomMesh::init(QRhi &rhi) const noexcept { return {}; } - if(geom.meshes[0].buffers.empty()) - { - return {}; - } MeshBuffers ret; - // FIXME multi-mesh - auto& mesh = geom.meshes[0]; - - // 1. Null check - bool any_is_null = false; - for(const auto& buf : mesh.buffers) - { - any_is_null |= ossia::visit([&](Buffer& buf) { - if constexpr(std::is_same_v) - { - return buf.byte_size == 0 || buf.data == nullptr; - } - else if constexpr(std::is_same_v) - { - return buf.handle == nullptr; - } - return false; - }, buf.data); - } - if(any_is_null) + // Multi-mesh: concatenate every mesh's buffers into ret.buffers in order. 
+ // Each sub-mesh's local `input[].buffer` / `index.buffer` indices are + // remapped at draw time by adding the sub-mesh's starting offset in + // ret.buffers. The first sub-mesh's layout drives the pipeline + // (vertex bindings / attributes) in reload() — sub-meshes with a + // different layout are not supported today and will draw incorrectly. + for(std::size_t mi = 0; mi < geom.meshes.size(); ++mi) { - return {}; - } - - int i = 0; - int index_i = mesh.index.buffer; + const auto& mesh = geom.meshes[mi]; + if(mesh.buffers.empty()) + continue; - for(const auto& buf : mesh.buffers) - { - if(i != index_i) + // Null check — skip a sub-mesh whose data isn't ready yet. + bool any_is_null = false; + for(const auto& buf : mesh.buffers) { - auto rhi_buf - = ossia::visit([&](auto& buf) { return init_vbo(buf, rhi); }, buf.data); - ret.buffers.emplace_back(rhi_buf, 0, 0); + any_is_null |= ossia::visit([&](Buffer& buf) { + if constexpr(std::is_same_v) + return buf.byte_size == 0 || buf.data == nullptr; + else if constexpr(std::is_same_v) + return buf.handle == nullptr; + return false; + }, buf.data); } - else + if(any_is_null) + { + // Emit null placeholders so indexing stays aligned with geom.meshes. + for(std::size_t k = 0; k < mesh.buffers.size(); ++k) + ret.buffers.emplace_back(nullptr, 0, 0); + continue; + } + + int i = 0; + const int index_i = mesh.index.buffer; + for(const auto& buf : mesh.buffers) { - auto rhi_buf - = ossia::visit([&](auto& buf) { return init_index(buf, rhi); }, buf.data); - ret.buffers.emplace_back(rhi_buf, 0, 0); + QRhiBuffer* rhi_buf = (i != index_i) + ? ossia::visit([&](auto& b) { return init_vbo(b, rhi); }, buf.data) + : ossia::visit([&](auto& b) { return init_index(b, rhi); }, buf.data); + // Ownership follows the source variant: cpu_buffer paths allocate + // fresh QRhiBuffers (owned), gpu_buffer paths borrow an upstream + // handle (unowned — the original producer still owns it). 
+ const bool owned = ossia::visit( + [](const Buffer&) { + return std::is_same_v; + }, buf.data); + BufferView bv{}; + bv.handle = rhi_buf; + bv.owned = owned; + ret.buffers.emplace_back(bv); + i++; } - i++; } -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Populate indirect draw buffer from geometry's indirect_count - if(mesh.indirect_count.handle) + if(ret.buffers.empty()) + return {}; + + // Indirect draw / cpu_draw_commands: only meaningful when a single output + // mesh carries them (ScenePreprocessor's MDI mode). Pick them up from mesh[0]. + const auto& first_mesh = geom.meshes[0]; + if(first_mesh.indirect_count.handle) { - ret.indirectDrawBuffer = static_cast(mesh.indirect_count.handle); + ret.indirectDrawBuffer = static_cast(first_mesh.indirect_count.handle); ret.useIndirectDraw = true; - ret.indirectDrawIndexed = (mesh.index.buffer >= 0); + ret.indirectDrawIndexed = (first_mesh.index.buffer >= 0); + ret.indirectDrawCount + = first_mesh.indirect_count.byte_size / (5 * sizeof(uint32_t)); + ret.indirectDrawStride = 5 * sizeof(uint32_t); + if(ret.indirectDrawCount == 0) + ret.indirectDrawCount = 1; } -#endif + if(!first_mesh.cpu_draw_commands.empty()) + ret.cpuDrawCommands.assign( + first_mesh.cpu_draw_commands.begin(), first_mesh.cpu_draw_commands.end()); return ret; } void CustomMesh::update_vbo( int buffer_index, const ossia::geometry::cpu_buffer& vtx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept { if(meshbuf.buffers.size() <= buffer_index) return; - auto buffer = meshbuf.buffers[buffer_index].handle; // FIXME use offset here? - if(auto sz = vtx_buf.byte_size; sz != buffer->size()) + auto& slot = meshbuf.buffers[buffer_index]; + // Diag 009 — guard the cpu→over-unowned-slot UAF: the slot was last + // populated by an upstream gpu_buffer producer (owned=false). 
Calling + // setSize/create on the upstream's QRhiBuffer destroys the underlying + // VkBuffer through QRhi's deferred-release queue and bumps the + // generation, silently clobbering every downstream consumer of that + // upstream handle. Allocate a fresh owned buffer instead — leave the + // upstream wrapper untouched. + if(!slot.handle || !slot.owned) { - buffer->destroy(); - buffer->setSize(sz); - buffer->create(); + static std::atomic_int idx = 0; + auto* fresh = rhi.newBuffer( + QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, + vtx_buf.byte_size); + fresh->setName( + QString("Mesh::vtx_buf.%1") + .arg(idx.fetch_add(1, std::memory_order_relaxed)) + .toLatin1()); + if(!fresh->create()) + { + qWarning() << "CustomMesh::update_vbo: fresh buffer->create() FAILED"; + delete fresh; + return; + } + BUFTRACE() << "update_vbo(cpu) mesh=" << (void*)this + << " slot=" << buffer_index + << " allocating fresh owned buffer (was " + << (slot.handle ? "unowned upstream" : "empty") << ")" + << " new=" << (void*)fresh + << " size=" << (qint64)vtx_buf.byte_size; + slot.handle = fresh; + slot.owned = true; + } + else if(auto sz = vtx_buf.byte_size; sz != slot.handle->size()) + { + qDebug() << "CustomMesh::update_vbo: resizing buffer from" + << slot.handle->size() << "to" << sz + << "buffer=" << (void*)slot.handle; + slot.handle->setSize(sz); + if(!slot.handle->create()) + qWarning() << "CustomMesh::update_vbo: buffer->create() FAILED after resize!"; } // FIXME support offset uploadStaticBufferWithStoredData( - &rb, buffer, 0, buffer->size(), (const char*)vtx_buf.raw_data.get()); + &rb, slot.handle, 0, slot.handle->size(), (const char*)vtx_buf.raw_data.get()); } void CustomMesh::update_vbo( int buffer_index, const ossia::geometry::gpu_buffer& vtx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept { if(meshbuf.buffers.size() <= buffer_index) return; // FIXME offset, size ? 
- // FIXME check if memory of previous buffer gets freed? - meshbuf.buffers[buffer_index] = {static_cast(vtx_buf.handle), 0, 0}; + auto& slot = meshbuf.buffers[buffer_index]; + auto* old_buf = slot.handle; + auto* new_buf = static_cast(vtx_buf.handle); + if(old_buf != new_buf) + { + // Diag 009 — when the slot previously held an owned cpu-fed buffer, + // route it through deleteLater so QRhi's release queue tears it + // down (and any SRBs auto-rebind via m_id generation tracking on + // their next setShaderResources). Without this we leak both the + // QRhiBuffer wrapper and its underlying VkBuffer. + if(slot.owned && old_buf) + { + BUFTRACE() << "update_vbo(gpu) mesh=" << (void*)this + << " slot=" << buffer_index + << " deleteLater old owned=" << (void*)old_buf + << " new=" << (void*)new_buf + << " size=" << (qint64)vtx_buf.byte_size; + old_buf->deleteLater(); + } + else + { + BUFTRACE() << "update_vbo(gpu) mesh=" << (void*)this + << " slot=" << buffer_index + << " old(unowned)=" << (void*)old_buf + << " new=" << (void*)new_buf + << " size=" << (qint64)vtx_buf.byte_size; + } + } + // Replacement entry must carry owned=false: the handle belongs to the + // upstream gpu_buffer producer. Default-constructed BufferView has + // owned=true → RenderList::release would `delete` a borrowed handle. + BufferView bv{}; + bv.handle = new_buf; + bv.owned = false; + slot = bv; } void CustomMesh::update_index( int buffer_index, const ossia::geometry::cpu_buffer& idx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept { if(meshbuf.buffers.size() <= buffer_index) return; + auto& slot = meshbuf.buffers[buffer_index]; void* idx_buf_data = nullptr; - auto buffer = meshbuf.buffers[buffer_index].handle; // FIXME use offset here? 
- if(buffer) + if(geom.meshes[0].buffers.size() > 1) { - if(geom.meshes[0].buffers.size() > 1) + if(const auto idx_buf_size = idx_buf.byte_size; idx_buf_size > 0) { - if(const auto idx_buf_size = idx_buf.byte_size; idx_buf_size > 0) + idx_buf_data = idx_buf.raw_data.get(); + // Diag 009 — same UAF guard as update_vbo(cpu): if the slot is + // empty or holds an upstream-owned (unowned) handle, do NOT + // setSize/create on it; allocate a fresh owned index buffer. + if(!slot.handle || !slot.owned) { - idx_buf_data = idx_buf.raw_data.get(); - // FIXME what if index disappears - if(auto sz = idx_buf.byte_size; sz != buffer->size()) - { - buffer->destroy(); - buffer->setSize(sz); - buffer->create(); - } - else + static std::atomic_int idx = 0; + auto* fresh = rhi.newBuffer( + QRhiBuffer::Static, QRhiBuffer::IndexBuffer, idx_buf_size); + fresh->setName( + QString("Mesh::idx_buf.%1") + .arg(idx.fetch_add(1, std::memory_order_relaxed)) + .toLatin1()); + if(!fresh->create()) { + qWarning() << "CustomMesh::update_index: fresh buffer->create() FAILED"; + delete fresh; + return; } + BUFTRACE() << "update_index(cpu) mesh=" << (void*)this + << " slot=" << buffer_index + << " allocating fresh owned index buffer (was " + << (slot.handle ? 
"unowned upstream" : "empty") << ")" + << " new=" << (void*)fresh + << " size=" << (qint64)idx_buf_size; + slot.handle = fresh; + slot.owned = true; + } + else if(auto sz = idx_buf.byte_size; sz != slot.handle->size()) + { + slot.handle->setSize(sz); + slot.handle->create(); } - } - else - { - // FIXME what if index appears } } else @@ -187,19 +308,49 @@ void CustomMesh::update_index( // FIXME what if index appears } - if(buffer && idx_buf_data) + if(slot.handle && idx_buf_data) { // FIXME support offset uploadStaticBufferWithStoredData( - &rb, buffer, 0, buffer->size(), (const char*)idx_buf_data); + &rb, slot.handle, 0, slot.handle->size(), (const char*)idx_buf_data); } } void CustomMesh::update_index( int buffer_index, const ossia::geometry::gpu_buffer& idx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept { SCORE_ASSERT(meshbuf.buffers.size() > buffer_index); + auto& slot = meshbuf.buffers[buffer_index]; + auto* old_buf = slot.handle; + auto* new_buf = static_cast(idx_buf.handle); + if(old_buf != new_buf) + { + // Diag 009 — leak-fix: route a previously-owned handle through + // QRhi's release queue so we don't drop the wrapper on the floor + // when transitioning cpu→gpu on this slot. 
+ if(slot.owned && old_buf) + { + BUFTRACE() << "update_index(gpu) mesh=" << (void*)this + << " slot=" << buffer_index + << " deleteLater old owned=" << (void*)old_buf + << " new=" << (void*)new_buf + << " size=" << (qint64)idx_buf.byte_size; + old_buf->deleteLater(); + } + else + { + BUFTRACE() << "update_index(gpu) mesh=" << (void*)this + << " slot=" << buffer_index + << " old(unowned)=" << (void*)old_buf + << " new=" << (void*)new_buf + << " size=" << (qint64)idx_buf.byte_size; + } + BufferView bv{}; + bv.handle = new_buf; + bv.owned = false; + slot = bv; + } } void CustomMesh::update( @@ -208,47 +359,87 @@ void CustomMesh::update( if(geom.meshes.empty()) return; - // FIXME multi-mesh - auto& input_mesh = geom.meshes[0]; - if(input_mesh.buffers.empty()) + // Grow output_meshbuf.buffers when the geometry has added more + // buffers than mb has slots for (e.g. a model swap from Box.gltf → + // Duck.gltf where Duck has more vertex buffers, or + // ScenePreprocessor appending instance + scene-aux entries beyond + // the existing slot count). Without this, update_vbo's + // `if(meshbuf.buffers.size() <= buffer_index) return;` silently + // drops writes for new high-index buffers, stale handles persist, + // and the next setVertexInput binds them as vertex inputs — + // validation flags `pBuffers[N] is INDEX_BUFFER / STORAGE_BUFFER, + // requires VERTEX_BUFFER`. + // + // We *grow* rather than re-init: re-initialising forces init() + // through its any-buffer-null bail-out (which emits null placeholders + // for the WHOLE sub-mesh whenever any single buffer is null), which + // breaks scenes where a conditional aux buffer transiently goes + // null. Growing preserves the live handles already bound to + // populated slots; new slots get null placeholders and the + // update_vbo / update_index loop below fills them in. 
+ // + // Shrinking is intentionally not done: extra trailing slots beyond + // what g.input / g.index reference are harmless (the draw path + // never indexes into them), and shrinking would require explicit + // release of the truncated owned buffers. + std::size_t total_geom_buffers = 0; + for(const auto& m : geom.meshes) + total_geom_buffers += m.buffers.size(); + if(output_meshbuf.buffers.size() < total_geom_buffers) { - return; + BUFTRACE() << "CustomMesh::update: growing MeshBuffers from " + << (qsizetype)output_meshbuf.buffers.size() + << " to " << (qsizetype)total_geom_buffers + << " slots (preserving existing handles)"; + output_meshbuf.buffers.resize( + total_geom_buffers, BufferView{nullptr, 0, 0}); } + if(output_meshbuf.buffers.empty()) - { output_meshbuf = init(rhi); - } if(output_meshbuf.buffers.empty()) - { return; - } - int i = 0; - int index_i = input_mesh.index.buffer; - - for(const auto& buf : input_mesh.buffers) + // Upload each sub-mesh's buffers, remapping local indices to the flat + // offset in output_meshbuf.buffers built by init(). 
+ std::size_t base = 0; + for(const auto& input_mesh : geom.meshes) { - if(i != index_i) - { - ossia::visit( - [&](auto& buf) { return update_vbo(i, buf, output_meshbuf, rb); }, buf.data); - } - else + if(input_mesh.buffers.empty()) + continue; + if(base + input_mesh.buffers.size() > output_meshbuf.buffers.size()) + break; + + int i = 0; + const int index_i = input_mesh.index.buffer; + for(const auto& buf : input_mesh.buffers) { - ossia::visit( - [&](auto& buf) { return update_index(i, buf, output_meshbuf, rb); }, buf.data); + const int flat = int(base) + i; + if(i != index_i) + { + ossia::visit( + [&](auto& buf) { return update_vbo(flat, buf, output_meshbuf, rhi, rb); }, + buf.data); + } + else + { + ossia::visit( + [&](auto& buf) { return update_index(flat, buf, output_meshbuf, rhi, rb); }, + buf.data); + } + i++; } - i++; + base += input_mesh.buffers.size(); } -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Update indirect draw buffer reference - if(input_mesh.indirect_count.handle) + // Indirect draw / cpu_draw_commands: same single-mesh scoping as init(). 
+ const auto& first_mesh = geom.meshes[0]; + if(first_mesh.indirect_count.handle) { output_meshbuf.indirectDrawBuffer - = static_cast(input_mesh.indirect_count.handle); + = static_cast(first_mesh.indirect_count.handle); output_meshbuf.useIndirectDraw = true; - output_meshbuf.indirectDrawIndexed = (input_mesh.index.buffer >= 0); + output_meshbuf.indirectDrawIndexed = (first_mesh.index.buffer >= 0); } else { @@ -256,7 +447,16 @@ void CustomMesh::update( output_meshbuf.useIndirectDraw = false; output_meshbuf.indirectDrawIndexed = false; } -#endif + + if(!first_mesh.cpu_draw_commands.empty()) + { + output_meshbuf.cpuDrawCommands.assign( + first_mesh.cpu_draw_commands.begin(), first_mesh.cpu_draw_commands.end()); + } + + // Note: GPU readback for the indirect draw fallback is handled + // synchronously in RenderedRawRasterPipelineNode::runInitialPasses, + // which has access to both the command buffer and QRhi::finish(). } Mesh::Flags CustomMesh::flags() const noexcept @@ -306,6 +506,8 @@ void CustomMesh::preparePipeline(QRhiGraphicsPipeline &pip) const noexcept { pip.setDepthTest(true); pip.setDepthWrite(true); + // Reverse-Z project rule. + pip.setDepthOp(QRhiGraphicsPipeline::Greater); } pip.setTopology(this->topology); @@ -321,6 +523,11 @@ void CustomMesh::preparePipeline(QRhiGraphicsPipeline &pip) const noexcept void CustomMesh::reload(const ossia::mesh_list &ml, const ossia::geometry_filter_list_ptr &f) { + BUFTRACE() << "CustomMesh::reload mesh=" << (void*)this + << " meshes=" << (qsizetype)ml.meshes.size() + << " first_buf_count=" + << (ml.meshes.empty() ? 
(qsizetype)-1 + : (qsizetype)ml.meshes[0].buffers.size()); this->geom = ml; this->filters = f; @@ -368,59 +575,174 @@ void CustomMesh::reload(const ossia::mesh_list &ml, const ossia::geometry_filter frontFace = (QRhiGraphicsPipeline::FrontFace)g.front_face; } -void CustomMesh::draw(const MeshBuffers &bufs, QRhiCommandBuffer &cb) const noexcept +bool CustomMesh::drawSingleMesh( + std::size_t mesh_index, std::size_t base, const MeshBuffers& bufs, + QRhiCommandBuffer& cb, + std::span fallback_slots) const noexcept { - for(auto& g : this->geom.meshes) + if(mesh_index >= geom.meshes.size()) + return false; + const auto& g = geom.meshes[mesh_index]; + + // Total vertex-input count = mesh bindings + fallback bindings. The + // fallback slots' binding_index values were allocated sequentially + // past the mesh's own bindings when the pipeline was built + // (remapPipelineVertexInputs); they land at indices sz, sz+1, ... here. + const auto mesh_input_count = g.input.size(); + const auto total = mesh_input_count + fallback_slots.size(); + QVarLengthArray draw_inputs(total); + + int i = 0; + for(auto& in : g.input) { - const auto sz = g.input.size(); + const std::size_t flat = base + (std::size_t)in.buffer; + if(flat >= bufs.buffers.size()) + return false; + auto buf = bufs.buffers[flat].handle; + if(!buf) + return false; + draw_inputs[i++] = {buf, in.byte_offset}; + } - QVarLengthArray draw_inputs(sz); + // Fallback slots. Each Slot::binding_index is expressed in the global + // binding-index space; for a single-sub-mesh raw-raster draw it's + // always `mesh_input_count + k` for the k'th slot, so we place the + // buffers by index. + for(const auto& slot : fallback_slots) + { + const std::size_t idx = (std::size_t)slot.binding_index; + if(idx >= total || !slot.buffer) + continue; // defensive: skip malformed plans rather than dropping the draw + draw_inputs[idx] = {slot.buffer, 0}; + } - int i = 0; - for(auto& in : g.input) - { - // FIXME buffer offset? input offset? 
- if(bufs.buffers.size() <= in.buffer) - return; - - auto buf = bufs.buffers[in.buffer].handle; - if(!buf) - return; - draw_inputs[i++] = {buf, in.byte_offset}; - } + if(g.index.buffer >= 0) + { + const std::size_t flat_idx = base + (std::size_t)g.index.buffer; + if(flat_idx >= bufs.buffers.size()) + return false; + auto buf = bufs.buffers[flat_idx].handle; + const auto idxFmt = g.index.format == decltype(g.index)::uint16 + ? QRhiCommandBuffer::IndexUInt16 + : QRhiCommandBuffer::IndexUInt32; + // If this bind crashes with a dangling buffer, the `buf` pointer + // logged here will match ASan's freed-at report. The mesh= and + // slot= fields tell us which CustomMesh and which MeshBuffers + // entry retained it. + BUFTRACE() << "bindIndexBuffer mesh=" << (void*)this + << " sub=" << mesh_index << " slot=" << flat_idx + << " buf=" << (void*)buf + << " offset=" << (qint64)g.index.byte_offset + << " bufs.size=" << (qsizetype)bufs.buffers.size(); + cb.setVertexInput( + 0, (int)total, draw_inputs.data(), buf, g.index.byte_offset, idxFmt); + } + else + { + cb.setVertexInput(0, (int)total, draw_inputs.data()); + } - if(g.index.buffer >= 0) - { - auto buf = bufs.buffers[g.index.buffer].handle; - const auto idxFmt = g.index.format == decltype(g.index)::uint16 - ? QRhiCommandBuffer::IndexUInt16 - : QRhiCommandBuffer::IndexUInt32; - cb.setVertexInput(0, sz, draw_inputs.data(), buf, g.index.byte_offset, idxFmt); - } - else - { - cb.setVertexInput(0, sz, draw_inputs.data()); - } + // Per-mesh indirect override: when THIS submesh carries its own + // `indirect_count` handle (different from bufs.indirectDrawBuffer), + // use it instead. Required for multi-batch MDI (opaque + transparent + // split emitted by ScenePreprocessor) where each sub-mesh drives a + // separate indirect-cmd list. Same rule for `cpu_draw_commands`. 
+ QRhiBuffer* effIndirectBuf = bufs.indirectDrawBuffer; + quint32 effIndirectCount = bufs.indirectDrawCount; + const auto* effCpuCmds = &bufs.cpuDrawCommands; + std::decay_t perMeshCmds; + if(auto* h = static_cast(g.indirect_count.handle)) + { + effIndirectBuf = h; + effIndirectCount + = (quint32)(g.indirect_count.byte_size / (5 * sizeof(uint32_t))); + if(effIndirectCount == 0) + effIndirectCount = 1; + } + if(!g.cpu_draw_commands.empty()) + { + perMeshCmds.assign(g.cpu_draw_commands.begin(), g.cpu_draw_commands.end()); + effCpuCmds = &perMeshCmds; + } + // Multi-draw indirect: runtime capability check, not compile-time. + // Only meaningful for single-sub-mesh MDI-mode geometries. + if(bufs.useIndirectDraw && effIndirectBuf) + { #if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - if(bufs.useIndirectDraw && bufs.indirectDrawBuffer) + if(bufs.gpuIndirectSupported) { if(bufs.indirectDrawIndexed) - cb.drawIndexedIndirect(bufs.indirectDrawBuffer, 0, 1); + cb.drawIndexedIndirect( + effIndirectBuf, bufs.indirectDrawOffset, + effIndirectCount, bufs.indirectDrawStride); else - cb.drawIndirect(bufs.indirectDrawBuffer, 0, 1); - continue; + cb.drawIndirect( + effIndirectBuf, bufs.indirectDrawOffset, + effIndirectCount, bufs.indirectDrawStride); + return true; } #endif - if(g.index.buffer > -1) - { - cb.drawIndexed(g.indices, g.instances); - } - else + // CPU fallback: iterate draw commands with correct firstInstance / + // baseVertex so each sub-draw gets its own per-draw data via + // gl_BaseInstance. Commands come from either the producer + // (ScenePreprocessor) or GPU readback (CSF). 
+ if(!effCpuCmds->empty()) { - cb.draw(g.vertices, g.instances); + const bool indexed = (g.index.buffer >= 0); + for(const auto& cmd : *effCpuCmds) + { + if(indexed) + cb.drawIndexed( + cmd.index_or_vertex_count, cmd.instance_count, + cmd.first_index_or_vertex, cmd.base_vertex, cmd.first_instance); + else + cb.draw( + cmd.index_or_vertex_count, cmd.instance_count, + cmd.first_index_or_vertex, cmd.first_instance); + } + return true; } + // No CPU commands yet (readback pending or first frame) — skip. + return false; + } + + if(g.index.buffer > -1) + cb.drawIndexed(g.indices, g.instances); + else + cb.draw(g.vertices, g.instances); + return true; +} + +void CustomMesh::draw(const MeshBuffers &bufs, QRhiCommandBuffer &cb) const noexcept +{ + // Default draw path: iterate sub-meshes without any per-mesh state swap. + // Works for single-mesh geometries and for MDI mode (one sub-mesh with an + // indirect buffer). For multi-sub-mesh + per-mesh SRB auxes (classic + // per-mesh ScenePreprocessor output), the caller should instead iterate + // drawSingleMesh() itself and rebind the SRB between sub-meshes. + std::size_t base = 0; + for(std::size_t i = 0; i < geom.meshes.size(); ++i) + { + drawSingleMesh(i, base, bufs, cb); + base += geom.meshes[i].buffers.size(); + } +} + +void CustomMesh::drawWithFallbackBindings( + const MeshBuffers& bufs, QRhiCommandBuffer& cb, + std::span fallback_slots) const noexcept +{ + // Same as draw() but with the caller's fallback-binding plan threaded + // down to drawSingleMesh so the extra PerInstance identity buffers + // land in the vertex-input array at the indices the pipeline + // allocated for them. 
+ std::size_t base = 0; + for(std::size_t i = 0; i < geom.meshes.size(); ++i) + { + drawSingleMesh(i, base, bufs, cb, fallback_slots); + base += geom.meshes[i].buffers.size(); } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp index 5f8e977839..4dcfc7de72 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/CustomMesh.hpp @@ -1,13 +1,25 @@ #pragma once #include +#include #include +#include #include +#include + namespace score::gfx { +// [BUFTRACE] — diagnostic logging around QRhiBuffer lifetime during +// live graph edits (defined in CustomMesh.cpp). Exposed so other TUs +// (RenderList, ScenePreprocessorNode, RenderedRawRasterPipelineNode) can +// use BUFTRACE() with the same env-var gating. +SCORE_PLUGIN_GFX_EXPORT bool buftrace_enabled(); +#define BUFTRACE() if(::score::gfx::buftrace_enabled()) qDebug().nospace() << "[BUFTRACE] " + + class CustomMesh : public score::gfx::Mesh { ossia::mesh_list geom; @@ -47,19 +59,19 @@ class CustomMesh : public score::gfx::Mesh void update_vbo( int buffer_index, const ossia::geometry::cpu_buffer& vtx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept; + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept; void update_vbo( int buffer_index, const ossia::geometry::gpu_buffer& vtx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept; + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept; void update_index( int buffer_index, const ossia::geometry::cpu_buffer& idx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept; + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept; void update_index( int buffer_index, const ossia::geometry::gpu_buffer& idx_buf, MeshBuffers& meshbuf, - QRhiResourceUpdateBatch& rb) const noexcept; + QRhi& rhi, QRhiResourceUpdateBatch& rb) const noexcept; void update(QRhi& rhi, MeshBuffers& output_meshbuf, 
QRhiResourceUpdateBatch& rb) const noexcept override; Flags flags() const noexcept override; @@ -72,6 +84,33 @@ class CustomMesh : public score::gfx::Mesh void draw(const MeshBuffers& bufs, QRhiCommandBuffer& cb) const noexcept override; + // Fallback-aware variant: appends each `FallbackBindingPlan::Slot` + // buffer to the vertex-input array before issuing the draw. Used by + // raw-raster pipelines whose shaders declared "REQUIRED: false" + // VERTEX_INPUTS the upstream geometry doesn't provide. Non-virtual on + // purpose — only CustomMesh participates in the fallback path. + void drawWithFallbackBindings( + const MeshBuffers& bufs, QRhiCommandBuffer& cb, + std::span fallback_slots) const noexcept; + + // Draw a single sub-mesh (geom.meshes[mesh_index]) using the portion of + // `bufs.buffers` starting at `buffer_offset`. `buffer_offset` must match + // init()'s flat-concat layout: sum of geom.meshes[0..mesh_index-1].buffers.size(). + // Returns true if a draw call was issued. + // + // Exposed so consumers that need per-sub-mesh state (e.g. RawRaster + // swapping the per_draw SSBO between meshes) can iterate sub-meshes + // themselves instead of invoking the fire-and-forget `draw()` above. + // + // `fallback_slots` (default empty) is merged into the vertex-input + // array at each slot's binding_index — bindings appended by the + // fallback-aware remap land past the mesh's own bindings, so slot + // indices are always contiguous after geom.meshes[mesh_index].input. 
+ bool drawSingleMesh( + std::size_t mesh_index, std::size_t buffer_offset, + const MeshBuffers& bufs, QRhiCommandBuffer& cb, + std::span fallback_slots = {}) const noexcept; + const char* defaultVertexShader() const noexcept override; const ossia::geometry* semanticGeometry() const noexcept override diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.cpp deleted file mode 100644 index 78277bae17..0000000000 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.cpp +++ /dev/null @@ -1,506 +0,0 @@ -#include "depthnode.hpp" - -#include - -DepthNode::DepthNode(const QShader& compute) -{ - m_computeS = compute; - - input.push_back(new Port{this, {}, Types::Image, {}}); - output.push_back(new Port{this, {}, Types::Image, {}}); -} - -struct RenderedDepthNode : score::gfx::NodeRenderer -{ - struct Pass - { - QRhiSampler* sampler{}; - TextureRenderTarget renderTarget; - Pipeline p; - QRhiBuffer* processUBO{}; - }; - std::vector m_passes; - - DepthNode& n; - - TextureRenderTarget m_lastPassRT; - - std::vector m_samplers; - - // Pipeline - Pipeline m_p; - - QRhiBuffer* m_meshBuffer{}; - QRhiBuffer* m_idxBuffer{}; - - QRhiBuffer* m_materialUBO{}; - int m_materialSize{}; - int64_t materialChangedIndex{-1}; - - RenderedDepthNode(const DepthNode& node) noexcept - : score::gfx::NodeRenderer{} - , n{const_cast(node)} - { - } - - std::optional renderTargetSize() const noexcept override { return {}; } - - TextureRenderTarget createRenderTarget(const RenderState& state) override - { - auto sz = state.size; - if(auto true_sz = renderTargetSize()) - { - sz = *true_sz; - } - - m_lastPassRT = score::gfx::createRenderTarget(state, sz); - return m_lastPassRT; - } - - QSize computeTextureSize(const isf::pass& pass) - { - QSize res = m_lastPassRT.renderTarget->pixelSize(); - - exprtk::symbol_table syms; - - syms.add_constant("var_WIDTH", res.width()); - syms.add_constant("var_HEIGHT", res.height()); - int port_k = 0; - 
for(const isf::input& input : n.m_descriptor.inputs) - { - auto port = n.input[port_k]; - if(ossia::get_if(&input.data)) - { - syms.add_constant("var_" + input.name, *(float*)port->value); - } - else - { - // TODO exprtk only handles the expression type... - } - - port_k++; - } - - if(auto expr = pass.width_expression; !expr.empty()) - { - boost::algorithm::replace_all(expr, "$", "var_"); - exprtk::expression e; - e.register_symbol_table(syms); - exprtk::parser parser; - bool ok = parser.compile(expr, e); - if(ok) - res.setWidth(e()); - else - qDebug() << parser.error().c_str() << expr.c_str(); - } - if(auto expr = pass.height_expression; !expr.empty()) - { - boost::algorithm::replace_all(expr, "$", "var_"); - exprtk::expression e; - e.register_symbol_table(syms); - exprtk::parser parser; - bool ok = parser.compile(expr, e); - if(ok) - res.setHeight(e()); - else - qDebug() << parser.error().c_str() << expr.c_str(); - } - - return res; - } - - int initShaderSamplers(Renderer& renderer) - { - QRhi& rhi = *renderer.state.rhi; - auto& input = n.input; - int cur_pos = 0; - for(auto in : input) - { - switch(in->type) - { - case Types::Empty: - break; - case Types::Int: - case Types::Float: - cur_pos += 4; - break; - case Types::Vec2: - cur_pos += 8; - if(cur_pos % 8 != 0) - cur_pos += 4; - break; - case Types::Vec3: - while(cur_pos % 16 != 0) - { - cur_pos += 4; - } - cur_pos += 12; - break; - case Types::Vec4: - while(cur_pos % 16 != 0) - { - cur_pos += 4; - } - cur_pos += 16; - break; - case Types::Image: { - auto sampler = rhi.newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); - SCORE_ASSERT(sampler->create()); - - auto texture = renderer.textureTargetForInputPort(*in); - m_samplers.push_back({sampler, texture}); - - if(cur_pos % 8 != 0) - cur_pos += 4; - - *(float*)(n.m_materialData.get() + cur_pos) = texture->pixelSize().width(); - *(float*)(n.m_materialData.get() + cur_pos + 4) - = 
texture->pixelSize().height(); - - cur_pos += 8; - break; - } - default: - break; - } - } - return cur_pos; - } - - void initAudioTextures(Renderer& renderer) - { - QRhi& rhi = *renderer.state.rhi; - for(auto& texture : n.audio_textures) - { - auto sampler = rhi.newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); - sampler->create(); - - m_samplers.push_back({sampler, renderer.m_emptyTexture}); - texture.samplers[&renderer] = {sampler, nullptr}; - } - } - - void initPassSamplers(Renderer& renderer, int& cur_pos) - { - QRhi& rhi = *renderer.state.rhi; - auto& model_passes = n.m_descriptor.passes; - for(int i = 0, N = model_passes.size(); i < N - 1; i++) - { - auto sampler = rhi.newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); - sampler->create(); - - const QSize texSize = computeTextureSize(model_passes[i]); - - const auto fmt - = (model_passes[i].float_storage) ? 
QRhiTexture::RGBA32F : QRhiTexture::RGBA8; - - auto tex = rhi.newTexture( - fmt, texSize, 1, QRhiTexture::Flag{QRhiTexture::RenderTarget}); - tex->create(); - - m_samplers.push_back({sampler, tex}); - - if(cur_pos % 8 != 0) - cur_pos += 4; - - *(float*)(n.m_materialData.get() + cur_pos) = texSize.width(); - *(float*)(n.m_materialData.get() + cur_pos + 4) = texSize.height(); - - cur_pos += 8; - } - } - - Pipeline - buildPassPipeline(Renderer& renderer, TextureRenderTarget tgt, QRhiBuffer* processUBO) - { - return score::gfx::buildPipeline( - renderer, n.mesh(), n.m_vertexS, n.m_fragmentS, tgt, processUBO, m_materialUBO, - m_samplers); - }; - - Pass createPass(Renderer& renderer, Sampler target) - { - QRhi& rhi = *renderer.state.rhi; - auto [sampler, tex] = target; - - auto rt = rhi.newTextureRenderTarget({tex}); - auto rp = rt->newCompatibleRenderPassDescriptor(); - SCORE_ASSERT(rp); - rt->setRenderPassDescriptor(rp); - SCORE_ASSERT(rt->create()); - - QRhiBuffer* pubo{}; - pubo = rhi.newBuffer( - QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO)); - pubo->create(); - - auto pip = buildPassPipeline(renderer, TextureRenderTarget{.texture = tex, .renderPass = rp, .renderTarget = rt}, pubo); - auto srb = pip.srb; - - // We have to replace the rendered-to texture by an empty one in each pass, - // as RHI does not support both reading and writing to a texture in the same pass. 
- { - QVarLengthArray bindings; - for(auto it = srb->cbeginBindings(); it != srb->cendBindings(); ++it) - { - bindings.push_back(*it); - - if(it->data()->type == QRhiShaderResourceBinding::SampledTexture) - { - if(it->data()->u.stex.texSamplers->tex == tex) - { - bindings.back().data()->u.stex.texSamplers->tex = renderer.m_emptyTexture; - } - } - } - srb->setBindings(bindings.begin(), bindings.end()); - srb->create(); - } - return Pass{sampler, {tex, rp, rt}, pip, pubo}; - } - - void init(Renderer& renderer) override - { - // init() - { - const auto& mesh = n.mesh(); - if(!m_meshBuffer) - { - auto [mbuffer, ibuffer] = renderer.initMeshBuffer(mesh); - m_meshBuffer = mbuffer; - m_idxBuffer = ibuffer; - } - } - - QRhi& rhi = *renderer.state.rhi; - - m_materialSize = n.m_materialSize; - if(m_materialSize > 0) - { - m_materialUBO = rhi.newBuffer( - QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); - SCORE_ASSERT(m_materialUBO->create()); - } - - int cur_pos = initShaderSamplers(renderer); - - initAudioTextures(renderer); - - auto& model_passes = n.m_descriptor.passes; - if(!model_passes.empty()) - { - int first_pass_sampler_idx = std::ssize(m_samplers); - - // First create all the samplers / textures - initPassSamplers(renderer, cur_pos); - - // Then create the passes - for(int i = 0, N = model_passes.size(); i < N - 1; i++) - { - auto target = m_samplers[first_pass_sampler_idx + i]; - auto pass = createPass(renderer, target); - m_passes.push_back(pass); - } - } - - // Last pass is the main write - { - QRhiBuffer* pubo{}; - pubo = rhi.newBuffer( - QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO)); - pubo->create(); - - auto p = buildPassPipeline(renderer, m_lastPassRT, pubo); - m_passes.push_back(Pass{nullptr, m_lastPassRT, p, pubo}); - } - } - - void update(Renderer& renderer, QRhiResourceUpdateBatch& res) override - { - { - if(m_materialUBO && m_materialSize > 0 - && materialChangedIndex != n.materialChanged) - { - char* data = 
n.m_materialData.get(); - res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); - materialChangedIndex = n.materialChanged; - } - } - - QRhi& rhi = *renderer.state.rhi; - for(auto& audio : n.audio_textures) - { - bool textureChanged = false; - auto& [rhiSampler, rhiTexture] = audio.samplers[&renderer]; - const auto curSz = (rhiTexture) ? rhiTexture->pixelSize() : QSize{}; - int numSamples = curSz.width() * curSz.height(); - if(numSamples != audio.data.size()) - { - delete rhiTexture; - rhiTexture = nullptr; - textureChanged = true; - } - - if(!rhiTexture) - { - if(audio.channels > 0) - { - int samples = audio.data.size() / audio.channels; - rhiTexture = rhi.newTexture( - QRhiTexture::D32F, {samples, audio.channels}, 1, QRhiTexture::Flag{}); - rhiTexture->create(); - textureChanged = true; - } - else - { - rhiTexture = nullptr; - textureChanged = true; - } - } - - if(textureChanged) - { - score::gfx::replaceTexture( - *m_p.srb, rhiSampler, rhiTexture ? rhiTexture : renderer.m_emptyTexture); - } - - if(rhiTexture) - { - QRhiTextureSubresourceUploadDescription subdesc( - audio.data.data(), audio.data.size() * 4); - QRhiTextureUploadEntry entry{0, 0, subdesc}; - QRhiTextureUploadDescription desc{entry}; - res.uploadTexture(rhiTexture, desc); - } - } - - { - // Update all the process UBOs - for(int i = 0, N = m_passes.size(); i < N; i++) - { - n.standardUBO.passIndex = i; - res.updateDynamicBuffer( - m_passes[i].processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO); - } - } - } - - void releaseWithoutRenderTarget(Renderer& r) override - { - // customRelease - { - for(auto& texture : n.audio_textures) - { - auto it = texture.samplers.find(&r); - if(it != texture.samplers.end()) - { - if(auto tex = it->second.second) - { - if(tex != r.m_emptyTexture) - tex->deleteLater(); - } - } - } - - for(auto& pass : m_passes) - { - // TODO do we also want to remove the last pass texture here ?! 
- pass.p.release(); - pass.renderTarget.release(); - pass.processUBO->deleteLater(); - } - - m_passes.clear(); - } - - for(auto sampler : m_samplers) - { - delete sampler.sampler; - // texture isdeleted elsewxheree - } - m_samplers.clear(); - - delete m_materialUBO; - m_materialUBO = nullptr; - - m_p.release(); - - m_meshBuffer = nullptr; - } - - void release(Renderer& r) override { releaseWithoutRenderTarget(r); } - - void runPass( - Renderer& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch& res) override - { - // if(m_passes.empty()) - // return RenderedNode::runPass(renderer, cb, res); - - // Update a first time everything - - // PASSINDEX must be set to the last index - // FIXME - n.standardUBO.passIndex = m_passes.size() - 1; - - update(renderer, res); - - auto updateBatch = &res; - - // Draw the passes - for(const auto& pass : m_passes) - { - SCORE_ASSERT(pass.renderTarget.renderTarget); - SCORE_ASSERT(pass.p.pipeline); - SCORE_ASSERT(pass.p.srb); - // TODO : combine all the uniforms.. 
- - auto rt = pass.renderTarget.renderTarget; - auto pipeline = pass.p.pipeline; - auto srb = pass.p.srb; - auto texture = pass.renderTarget.texture; - - // TODO need to free stuff - cb.beginPass(rt, Qt::black, {1.0f, 0}, updateBatch); - { - cb.setGraphicsPipeline(pipeline); - cb.setShaderResources(srb); - - if(texture) - { - cb.setViewport(QRhiViewport( - 0, 0, texture->pixelSize().width(), texture->pixelSize().height())); - } - else - { - const auto sz = renderer.state.size; - cb.setViewport(QRhiViewport(0, 0, sz.width(), sz.height())); - } - - assert(this->m_meshBuffer); - assert(this->m_meshBuffer->usage().testFlag(QRhiBuffer::VertexBuffer)); - n.mesh().setupBindings(*this->m_meshBuffer, this->m_idxBuffer, cb); - - cb.draw(n.mesh().vertexCount); - } - - cb.endPass(); - - if(pass.p.pipeline != m_passes.back().p.pipeline) - { - // Not the last pass: we have to use another resource batch - updateBatch = renderer.state.rhi->nextResourceUpdateBatch(); - } - } - } -}; - -score::gfx::NodeRenderer* DepthNode::createRenderer(Renderer& r) const noexcept -{ - return new RenderedDepthNode{*this}; -} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.hpp deleted file mode 100644 index 5ced2459a7..0000000000 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/DepthNode.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once -#include "mesh.hpp" -#include "node.hpp" -#include "renderer.hpp" - -namespace score::gfx -{ -struct RenderedDepthNode; -struct DepthNode : score::gfx::ProcessNode -{ - DepthNode(const QShader& compute); - - virtual ~DepthNode(); - - score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept; - -private: - friend struct RenderedISFNode; - QShader m_computeS; -}; -} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp index feb3d30c62..ca9254f3bb 100644 --- 
a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.cpp @@ -1114,6 +1114,8 @@ void DirectVideoNodeRenderer::createPipelines(RenderList& r) if(m_gpu) { auto shaders = m_gpu->init(r); + m_cachedVertexShader = shaders.first; + m_cachedFragmentShader = shaders.second; SCORE_ASSERT(m_p.empty()); score::gfx::defaultPassesInit( m_p, this->node().output[0]->edges, r, r.defaultQuad(), shaders.first, @@ -1122,6 +1124,15 @@ void DirectVideoNodeRenderer::createPipelines(RenderList& r) } void DirectVideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + for(Edge* edge : this->node().output[0]->edges) + addOutputPass(renderer, *edge, res); +} + +void DirectVideoNodeRenderer::initState( + RenderList& renderer, QRhiResourceUpdateBatch& res) { auto& rhi = *renderer.state.rhi; @@ -1148,7 +1159,15 @@ void DirectVideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch } createGpuDecoder(rhi); - createPipelines(renderer); + + // Cache shaders from the GPU decoder so addOutputPass() can use them + if(m_gpu) + { + auto shaders = m_gpu->init(renderer); + m_cachedVertexShader = shaders.first; + m_cachedFragmentShader = shaders.second; + } + m_recomputeScale = true; } @@ -1289,6 +1308,48 @@ void DirectVideoNodeRenderer::update( } void DirectVideoNodeRenderer::release(RenderList& r) +{ + releaseState(r); +} + +void DirectVideoNodeRenderer::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(!m_gpu) + return; + if(!m_cachedVertexShader.isValid() || !m_cachedFragmentShader.isValid()) + return; + + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) + { + auto pip = score::gfx::buildPipeline( + renderer, renderer.defaultQuad(), m_cachedVertexShader, m_cachedFragmentShader, + rt, m_processUBO, m_materialUBO, m_gpu->samplers); + if(pip.pipeline) + m_p.emplace_back(&edge, Pass{rt, pip, 
nullptr}); + } +} + +void DirectVideoNodeRenderer::removeOutputPass(RenderList& renderer, Edge& edge) +{ + auto it = ossia::find_if(m_p, [&](auto& p) { return p.first == &edge; }); + if(it != m_p.end()) + { + it->second.p.release(); + if(it->second.processUBO) + it->second.processUBO->deleteLater(); + m_p.erase(it); + } +} + +bool DirectVideoNodeRenderer::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); +} + +void DirectVideoNodeRenderer::releaseState(RenderList& r) { // Destroy GPU decoder BEFORE closeFile() frees m_hwDeviceCtx. // HW decoders (CUDA, Vulkan) hold references to the HW device context @@ -1299,6 +1360,9 @@ void DirectVideoNodeRenderer::release(RenderList& r) m_gpu.reset(); } + m_cachedVertexShader = {}; + m_cachedFragmentShader = {}; + delete m_processUBO; m_processUBO = nullptr; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp index 3c0e766f5c..bdee9ccd9f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/DirectVideoNodeRenderer.hpp @@ -63,6 +63,13 @@ class DirectVideoNodeRenderer : public NodeRenderer void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + private: const VideoNodeBase& node() const noexcept { @@ -131,6 +138,8 @@ class DirectVideoNodeRenderer : public NodeRenderer }; std::unique_ptr m_gpu; + QShader m_cachedVertexShader; + QShader m_cachedFragmentShader; 
score::gfx::ScaleMode m_currentScaleMode{}; int64_t m_lastRequestedFlicks{-1}; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.cpp new file mode 100644 index 0000000000..145ae597dd --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.cpp @@ -0,0 +1,186 @@ +#include +#include +#include + +#include +#include + +#include + +namespace score::gfx +{ + +struct RenderedFlattenedSceneFilterNode final : NodeRenderer +{ + const FlattenedSceneFilterNode& m_node; + ossia::geometry_spec m_outputSpec; + ossia::geometry_spec m_lastInput; + int m_lastMode{-1}; + int m_lastMatch{0}; + std::string m_lastMatchStr; + + RenderedFlattenedSceneFilterNode(const FlattenedSceneFilterNode& n) + : NodeRenderer{n} + , m_node{n} + { + } + + void init(RenderList&, QRhiResourceUpdateBatch&) override { m_initialized = true; } + void release(RenderList&) override + { + m_outputSpec = {}; + m_lastInput = {}; + m_lastMode = -1; + m_lastMatchStr.clear(); + m_initialized = false; + } + + bool predicate( + const ossia::geometry& g, int mode, uint32_t match, + uint32_t match_str_hash) const noexcept + { + switch(mode) + { + case 0: return g.filter_tag == match; + case 1: return g.filter_tag != match; + case 2: return g.filter_material_index == match; + case 3: return g.filter_material_index != match; + case 4: return (uint32_t)g.blend == match; + case 5: return (uint32_t)g.blend != match; + case 6: return g.depth_write == (match != 0); + case 7: return g.depth_write != (match != 0); + case 8: return (uint32_t)g.cull_mode == match; + case 9: return (uint32_t)g.cull_mode != match; + case 10: return (uint32_t)g.topology == match; + case 11: return (uint32_t)g.topology != match; + case 12: return g.filter_tag == match_str_hash; + case 13: return g.filter_tag != match_str_hash; + default: return true; + } + } + + void rebuild() + { + m_outputSpec.meshes = std::make_shared(); + 
m_outputSpec.filters + = this->geometry.filters + ? this->geometry.filters + : std::make_shared(); + + if(!this->geometry.meshes) + return; + + const uint32_t matchU = (uint32_t)m_node.m_match; + // Same hash producers stamp on filter_tag (rapidhash truncated to 32 + // bits). Empty match_str short-circuits to 0u so it matches the + // "untagged" sentinel rather than rapidhash-of-empty (a non-zero + // value that would never match anything in practice). + const uint32_t matchStrHash + = m_node.m_match_str.empty() + ? 0u + : (uint32_t)ossia::hash_string(m_node.m_match_str); + for(const auto& g : this->geometry.meshes->meshes) + { + if(predicate(g, m_node.m_mode, matchU, matchStrHash)) + m_outputSpec.meshes->meshes.push_back(g); + } + m_outputSpec.meshes->dirty_index = this->geometry.meshes->dirty_index; + } + + void update(RenderList&, QRhiResourceUpdateBatch&, Edge*) override + { + const bool geomChanged = (this->geometry != m_lastInput) || this->geometryChanged; + const bool paramsChanged + = (m_node.m_mode != m_lastMode) || (m_node.m_match != m_lastMatch) + || (m_node.m_match_str != m_lastMatchStr); + if(!geomChanged && !paramsChanged && m_outputSpec.meshes) + return; + + rebuild(); + m_lastInput = this->geometry; + m_lastMode = m_node.m_mode; + m_lastMatch = m_node.m_match; + m_lastMatchStr = m_node.m_match_str; + this->geometryChanged = false; + } + + void runInitialPasses( + RenderList& renderer, QRhiCommandBuffer&, QRhiResourceUpdateBatch*&, + Edge& edge) override + { + if(!m_outputSpec.meshes) + return; + auto* sink = edge.sink; + if(!sink || !sink->node) + return; + auto rn_it = sink->node->renderedNodes.find(&renderer); + if(rn_it == sink->node->renderedNodes.end()) + return; + auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink); + if(it == sink->node->input.end()) + return; + int port_idx = (int)(it - sink->node->input.begin()); + rn_it->second->process(port_idx, m_outputSpec, edge.source); + } + + void runRenderPass(RenderList&, 
QRhiCommandBuffer&, Edge&) override { } + + // Data-only renderer — no per-edge GPU pass state to release. + void removeOutputPass(RenderList&, Edge&) override { } +}; + +FlattenedSceneFilterNode::FlattenedSceneFilterNode() +{ + // Port 0: geometry input + input.push_back(new Port{this, {}, Types::Geometry, {}}); + // Port 1: filter mode + { + auto* data = new int{0}; + input.push_back(new Port{this, data, Types::Int, {}}); + } + // Port 2: match value (int, modes 0..11) + { + auto* data = new int{0}; + input.push_back(new Port{this, data, Types::Int, {}}); + } + // Port 3: match string (modes 12/13). Carried as a control-only port + // (no GPU edge type — strings flow through ossia::value via process() + // rather than as a GPU resource handle). + { + auto* data = new std::string{}; + input.push_back(new Port{this, data, Types::Empty, {}}); + } + output.push_back(new Port{this, {}, Types::Geometry, {}}); +} + +FlattenedSceneFilterNode::~FlattenedSceneFilterNode() = default; + +void FlattenedSceneFilterNode::process(int32_t port, const ossia::value& v) +{ + switch(port) + { + case 1: + m_mode = ossia::convert(v); + materialChange(); + break; + case 2: + m_match = ossia::convert(v); + materialChange(); + break; + case 3: + m_match_str = ossia::convert(v); + materialChange(); + break; + default: + ProcessNode::process(port, v); + break; + } +} + +NodeRenderer* +FlattenedSceneFilterNode::createRenderer(RenderList&) const noexcept +{ + return new RenderedFlattenedSceneFilterNode{*this}; +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.hpp new file mode 100644 index 0000000000..fb0bb5a2cd --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/FlattenedSceneFilterNode.hpp @@ -0,0 +1,63 @@ +#pragma once +#include + +namespace score::gfx +{ + +/** + * @brief Per-pass filter on a flattened scene: geometry_spec → geometry_spec. 
+ * + * Reads the `filter_tag` and `filter_material_index` metadata fields that + * ScenePreprocessorNode writes onto every output geometry, and emits a new + * geometry_spec containing only the draws that match the configured + * predicate. All underlying GPU buffers are shared via `shared_ptr` — the + * filter only rewrites the mesh_list; no GPU data is copied. + * + * Inputs: + * - Port 0: Geometry (Types::Geometry) + * - Port 1: Filter mode (Types::Int): + * 0 = tag equals match value + * 1 = tag differs from match value + * 2 = material index equals match value + * 3 = material index differs from match value + * 4 = blend_mode equals match (0 = opaque, 1 = premul-alpha) + * 5 = blend_mode differs from match + * 6 = depth_write equals (match != 0) + * 7 = depth_write differs from (match != 0) + * 8 = cull_mode equals match (0 = none, 1 = front, 2 = back) + * 9 = cull_mode differs from match + * 10 = topology equals match (0 = triangles, 1 = tri strip, …) + * 11 = topology differs from match + * 12 = format_id equals match_str (rapidhash of match_str truncated + * to 32 bits compared with filter_tag; an empty match_str + * short-circuits to 0u so it matches the "untagged" sentinel + * rather than the rapidhash of the empty string) + * 13 = format_id differs from match_str + * - Port 2: Match value (Types::Int) — user-supplied, interpreted per mode + * - Port 3: Match string (Types::Empty control) — used by modes 12/13 + * + * Per-draw filtering (e.g. "alphaMode=BLEND draws inside a single MDI + * batch") is NOT handled here — ScenePreprocessor emits one geometry + * per MDI batch so mesh-level fields collapse to 0. Use a Tier-3 + * CSF compute filter for per-draw cases; this node is for multi-mesh + * inputs (per-object producers, pre-MDI composition). 
+ * + * Outputs: + * - Port 0: Geometry (Types::Geometry) + */ +class SCORE_PLUGIN_GFX_EXPORT FlattenedSceneFilterNode : public ProcessNode +{ +public: + FlattenedSceneFilterNode(); + ~FlattenedSceneFilterNode() override; + + score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override; + + void process(int32_t port, const ossia::value& v) override; + + int m_mode{0}; + int m_match{0}; + std::string m_match_str; +}; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp index 07b080f381..b7172e5c11 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNode.cpp @@ -28,7 +28,23 @@ struct geometry_input_port_vis void operator()(const isf::long_input& in) noexcept { - *reinterpret_cast(data) = in.def; + // Enum mode: in.def is the *index* into VALUES, but the shader and the + // downstream ComboBox-driven port both consume the numeric VALUE at that + // index. Resolve here so the initial UBO matches post-interaction state. + // String-valued VALUES fall back to the index (GLSL can't receive strings). + int initial = (int)in.def; + if(!in.values.empty()) + { + auto idx = std::min(in.def, in.values.size() - 1); + const auto& v = in.values[idx]; + if(auto i = ossia::get_if(&v)) + initial = (int)*i; + else if(auto d = ossia::get_if(&v)) + initial = (int)*d; + else + initial = (int)idx; + } + *reinterpret_cast(data) = initial; self.input.push_back(new Port{&self, data, Types::Int, {}}); data += 4; sz += 4; @@ -136,6 +152,12 @@ struct geometry_input_port_vis // Storage buffers are typically managed by the system // No UI controls or uniform buffer data needed } + + void operator()(const isf::uniform_input& in) noexcept + { + // UBO inputs are sourced from upstream Buffer ports; no material-UBO + // storage needed here. 
+ } void operator()(const isf::texture_input& in) noexcept { diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp index 10c644d73f..0091d3882e 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.cpp @@ -20,6 +20,11 @@ TextureRenderTarget GeometryFilterNodeRenderer::renderTargetForInput(const Port& } void GeometryFilterNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); +} + +void GeometryFilterNodeRenderer::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { QRhi& rhi = *renderer.state.rhi; @@ -30,7 +35,10 @@ void GeometryFilterNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBa = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); m_materialUBO->setName("GeometryFilterNodeRenderer.ubo"); SCORE_ASSERT(m_materialUBO->create()); + if(node().m_material_data) + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, node().m_material_data.get()); } + m_initialized = true; } void GeometryFilterNodeRenderer::update( @@ -47,8 +55,17 @@ void GeometryFilterNodeRenderer::update( void GeometryFilterNodeRenderer::release(RenderList& r) { - delete m_materialUBO; + releaseState(r); +} + +void GeometryFilterNodeRenderer::releaseState(RenderList& r) +{ + if(!m_initialized) + return; + if(m_materialUBO) + m_materialUBO->deleteLater(); m_materialUBO = nullptr; + m_initialized = false; } void GeometryFilterNodeRenderer::runInitialPasses( diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp index 48242c10b3..64868f5823 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GeometryFilterNodeRenderer.hpp @@ -11,8 
+11,10 @@ struct SCORE_PLUGIN_GFX_EXPORT GeometryFilterNodeRenderer : score::gfx::NodeRend TextureRenderTarget renderTargetForInput(const Port& p) override; void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; void release(RenderList& r) override; + void releaseState(RenderList& r) override; void runInitialPasses( RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, @@ -20,6 +22,9 @@ struct SCORE_PLUGIN_GFX_EXPORT GeometryFilterNodeRenderer : score::gfx::NodeRend void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override; + // Data-only renderer — no per-edge GPU pass state to release. + void removeOutputPass(RenderList&, Edge&) override { } + QRhiBuffer* material() const noexcept { return m_materialUBO; } private: diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.cpp new file mode 100644 index 0000000000..82757ebbef --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.cpp @@ -0,0 +1,983 @@ +#include + +#include // BUFTRACE +#include +#include +#include // MaterialGPU layout + +#include + +#include +#include + +namespace score::gfx +{ +namespace +{ +// Per-arena configuration. Capacities are the Phase 1 hard cap; growth is +// a Phase 2 concern (allocate() logs + returns invalid Slot on overflow). +// Sizes are deliberately conservative — a typical scene has 1-6 cameras, +// 1-16 lights, 10-50 materials, 50-1000 draws. The caps below allow +// ~50× headroom before we need grow-in-place. +// Per-arena configuration: fixed-stride layout. Buffer capacity is +// stride × slot_count. Consumer shaders index arena.entries[slot_index] +// with std430 stride == slot_stride. 
+struct ArenaConfig +{ + uint32_t slot_stride; // byte stride per slot + uint32_t slot_count; // number of slots + QRhiBuffer::UsageFlags usage; + QRhiBuffer::Type type; + const char* name; +}; + +// Entry order MUST match the Arena enum in GpuResourceRegistry.hpp. +constexpr ArenaConfig kArenaConfigs[(std::size_t)GpuResourceRegistry::Arena::Count_] + = { + // RawCamera — 64 B stride × 32 slots = 2 KiB. UBO dynamic. + {64, 32, QRhiBuffer::UniformBuffer, QRhiBuffer::Dynamic, + "GpuResourceRegistry::raw_camera"}, + + // RawLight — 64 B stride × 4096 slots = 256 KiB. SSBO static + // (QRhi forbids StorageBuffer + Dynamic). Sized for VJ / + // particle-driven workflows that emit thousands of procedural + // lights via pack_lights_from_points / wander_lights_inline / + // grid_lights_inline. Typical 3D-file scenes (a handful of + // scene-node lights) pay only for the first N used slots — + // the rest is dormant device-local memory, no per-frame + // upload cost. Keep in sync with ScenePreprocessor's + // lightIdxBytes floor (must be slot_count * 4 bytes). + {64, 4096, QRhiBuffer::StorageBuffer, QRhiBuffer::Static, + "GpuResourceRegistry::raw_light"}, + + // RawTransform — 64 B stride × 16384 slots = 1 MiB. Sized for + // heavy glTF / FBX scenes with 5-10k nodes. + {64, 16384, QRhiBuffer::StorageBuffer, QRhiBuffer::Static, + "GpuResourceRegistry::raw_transform"}, + + // Material — 80 B stride × 32768 slots = 2.5 MiB. Shader indexes + // this arena directly as scene_materials.entries[material_index]. + // Sized for enterprise / architectural-scale USD content (city + // assemblies, CAD exports, Pixar Kitchen_set-class scenes) — + // those routinely pack 1k–20k unique materials across all their + // per-prop references. Small scenes pay only for the first N + // used slots; the rest is dormant SSBO space. + {80, 32768, QRhiBuffer::StorageBuffer, QRhiBuffer::Static, + "GpuResourceRegistry::material"}, + + // Env — 64 B stride × 8 slots = 512 B. UBO dynamic. 
+ {64, 8, QRhiBuffer::UniformBuffer, QRhiBuffer::Dynamic, + "GpuResourceRegistry::env"}, +}; + +} // namespace + +GpuResourceRegistry::~GpuResourceRegistry() +{ + destroy(); +} + +void GpuResourceRegistry::init(QRhi& rhi, QRhiResourceUpdateBatch& batch) +{ + SCORE_ASSERT(!m_rhi); + m_rhi = &rhi; + + for(std::size_t i = 0; i < m_arenas.size(); ++i) + { + auto& a = m_arenas[i]; + const auto& cfg = kArenaConfigs[i]; + const uint32_t bytes = cfg.slot_stride * cfg.slot_count; + + a.buffer = rhi.newBuffer(cfg.type, cfg.usage, bytes); + a.buffer->setName(cfg.name); + if(!a.buffer->create()) + { + qWarning() << "GpuResourceRegistry: failed to create arena buffer" + << cfg.name + << "— falling back to null (allocations will fail)"; + delete a.buffer; + a.buffer = nullptr; + continue; + } + // Zero-fill the arena. Vulkan does NOT initialise VkBuffer memory + // — the underlying device-memory page contains whatever was there + // before. Arenas are sparse-uploaded by producers (each Light / + // Material / Transform / Camera node writes only its own slot); + // unused slots stay at their initial value. After a fresh + // RenderList (resize), every consumer indexing past the populated + // range reads device-memory garbage. Especially visible for lights: + // shaders compose world-space light positions via + // world_transforms.data[L.transform_slot], and L.color/range read + // from the RawLight arena — both arenas garbage on the resize + // frame produces the user's "wildly different lighting per + // resize" symptom (saturated colours, blown-out highlights, very + // dark, varying per attempt). + // + // Cost: ~4 MiB total upload per RenderList init across all arenas + // (RawCamera 2 KiB + RawLight 256 KiB + RawTransform 1 MiB + + // Material 2.5 MiB + Env 512 B). One-time per resize, negligible. 
+ // RhiClearBuffer routes Dynamic buffers via chunked + // updateDynamicBuffer and Static buffers via uploadStaticBuffer + // — both fed from a thread-local zero pool so we don't pay a + // per-arena std::vector(bytes, 0) allocation on every + // RenderList init. + RhiClearBuffer::clearBuffer(rhi, batch, a.buffer, 0, bytes); + + a.slot_stride = cfg.slot_stride; + a.slot_count = cfg.slot_count; + a.usage = cfg.usage; + a.type = cfg.type; + // Generation table sized to slot_count. Start at 1 so a freshly- + // default gpu_slot_ref (generation=0) never matches a real slot. + a.slot_generations.assign(cfg.slot_count, 1u); + // Free-list stack: push slots in reverse order so pop yields slot + // index 0, 1, 2, ... in allocation order. Keeps the arena buffer + // densely packed at the front, which downstream tooling may assume. + a.free_slots.clear(); + a.free_slots.reserve(cfg.slot_count); + for(uint32_t s = cfg.slot_count; s-- > 0;) + a.free_slots.push_back(s); + } + + // Reserve Material arena slot 0 as the "default material" sentinel. + // arenaSlotForMaterial(nullptr) returns 0; seedDefaults() writes a + // white-dielectric MaterialGPU into that slot once a resource-update + // batch is available. Pop from the free-list now so no producer can + // claim it. (Other arenas keep slot 0 available — only Material has + // the "null fallback" semantics.) + { + auto& mat = m_arenas[(std::size_t)Arena::Material]; + if(!mat.free_slots.empty() && mat.free_slots.back() == 0u) + mat.free_slots.pop_back(); + } + + // Mesh arena — one QRhiBuffer per attribute stream, plus TWO shared + // OffsetAllocators (vertex-units and index-units). See the + // "CRITICAL invariant" block in GpuResourceRegistry.hpp for why the + // allocators are NOT per-stream: a single baseVertex applies to all + // vertex bindings, so per-mesh byte offsets across streams must be + // proportional to per-stream stride. One allocator → one logical + // vertex slot → guaranteed lockstep. 
+ for(std::size_t i = 0; i < m_meshStreams.size(); ++i) + { + auto& s = m_meshStreams[i]; + const uint32_t bytes = kMeshCapBytes[i]; + + using UF = QRhiBuffer::UsageFlags; + UF usage; + if(i == (std::size_t)MeshStream::Indices) + usage = UF(QRhiBuffer::IndexBuffer); + else + usage = UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer); + + s.buffer = rhi.newBuffer(QRhiBuffer::Static, usage, bytes); + const char* names[(std::size_t)MeshStream::Count_] = { + "MeshArena::positions", "MeshArena::normals", + "MeshArena::texcoords", "MeshArena::tangents", + "MeshArena::colors", "MeshArena::texcoords1", + "MeshArena::indices"}; + s.buffer->setName(names[i]); + if(!s.buffer->create()) + { + qWarning() << "GpuResourceRegistry: failed to create mesh arena stream" + << names[i] << "— acquireMeshSlab will return null."; + delete s.buffer; + s.buffer = nullptr; + continue; + } + s.capacity_bytes = bytes; + s.usage = usage; + } + + // Shared vertex/index allocators. Capacity in SLOTS, not bytes. + // For vertex slots: every vertex stream must accommodate + // capacity_slots × its_stride bytes. The min over the four vertex + // streams determines the safe cap. + uint32_t vertSlotCap = 0xFFFFFFFFu; + for(std::size_t i = 0; i < (std::size_t)MeshStream::Indices; ++i) + { + if(!m_meshStreams[i].buffer) + { + vertSlotCap = 0; + break; + } + vertSlotCap = std::min( + vertSlotCap, m_meshStreams[i].capacity_bytes / kMeshStride[i]); + } + m_vertexSlotsCapacity = vertSlotCap; + if(vertSlotCap > 0) + { + m_vertexAllocator = std::make_unique( + vertSlotCap, 128u * 1024u); + } + + const auto& idxStream = m_meshStreams[(std::size_t)MeshStream::Indices]; + m_indexSlotsCapacity = idxStream.buffer + ? 
idxStream.capacity_bytes + / kMeshStride[(std::size_t)MeshStream::Indices] + : 0u; + if(m_indexSlotsCapacity > 0) + { + m_indexAllocator = std::make_unique( + m_indexSlotsCapacity, 128u * 1024u); + } + + m_vertexSlotsUsed = 0; + m_indexSlotsUsed = 0; +} + +void GpuResourceRegistry::seedDefaults(QRhiResourceUpdateBatch& batch) +{ + if(m_defaults_seeded) + return; + + // Material arena slot 0 — the default material returned by + // arenaSlotForMaterial(nullptr). MaterialGPU's in-class initializers + // are exactly the right defaults (white baseColor, metallic=0, + // roughness=0.5, occlusion=1, no emissive, all texture refs null), so + // a default-constructed instance is the byte payload we want. + auto& mat = m_arenas[(std::size_t)Arena::Material]; + if(mat.buffer && mat.slot_stride >= sizeof(MaterialGPU)) + { + MaterialGPU defaultMat{}; + batch.uploadStaticBuffer( + mat.buffer, /*offset=*/0, + (quint32)sizeof(MaterialGPU), &defaultMat); + } + + m_defaults_seeded = true; +} + +void GpuResourceRegistry::destroy(RenderList& renderer) +{ + // Route every arena buffer release through RenderList::releaseBuffer + // so the RenderList's bookkeeping sees the release and the buffer is + // destroyed through the same code path as every other QRhiBuffer in + // the pipeline. + for(auto& a : m_arenas) + { + if(a.buffer) + { + renderer.releaseBuffer(a.buffer); + a.buffer = nullptr; + } + a.slot_stride = 0; + a.slot_count = 0; + for(auto& g : a.slot_generations) + ++g; + a.slot_generations.clear(); + a.free_slots.clear(); + } + m_defaults_seeded = false; + for(auto& ch : m_textureChannels) + { + for(auto& b : ch.buckets) + { + if(b.array) + { + b.array->deleteLater(); + b.array = nullptr; + } + if(b.sampler) + { + b.sampler->deleteLater(); + b.sampler = nullptr; + } + b.layers = 0; + b.layerMap.clear(); + } + ch.buckets.clear(); + ch.dynamicSlotMap.clear(); + ch.dynamicTextures.clear(); + ch.dynamicSlotLastUse.clear(); + ch.dynamicSlotCounter = 0; + } + // Mesh arena teardown. 
Route through releaseBuffer (same invariant + // as the component arenas) so downstream MeshBuffers that still + // reference one of our slab offsets don't hit use-after-free. + for(auto& s : m_meshStreams) + { + if(s.buffer) + { + renderer.releaseBuffer(s.buffer); + s.buffer = nullptr; + } + s.capacity_bytes = 0; + } + m_vertexAllocator.reset(); + m_indexAllocator.reset(); + m_vertexSlotsCapacity = 0; + m_indexSlotsCapacity = 0; + m_vertexSlotsUsed = 0; + m_indexSlotsUsed = 0; + m_meshSlabs.clear(); + m_pendingReleases.clear(); + m_rhi = nullptr; +} + +void GpuResourceRegistry::destroyOwned() +{ + // OutputNode-side teardown. The registry now persists across + // RenderList rebuilds (resize fast path), so destroy(RenderList&)'s + // RL-routed releaseBuffer path is bypassed during normal RL rebuild. + // When the OutputNode's QRhi is about to go away (destroyOutput, + // setSwapchainFormat, ~OutputNode), we have to tear down our QRhi + // resources directly — there is no live RenderList to plumb through + // and the QRhi is still alive (callers MUST invoke this BEFORE + // RenderState::destroy()). + // + // `delete` on a QRhiBuffer / QRhiTexture / QRhiSampler runs its + // destructor which calls destroy() on the underlying GPU resource + // and then frees the wrapper. Mirrors the direct deletes + // RenderList::release does for m_outputUBO / m_emptyTexture* — same + // safety contract (QRhi still alive). 
+ for(auto& a : m_arenas) + { + delete a.buffer; + a.buffer = nullptr; + a.slot_stride = 0; + a.slot_count = 0; + for(auto& g : a.slot_generations) + ++g; + a.slot_generations.clear(); + a.free_slots.clear(); + } + m_defaults_seeded = false; + for(auto& ch : m_textureChannels) + { + for(auto& b : ch.buckets) + { + delete b.array; + b.array = nullptr; + delete b.sampler; + b.sampler = nullptr; + b.layers = 0; + b.layerMap.clear(); + } + ch.buckets.clear(); + ch.dynamicSlotMap.clear(); + ch.dynamicTextures.clear(); + ch.dynamicSlotLastUse.clear(); + ch.dynamicSlotCounter = 0; + } + for(auto& s : m_meshStreams) + { + delete s.buffer; + s.buffer = nullptr; + s.capacity_bytes = 0; + } + m_vertexAllocator.reset(); + m_indexAllocator.reset(); + m_vertexSlotsCapacity = 0; + m_indexSlotsCapacity = 0; + m_vertexSlotsUsed = 0; + m_indexSlotsUsed = 0; + m_meshSlabs.clear(); + m_pendingReleases.clear(); + m_rhi = nullptr; +} + +void GpuResourceRegistry::destroy() +{ + // Destructor fallback — nulls the buffer pointers without touching + // the QRhi. Safe when destroy(RenderList&) already ran; leaks the + // QRhiBuffer wrapper if QRhi has been torn down without a prior + // RenderList-routed release (deleteLater on a dangling buffer would + // crash, and leaking the wrapper is the lesser evil). + for(auto& a : m_arenas) + { + a.buffer = nullptr; + a.slot_stride = 0; + a.slot_count = 0; + for(auto& g : a.slot_generations) + ++g; + a.slot_generations.clear(); + a.free_slots.clear(); + } + m_defaults_seeded = false; + for(auto& ch : m_textureChannels) + { + // Do NOT deleteLater on textures here — if QRhi has already been + // torn down their storage is gone. Leak the wrapper, same rule + // as arena buffers above. 
+ for(auto& b : ch.buckets) + { + b.array = nullptr; + b.sampler = nullptr; + b.layers = 0; + b.layerMap.clear(); + } + ch.buckets.clear(); + ch.dynamicSlotMap.clear(); + ch.dynamicTextures.clear(); + ch.dynamicSlotLastUse.clear(); + ch.dynamicSlotCounter = 0; + } + // Mesh arena: null the buffers (leaking the wrappers, same rule); + // tear down allocators since those are pure CPU-side. + for(auto& s : m_meshStreams) + { + s.buffer = nullptr; + s.capacity_bytes = 0; + } + m_vertexAllocator.reset(); + m_indexAllocator.reset(); + m_vertexSlotsCapacity = 0; + m_indexSlotsCapacity = 0; + m_vertexSlotsUsed = 0; + m_indexSlotsUsed = 0; + m_meshSlabs.clear(); + m_pendingReleases.clear(); + m_rhi = nullptr; +} + +const char* GpuResourceRegistry::textureChannelArrayName(TextureChannel ch) noexcept +{ + switch(ch) + { + case TextureChannel::BaseColor: return "baseColorArray"; + case TextureChannel::MetalRough: return "metalRoughArray"; + case TextureChannel::Normal: return "normalArray"; + case TextureChannel::Emissive: return "emissiveArray"; + case TextureChannel::Occlusion: return "occlusionArray"; + default: return ""; + } +} + +const char* GpuResourceRegistry::textureChannelDynBaseName(TextureChannel ch) noexcept +{ + switch(ch) + { + case TextureChannel::BaseColor: return "baseColorDyn"; + case TextureChannel::MetalRough: return "metalRoughDyn"; + case TextureChannel::Normal: return "normalDyn"; + case TextureChannel::Emissive: return "emissiveDyn"; + case TextureChannel::Occlusion: return "occlusionDyn"; + default: return ""; + } +} + +QRhiTexture::Flags GpuResourceRegistry::textureChannelFlags(TextureChannel ch) noexcept +{ + switch(ch) + { + case TextureChannel::BaseColor: + case TextureChannel::Emissive: + return QRhiTexture::sRGB; + // Occlusion is a single-channel data texture (R = occlusion). Linear, + // not sRGB. RGBA8 for now (we use only the R channel) — a future + // optimisation could route to R8 to save VRAM. 
+ default: + return {}; + } +} + + +int GpuResourceRegistry::resolveDynamicSlot( + TextureChannel channel, void* native_handle) noexcept +{ + if(!native_handle) + return -1; + auto* tex = static_cast(native_handle); + // Key by QRhi's monotonic globalResourceId rather than the raw + // pointer. The pointer can be recycled by the heap allocator after + // the previous QRhiTexture is destroyed (qrhivulkan.cpp:5909-5912 + // documents this exact hazard for QRhi's own SRB tracking, which + // pairs the pointer with `m_id`). Using the id makes a stale entry + // simply mismatch instead of aliasing onto a fresh resource. + const quint64 key = tex->globalResourceId(); + auto& ch = textureChannel(channel); + const uint64_t now = ++ch.dynamicSlotCounter; + + // Hit: refresh access stamp and return existing slot. + auto it = ch.dynamicSlotMap.find(key); + if(it != ch.dynamicSlotMap.end()) + { + const int slot = it->second; + if(slot >= 0 && slot < (int)ch.dynamicSlotLastUse.size()) + ch.dynamicSlotLastUse[slot] = now; + return slot; + } + + // Miss with room: append a new slot. + if((int)ch.dynamicTextures.size() < kMaxDynamicSlots) + { + const int slot = (int)ch.dynamicTextures.size(); + ch.dynamicSlotMap[key] = slot; + ch.dynamicTextures.push_back(tex); + ch.dynamicSlotLastUse.push_back(now); + return slot; + } + + // Miss with full map: LRU-evict the slot with the oldest access stamp. + // Without this branch a long session that swaps capture sources or + // resizes a video texture more than kMaxDynamicSlots times pinned the + // map at its initial entries; every subsequent texture returned -1 and + // dynamic-textured materials silently blanked. 
+ int victim = 0; + uint64_t victimStamp = ch.dynamicSlotLastUse[0]; + for(int i = 1; i < (int)ch.dynamicSlotLastUse.size(); ++i) + { + if(ch.dynamicSlotLastUse[i] < victimStamp) + { + victim = i; + victimStamp = ch.dynamicSlotLastUse[i]; + } + } + // Drop the old key→slot mapping (linear scan since flat_map keys are + // ids, not slot indices). N is bounded by kMaxDynamicSlots so this is + // a few comparisons. + for(auto it2 = ch.dynamicSlotMap.begin(); it2 != ch.dynamicSlotMap.end(); ++it2) + { + if(it2->second == victim) + { + ch.dynamicSlotMap.erase(it2); + break; + } + } + ch.dynamicSlotMap[key] = victim; + ch.dynamicTextures[victim] = tex; + ch.dynamicSlotLastUse[victim] = now; + return victim; +} + + +GpuResourceRegistry::Slot GpuResourceRegistry::allocate(Arena arena, uint32_t size) +{ + Slot slot; + slot.arena = arena; + slot.size = size; + + auto& a = m_arenas[(std::size_t)arena]; + if(!a.buffer || a.slot_stride == 0) + { + qWarning() << "GpuResourceRegistry::allocate: arena" + << (int)arena << "is not initialised"; + return slot; + } + if(size > a.slot_stride) + { + qWarning() << "GpuResourceRegistry::allocate: requested size" + << size << "exceeds arena" + << kArenaConfigs[(std::size_t)arena].name << "stride" + << a.slot_stride; + return slot; + } + if(a.free_slots.empty()) + { + qWarning() << "GpuResourceRegistry::allocate: arena" + << kArenaConfigs[(std::size_t)arena].name + << "is full — all" << a.slot_count << "slots in use"; + return slot; + } + slot.slot_index = a.free_slots.back(); + a.free_slots.pop_back(); + // Bump and stamp the generation. Any gpu_slot_ref still holding the + // previous generation for this slot index will fail isLive(). 
+ slot.generation = ++a.slot_generations[slot.slot_index]; + return slot; +} + +void GpuResourceRegistry::free(Slot& slot) +{ + if(!slot.valid()) + return; + auto& a = m_arenas[(std::size_t)slot.arena]; + if(slot.slot_index < a.slot_generations.size()) + { + // Bump the generation first so any dangling ref from this Slot + // fails isLive() regardless of whether the slot gets re-allocated. + ++a.slot_generations[slot.slot_index]; + a.free_slots.push_back(slot.slot_index); + } + slot.slot_index = Slot::kInvalidIndex; + slot.generation = 0; +} + +QRhiBuffer* GpuResourceRegistry::buffer(Arena arena) const noexcept +{ + return m_arenas[(std::size_t)arena].buffer; +} + +uint32_t GpuResourceRegistry::slotOffset(const Slot& slot) const noexcept +{ + if(!slot.valid()) + return 0u; + return slot.slot_index * m_arenas[(std::size_t)slot.arena].slot_stride; +} + +uint32_t GpuResourceRegistry::arenaSlotStride(Arena arena) const noexcept +{ + return m_arenas[(std::size_t)arena].slot_stride; +} + +uint32_t GpuResourceRegistry::arenaSlotCount(Arena arena) const noexcept +{ + return m_arenas[(std::size_t)arena].slot_count; +} + +void GpuResourceRegistry::updateSlot( + QRhiResourceUpdateBatch& res, const Slot& slot, const void* data, + uint32_t size) noexcept +{ + if(!slot.valid() || !data || size == 0) + return; + auto& a = m_arenas[(std::size_t)slot.arena]; + if(!a.buffer) + return; + + const uint32_t offset = slotOffset(slot); + SCORE_ASSERT(offset + size <= a.slot_stride * a.slot_count); + + if(a.type == QRhiBuffer::Dynamic) + res.updateDynamicBuffer(a.buffer, offset, size, data); + else + res.uploadStaticBuffer(a.buffer, offset, size, data); +} + +// ─── Mesh arena manager ────────────────────────────────────────── + +GpuResourceRegistry::MeshSlab* GpuResourceRegistry::acquireMeshSlab( + uint64_t stable_id, uint32_t vertex_count, uint32_t index_count, + uint32_t current_frame) noexcept +{ + if(stable_id == 0) + return nullptr; // caller without stable_id — skip slab caching + + 
// Fast path: existing slab, same counts. Zero-cost hit. + auto it = m_meshSlabs.find(stable_id); + if(it != m_meshSlabs.end()) + { + auto& slab = it->second; + if(slab.vertex_count == vertex_count && slab.index_count == index_count) + { + slab.freshly_allocated = false; + return &slab; + } + // Count mismatch — same mesh primitive re-emitting with different + // counts. Defer the free to the grace queue so an in-flight draw + // referencing the old offset doesn't read freed-and-reused bytes. + // + // Stamp `released_frame = current_frame` so the next sweep waits + // `grace` frames *from this enqueue*, matching QRhi's deferred- + // release contract (which keys on the submission frame slot, not 0). + // Stamping 0 here would collapse the safety to "wait `grace` frames + // after boot" — a one-time delay that vanishes the moment + // current_frame >= grace, after which every count-mismatch enqueue + // is freed on the very next sweep (same-frame UAF). + // + // Decrement the *Used trackers eagerly here so the new allocation + // below sees an accurate "live slabs" footprint while the old slot + // sits in pending-releases. The actual OffsetAllocator::free runs + // in sweepMeshSlabs phase-2 once `released_frame + grace <= + // current_frame`, but that path will NOT decrement again (single + // decrement per slab — at logical-release time). 
+ if(m_vertexAllocator + && slab.vertex_slot.metadata != OffsetAllocator::Allocation::NO_SPACE) + { + const auto sz = m_vertexAllocator->allocationSize(slab.vertex_slot); + if(m_vertexSlotsUsed >= sz) + m_vertexSlotsUsed -= sz; + } + if(m_indexAllocator + && slab.index_slot.metadata != OffsetAllocator::Allocation::NO_SPACE) + { + const auto sz = m_indexAllocator->allocationSize(slab.index_slot); + if(m_indexSlotsUsed >= sz) + m_indexSlotsUsed -= sz; + } + PendingRelease pr; + pr.stable_id = stable_id; + pr.released_frame = current_frame; + pr.vertex_slot = slab.vertex_slot; + pr.index_slot = slab.index_slot; + m_pendingReleases.push_back(pr); + m_meshSlabs.erase(it); + } + + // Drain any pending releases that have served their grace BEFORE + // attempting the fresh allocate. Otherwise an immediate count-mismatch + // (this call) plus a previously-queued release that is grace-elapsed + // would force the OffsetAllocator to find space for `new + old` bytes, + // even though the old bytes are safe to reuse — manifesting as a + // spurious "vertex/index pool exhausted" qWarning under live-edit on + // a near-capacity scene. The same `grace=2` invariant that + // sweepMeshSlabs uses is preserved here. + drainExpiredPendingReleases(current_frame, /*grace=*/2u); + + if(!m_vertexAllocator || !m_indexAllocator) + return nullptr; + + // Fresh allocation. ONE vertex slot (in vertex units) shared by + // positions/normals/texcoords/tangents, ONE index slot. + MeshSlab slab; + slab.stable_id = stable_id; + slab.vertex_count = vertex_count; + slab.index_count = index_count; + slab.freshly_allocated = true; + + if(vertex_count > 0) + { + slab.vertex_slot = m_vertexAllocator->allocate(vertex_count); + if(slab.vertex_slot.offset == OffsetAllocator::Allocation::NO_SPACE) + { + qWarning() << "GpuResourceRegistry::acquireMeshSlab: vertex pool " + "exhausted (requested" + << vertex_count << "verts; free" + << m_vertexAllocator->storageReport().totalFreeSpace + << "vertex slots). 
Skipping mesh stable_id=" + << qulonglong(stable_id); + return nullptr; + } + m_vertexSlotsUsed += vertex_count; + } + BUFTRACE() << "[MeshSlab] alloc id=" << qulonglong(stable_id) + << " vc=" << vertex_count << " ic=" << index_count + << " vSlot=" << slab.vertex_slot.offset + << " (used=" << m_vertexSlotsUsed << "/" << m_vertexSlotsCapacity + << ")"; + + if(index_count > 0) + { + slab.index_slot = m_indexAllocator->allocate(index_count); + if(slab.index_slot.offset == OffsetAllocator::Allocation::NO_SPACE) + { + qWarning() << "GpuResourceRegistry::acquireMeshSlab: index pool " + "exhausted (requested" + << index_count << "indices; free" + << m_indexAllocator->storageReport().totalFreeSpace + << "index slots). Skipping mesh stable_id=" + << qulonglong(stable_id); + // Roll back the vertex allocation we just made. + if(vertex_count > 0 + && slab.vertex_slot.metadata != OffsetAllocator::Allocation::NO_SPACE) + { + m_vertexAllocator->free(slab.vertex_slot); + if(m_vertexSlotsUsed >= vertex_count) + m_vertexSlotsUsed -= vertex_count; + } + return nullptr; + } + m_indexSlotsUsed += index_count; + } + + const auto [inserted_it, ok] = m_meshSlabs.emplace(stable_id, slab); + return ok ? &inserted_it->second : nullptr; +} + +void GpuResourceRegistry::markMeshSlabSeen( + uint64_t stable_id, uint32_t current_frame) noexcept +{ + auto it = m_meshSlabs.find(stable_id); + if(it != m_meshSlabs.end()) + it->second.last_seen_frame = current_frame; +} + +void GpuResourceRegistry::drainExpiredPendingReleases( + uint32_t current_frame, uint32_t grace) noexcept +{ + // Process the grace queue: any release submitted at least `grace` + // frames ago is safe to actually free from the OffsetAllocator now. + // The *Used trackers are NOT decremented here — the enqueue site + // (releaseMeshSlab / sweepMeshSlabs phase-1 / acquireMeshSlab's + // count-mismatch path) decrements eagerly so callers see "live + // slabs" as the footprint, not "live + grace-pending". 
+ for(auto it = m_pendingReleases.begin(); it != m_pendingReleases.end();) + { + if(current_frame >= grace + && it->released_frame + grace <= current_frame) + { + BUFTRACE() << "[MeshSlab] free id=" << qulonglong(it->stable_id) + << " vSlot=" << it->vertex_slot.offset + << " iSlot=" << it->index_slot.offset + << " released_at=" << it->released_frame + << " current=" << current_frame; + if(m_vertexAllocator + && it->vertex_slot.metadata != OffsetAllocator::Allocation::NO_SPACE) + { + m_vertexAllocator->free(it->vertex_slot); + } + if(m_indexAllocator + && it->index_slot.metadata != OffsetAllocator::Allocation::NO_SPACE) + { + m_indexAllocator->free(it->index_slot); + } + it = m_pendingReleases.erase(it); + } + else + { + ++it; + } + } +} + +void GpuResourceRegistry::sweepMeshSlabs( + uint32_t current_frame, uint32_t grace) noexcept +{ + // Two-phase: move slabs past their grace into m_pendingReleases + // (carrying their vertex+index Allocations), then process already- + // pending releases whose grace has elapsed and actually free from + // the OffsetAllocators. + // + // The grace period guards against use-after-free: an + // indirect_draw_cmds entry issued last frame may still reference + // the slab's byte offset through an in-flight draw on the GPU. + // Waiting `grace >= FramesInFlight + 1` frames ensures the GPU is + // done with it. + for(auto it = m_meshSlabs.begin(); it != m_meshSlabs.end();) + { + // Underflow-safe comparison: if current_frame is less than grace, + // nothing is old enough yet. + if(current_frame >= grace + && it->second.last_seen_frame + grace <= current_frame) + { + // Eagerly decrement *Used trackers at logical-release time so + // the per-frame "live footprint" reflects active slabs only, + // not grace-pending ones. Phase-2 (drainExpiredPendingReleases) + // performs the OffsetAllocator::free without re-decrementing. 
+ if(m_vertexAllocator + && it->second.vertex_slot.metadata + != OffsetAllocator::Allocation::NO_SPACE) + { + const auto sz + = m_vertexAllocator->allocationSize(it->second.vertex_slot); + if(m_vertexSlotsUsed >= sz) m_vertexSlotsUsed -= sz; + } + if(m_indexAllocator + && it->second.index_slot.metadata + != OffsetAllocator::Allocation::NO_SPACE) + { + const auto sz + = m_indexAllocator->allocationSize(it->second.index_slot); + if(m_indexSlotsUsed >= sz) m_indexSlotsUsed -= sz; + } + PendingRelease pr; + pr.stable_id = it->first; + pr.released_frame = current_frame; + pr.vertex_slot = it->second.vertex_slot; + pr.index_slot = it->second.index_slot; + m_pendingReleases.push_back(pr); + it = m_meshSlabs.erase(it); + } + else + { + ++it; + } + } + + drainExpiredPendingReleases(current_frame, grace); +} + +void GpuResourceRegistry::releaseMeshSlab( + uint64_t stable_id, uint32_t current_frame) noexcept +{ + auto it = m_meshSlabs.find(stable_id); + if(it == m_meshSlabs.end()) + return; + // Route through the pending-releases grace queue rather than freeing the + // OffsetAllocator sub-allocation immediately. The backing QRhiBuffer is + // long-lived; only the sub-allocation offset is guarded here. Freeing it + // at once would let the allocator hand the same offset out again this frame, + // producing a UAF for any in-flight GPU draw that still references it. + // sweepMeshSlabs() drains m_pendingReleases once released_frame + grace <= + // current_frame, matching QRhi's own deferred-release contract. + // + // Eagerly decrement *Used trackers at logical-release time (single + // decrement per slab; phase-2 drain does not re-decrement). 
+ if(m_vertexAllocator + && it->second.vertex_slot.metadata + != OffsetAllocator::Allocation::NO_SPACE) + { + const auto sz + = m_vertexAllocator->allocationSize(it->second.vertex_slot); + if(m_vertexSlotsUsed >= sz) m_vertexSlotsUsed -= sz; + } + if(m_indexAllocator + && it->second.index_slot.metadata + != OffsetAllocator::Allocation::NO_SPACE) + { + const auto sz + = m_indexAllocator->allocationSize(it->second.index_slot); + if(m_indexSlotsUsed >= sz) m_indexSlotsUsed -= sz; + } + PendingRelease pr; + pr.stable_id = stable_id; + pr.released_frame = current_frame; + pr.vertex_slot = it->second.vertex_slot; + pr.index_slot = it->second.index_slot; + m_pendingReleases.push_back(pr); + m_meshSlabs.erase(it); +} + +uint32_t GpuResourceRegistry::meshSlabOffsetBytes( + const MeshSlab& slab, MeshStream stream) const noexcept +{ + // Single source of truth for per-stream byte offsets: + // vertex streams → vertex_slot.offset (in vertex units) × stride + // index stream → index_slot.offset (in index units) × 4 + // Independent allocators per stream would let these diverge, which + // would silently produce wrong attribute reads under fragmentation. + if(stream == MeshStream::Indices) + return slab.index_slot.offset + * kMeshStride[(std::size_t)MeshStream::Indices]; + return slab.vertex_slot.offset * kMeshStride[(std::size_t)stream]; +} + +QRhiBuffer* GpuResourceRegistry::meshStreamBuffer(MeshStream s) const noexcept +{ + return m_meshStreams[(std::size_t)s].buffer; +} + +void GpuResourceRegistry::uploadMeshStream( + QRhiResourceUpdateBatch& res, const MeshSlab& slab, + MeshStream s, const void* data, uint32_t size) noexcept +{ + auto& stream = m_meshStreams[(std::size_t)s]; + if(!stream.buffer || !data || size == 0) + return; + const uint32_t offset = meshSlabOffsetBytes(slab, s); + // Guard against out-of-bounds writes. 
Slab capacity in bytes: + // vertex streams: vertex_count × stride + // index stream: index_count × 4 + const uint32_t slot_capacity_bytes + = (s == MeshStream::Indices) + ? slab.index_count * kMeshStride[(std::size_t)MeshStream::Indices] + : slab.vertex_count * kMeshStride[(std::size_t)s]; + if(size > slot_capacity_bytes) + { + qWarning() << "GpuResourceRegistry::uploadMeshStream: upload" << size + << "bytes exceeds slab capacity" << slot_capacity_bytes + << "(stream" << (int)s << ")"; + return; + } + if(offset + size > stream.capacity_bytes) + { + qWarning() << "GpuResourceRegistry::uploadMeshStream: upload offset+size" + << (offset + size) << "exceeds stream capacity" + << stream.capacity_bytes << "(stream" << (int)s << ")"; + return; + } + res.uploadStaticBuffer(stream.buffer, offset, size, data); +} + +uint32_t GpuResourceRegistry::meshStreamUsedBytes(MeshStream s) const noexcept +{ + if(s == MeshStream::Indices) + return m_indexSlotsUsed * kMeshStride[(std::size_t)MeshStream::Indices]; + return m_vertexSlotsUsed * kMeshStride[(std::size_t)s]; +} + +uint32_t GpuResourceRegistry::meshStreamFreeBytes(MeshStream s) const noexcept +{ + if(s == MeshStream::Indices) + { + if(!m_indexAllocator) return 0u; + return m_indexAllocator->storageReport().totalFreeSpace + * kMeshStride[(std::size_t)MeshStream::Indices]; + } + if(!m_vertexAllocator) return 0u; + return m_vertexAllocator->storageReport().totalFreeSpace + * kMeshStride[(std::size_t)s]; +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.hpp new file mode 100644 index 0000000000..93a6d7259d --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuResourceRegistry.hpp @@ -0,0 +1,796 @@ +#pragma once + +#include + +#include // ossia::gpu_slot_ref +#include +#include + +#ifndef OFFSETALLOCATOR_HPP_2026_04_24 +#define OFFSETALLOCATOR_HPP_2026_04_24 +#include +#endif + +#include + +#include 
+#include +#include +#include + +namespace score::gfx +{ +class RenderList; + +/** + * @brief Per-RenderList arena store for GPU-resident scene data. + * + * Owns one QRhiBuffer per well-known arena kind (camera UBO, light SSBO, + * material SSBO, per-draw SSBO, …) and hands out offset-based slots via + * a fixed-stride free-list. Source nodes (Camera, Light, PBRMesh, …) hold + * a slot for their lifetime and write their packed bytes into it at + * their own `update()`; the preprocessor binds the registry's buffers as + * scene auxiliaries. No CPU→GPU work happens in the preprocessor's render + * path — every upload is gated to a source-node message. + * + * Phase 1: scalar UBO / SSBO arenas only. Texture-array layer + * allocation (baseColorArray, metalRoughArray, …) stays inside the + * existing ScenePreprocessor::ChannelState for now; it will migrate into + * this registry in a later pass. + * + * Lifetime: owned by the OutputNode; survives RenderList rebuilds and is torn down via destroyOwned(). + * Not thread-safe — all calls must come from the render thread. + */ +class SCORE_PLUGIN_GFX_EXPORT GpuResourceRegistry +{ +public: + // Well-known arenas. Size tables live in GpuResourceRegistry.cpp and + // match the packed GPU layouts declared in SceneGPUState.hpp + + // CameraMath.hpp. Extend the enum carefully — every entry implies a + // QRhiBuffer allocation at init time. + // + // The Raw* arenas are written by source halp nodes (Camera, Light, + // Transform3D, …) at their own operator()() time — view-independent, + // aspect-ratio-agnostic, pre-composition. The Cooked arenas (Camera, + // Light, PerDraw, WorldTransform) are populated by ScenePreprocessor's + // transform passes that combine Raw inputs with the current render + // target's aspect ratio and the scene-graph parent-slot chain. + // Consumer shaders bind the Cooked arenas. 
Material and Env are + // raw == cooked — they have no scene-composition dependency, so + // source nodes write directly into the cooked slot without a + // separate raw stage. + enum class Arena : uint8_t + { + // ── Shared / source-authored ────────────────────────────────── + // These arenas hold view- and filter-independent bytes: every + // preprocessor reads the same data regardless of its camera / + // render target / upstream scene filtering. The producer owns the + // slot; multiple preprocessors consume via gpu_slot_ref + isLive(). + RawCamera, // RawCameraData — 64 B per slot, UBO + RawLight, // RawLightData — 64 B per slot, SSBO + RawTransform, // RawLocalTransform — 64 B per slot, SSBO + Material, // MaterialGPU — 64 B per slot, SSBO + Env, // EnvParamsUBO — 64 B per slot, UBO + + // Cooked outputs (camera UBOs, composed world matrices, per-draw + // structs, LightGPU with world-direction, MaterialGPU with resolved + // textureRefs) are preprocessor-PRIVATE and live in each + // ScenePreprocessorNode's own QRhiBuffers — they're view- and + // filter-dependent, so a shared arena would be incorrect when two + // preprocessors see different filtered views of the same source. + + Count_ + }; + + // Fixed-stride slot. The arena buffer is laid out as a packed array of + // stride-byte slots: slot i lives at byte offset i * stride. The slot + // index is the arena-level identity that consumer shaders use to + // address the slot as `scene_materials.entries[slot_index]` (std430 + // stride = sizeof(MaterialGPU)), `scene_lights.entries[slot_index]`, + // etc. Allocations are O(1) via a free-list stack; no bucket / bitmap + // fragmentation. Trades OffsetAllocator's variable-size flexibility + // for (a) shader-indexable layout and (b) a predictable 1:1 mapping + // between internal_index and byte offset — critical for direct arena + // reads without a per-draw offset-translation table. 
+ struct Slot + { + static constexpr uint32_t kInvalidIndex = 0xFFFFFFFFu; + + Arena arena{Arena::RawCamera}; + uint32_t slot_index{kInvalidIndex}; + uint32_t size{0}; // requested payload size (≤ arena stride) + uint32_t generation{}; // stamped on allocate; bumps on free + + bool valid() const noexcept { return slot_index != kInvalidIndex; } + }; + + GpuResourceRegistry() = default; + GpuResourceRegistry(const GpuResourceRegistry&) = delete; + GpuResourceRegistry& operator=(const GpuResourceRegistry&) = delete; + ~GpuResourceRegistry(); + + /** + * @brief Create the arena buffers. Must be called before any allocate(). + * + * Per-arena capacity is fixed at init time (grow-in-place reallocation + * is a follow-up). If an arena runs out of room, allocate() returns + * an invalid Slot and logs a warning. + * + * Persist-across-rebuild contract: the registry now lives on the + * OutputNode and survives RenderList rebuilds (e.g. viewport resize). + * The owning OutputNode lazy-calls init() exactly once for a given + * QRhi lifetime. Subsequent createRenderList calls reuse the registry + * as-is (texture arrays, mesh slabs, arena slot generations all + * preserved). Use isInitialized() to detect "registry already up". + */ + void init(QRhi& rhi, QRhiResourceUpdateBatch& batch); + + /** + * @brief True if init() has been called and destroyOwned()/destroy() + * has not. Used by RenderList::init to gate the (otherwise asserting) + * init() call when the registry is being reused across an RL rebuild. + */ + bool isInitialized() const noexcept { return m_rhi != nullptr; } + + /** + * @brief QRhi this registry was init()'d against. Null when not + * initialised. The owning OutputNode uses this to decide whether + * the registry is still bound to its QRhi (vs. a fresh QRhi created + * after a setSwapchainFormat-style teardown). + */ + QRhi* boundRhi() const noexcept { return m_rhi; } + + /** + * @brief Seed reserved arena slots with sensible defaults. 
+ * + * Called by the owning RenderList after init() and after the initial + * resource-update batch is ready. Currently writes a default + * white-dielectric MaterialGPU into Material arena slot 0 — the slot + * `arenaSlotForMaterial(nullptr)` returns when a draw has no + * material assigned (e.g. a Primitive cube with the user never + * having dropped a Material node on it). Without this seed, slot 0 + * carries whatever bytes the previous registered material left + * behind, producing the confusing "every unmaterialed mesh is red + * because the first registered material was red" symptom. + * + * Idempotent — second call is a no-op once @c m_defaults_seeded is + * set. + */ + void seedDefaults(QRhiResourceUpdateBatch& batch); + + /** + * @brief Destroy the arena buffers via the owning RenderList. + * + * Every arena QRhiBuffer is routed through @c RenderList::releaseBuffer + * so the RenderList's bookkeeping sees the release and any other path + * that still holds a pointer to the buffer can't accidentally double- + * free it. Prefer this overload; call it from RenderList::release() + * before the QRhi teardown. + */ + void destroy(RenderList& renderer); + + /** + * @brief Destructor fallback — buffers are nulled without touching the + * QRhi. Only safe when @ref destroy(RenderList&) has already run (or + * when the QRhi has already torn them down as children). Leaks the + * QRhiBuffer wrappers otherwise; that's the lesser evil vs. a + * use-after-free in the common "QRhi already dead" path. + */ + void destroy(); + + /** + * @brief Tear down arena buffers + texture arrays + mesh streams + * directly (no RenderList plumbing). Called by the owning OutputNode + * when its QRhi is about to be destroyed (destroyOutput, ~OutputNode). 
+ * + * Persist-across-rebuild contract: the registry survives across RL + * rebuilds (RenderList::release is a no-op for the registry now), so + * the QRhi-routed teardown that used to happen in destroy(RenderList&) + * has no live RenderList to run through any more. We `delete` the + * QRhiBuffer / QRhiTexture / QRhiSampler wrappers directly: the QRhi + * is still alive at this call site (callers MUST invoke this BEFORE + * RenderState::destroy() / setSwapchainFormat-style teardown), so the + * destructors free both the wrapper and the underlying GPU resource + * cleanly. After this call the registry is back to its pre-init() + * state and can be re-init()'d against a new QRhi. + */ + void destroyOwned(); + + /** + * @brief Reserve a slot in the given arena for @p size bytes. + * @return invalid Slot on OOM. Caller must check Slot::valid(). + */ + Slot allocate(Arena arena, uint32_t size); + + /** + * @brief Return the slot to the free list. Safe to call with invalid Slot. + */ + void free(Slot& slot); + + /** + * @brief Buffer underlying an arena. Null until init(). + * + * Downstream consumers (preprocessor, rasterizer SRBs) bind this buffer + * with the slot offset + size from Slot. + */ + QRhiBuffer* buffer(Arena arena) const noexcept; + + /** + * @brief Byte offset of a slot inside its arena's buffer. + */ + uint32_t slotOffset(const Slot& slot) const noexcept; + + /** + * @brief Byte stride of the arena — every slot is this many bytes. + * Consumer shaders index `arena.entries[slot_index]` where entries[] + * has std430 stride equal to this value. + */ + uint32_t arenaSlotStride(Arena arena) const noexcept; + + /** + * @brief Slot capacity of the arena (number of slots, not bytes). + */ + uint32_t arenaSlotCount(Arena arena) const noexcept; + + /** + * @brief Upload @p size bytes starting at @p data into a slot. + * + * Thin wrapper around `QRhiResourceUpdateBatch::updateDynamicBuffer` + * (for Dynamic-usage arenas) or `uploadStaticBuffer` (Static). 
+ * Called by source nodes in their `update()` when their content + * changes — never per frame for unchanged data. + */ + void updateSlot( + QRhiResourceUpdateBatch& res, const Slot& slot, const void* data, + uint32_t size) noexcept; + + /** + * @brief Produce an ossia::gpu_slot_ref that can be stamped on a + * scene-graph component for the downstream preprocessor to consume. + * + * The returned ref captures (arena tag, offset, size, internal slot + * index, generation). The preprocessor uses isLive() to validate it + * before reading GPU bytes. + */ + ossia::gpu_slot_ref toOssiaRef(const Slot& slot) const noexcept + { + if(!slot.valid()) + return {}; + ossia::gpu_slot_ref r; + r.arena = (uint32_t)slot.arena; + r.offset = slotOffset(slot); + r.size = slot.size; + r.internal_index = slot.slot_index; + r.generation = slot.generation; + return r; + } + + /** + * @brief Return true if the ref still points at a live allocation. + * + * O(1): one array access + one uint32 compare. The generation table + * is bumped on every allocate() and free(), so a ref from a prior + * allocation at the same slot index fails the compare. + */ + bool isLive(const ossia::gpu_slot_ref& r) const noexcept + { + if(r.arena >= (uint32_t)Arena::Count_ || r.size == 0) + return false; + const auto& a = m_arenas[r.arena]; + if(r.internal_index >= a.slot_generations.size()) + return false; + return a.slot_generations[r.internal_index] == r.generation; + } + + // ─── Material texture arrays ────────────────────────────────────── + // + // Per-channel static texture arrays shared across all preprocessors + // in this RenderList. Static textures dedup by texture_source pointer + // — every producer that references the same asset gets the same + // layer. Dynamic handles (video textures, runtime GPU outputs) get + // per-slot bindings in the `dynamicTextures` vector — the bound + // aux-texture name is `Dyn` in consumer shaders. 
+ // + // Source-authored by nature: the textures belong to an asset / a + // wired GPU handle, independent of which preprocessor is looking. + // Shared state avoids re-decoding + re-uploading the same JPEG for + // every preprocessor. + + enum class TextureChannel : uint8_t + { + BaseColor = 0, + MetalRough = 1, + Normal = 2, + Emissive = 3, + Occlusion = 4, // Separate glTF occlusionTexture (when distinct from MR). + Count_ = 5 + }; + + // Default layer size + max dynamic slots. Matched across channels so + // samplers are interchangeable and consumer shaders can declare a + // fixed sampler count. + static constexpr int kTextureLayerSize = 1024; + // Bumped from 2 to 4: with LRU eviction in place the cap matters less + // (recycled slots stay fresh), but a higher floor reduces churn in + // scenes that legitimately use 3-4 distinct dynamic textures per + // channel (multi-camera capture, layered video). Stays comfortably + // under the 16-samplers-per-stage RHI floor at 4 channels × 4 slots + // + static arrays + skybox/IBL. + static constexpr int kMaxDynamicSlots = 4; + + // Wave 2 S2-shader: per-channel static buckets. Each bucket holds + // textures of ONE (format, pixelSize) tuple. Distinct tuples go into + // distinct buckets; consumer shaders declare N `sampler2DArray`s per + // channel and switch on the bucket field decoded from + // MaterialGPU::textureRefs (see tex_ref_static in SceneGPUState.hpp). + // + // Runtime cap is 16 (kMaxBuckets), chosen to stay within Vulkan's + // default VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER pool budget: 5 + // channels × 16 buckets + ~10 dynamic slots ≈ 90 samplers per + // pipeline, well under 256. Real scenes typically need 1-3 buckets + // per channel. Shader sampler arrays in classic_pbr_full.frag MUST + // stay in sync (baseColorArray0..baseColorArray15 etc). 
+ // + // The tex_ref_static encoding (SceneGPUState.hpp:74) reserves a 7-bit + // bucket field (0..127), giving headroom to grow kMaxBuckets up to 128 + // without changing the packed layout or shader decode masks. Growing + // beyond 16 requires enlarging the shader array declarations and + // verifying the descriptor pool budget on the target backend. + // + // GLES 3.1 / WebGL 2 guarantee only 16 textures per stage; those + // targets need a reduced-bucket preset variant (follow-up). + // + // Small scenes pay nothing: buckets are allocated lazily as texture + // uploads discover new (format, size) combinations. + static constexpr int kMaxBuckets = 16; + + /** + * @brief Channel texture state with multi-bucket support. + * + * The MaterialGPU::textureRefs[] encoding is + * `source:2 | bucket:7 | layer:23` — the 7-bit bucket field + * addresses up to 128 distinct (format, pixelSize) tuples in the + * encoding; the runtime cap is kMaxBuckets (currently 16). Wave 1 of the rollout (Plan 09 S2-infra) keeps exactly + * ONE bucket live per channel: same shipping behaviour as the + * pre-refactor single-array path, shaders unchanged. Wave 2 + * (S2-shader) lifts the cap — the preprocessor allocates a new + * bucket when a texture of a new (format, pixelSize) appears, and + * shipped shaders grow a bucket-switch ladder in sample_slot(). + * + * The Bucket struct holds everything that used to be at channel + * scope (QRhiTexture*, layers, layerMap) plus the discriminating + * (format, pixelSize) tuple. Dynamic (runtime-GPU) slots stay at + * channel scope — they carry opaque QRhiTexture*s with no + * canonical format/size, so no sensible bucket to live in. + */ + struct TextureChannelState + { + struct Bucket + { + QRhiTexture* array{}; // QRhiTexture::TextureArray + channel flags + QRhiTexture::Format format{QRhiTexture::RGBA8}; + QSize pixelSize; // all layers in a bucket share this size + int layers{}; // current layer count + + // Per-bucket sampler config. 
Bucket key extended to include this: + // distinct (format, size, sampler_config) tuples land in distinct + // buckets so per-glTF-texture wrap/filter modes are honoured even + // when multiple materials share a channel array. + ossia::texture_sampler_config sampler_config{}; + QRhiSampler* sampler{}; // created on first allocation; owned + + // Dedup: texture_source shared_ptr pointer → layer index in + // this bucket's `array`. Append-only within a materials list; + // cleared when the list changes. + ossia::flat_map layerMap; + }; + + // Wave 1 invariant: buckets.size() <= 1. Wave 2: up to kMaxBuckets + // (currently 16) — findOrCreateBucket() refuses to grow past that. + std::vector buckets; + + // Dynamic (runtime-GPU) slot map. Keyed by the QRhi-assigned + // globally-unique resource id (`QRhiResource::globalResourceId()`, + // monotonic uint64) rather than the raw `QRhiTexture*` pointer. + // The system allocator is allowed to recycle freed pointer values, + // and qrhivulkan.cpp:5909-5912 explicitly documents the same hazard + // for SRB tracking — keying by the stable id makes a recycled + // address always look like a fresh resource here too. + // + // Slots are recycled via LRU eviction: when the map fills up and a + // new texture id arrives, the slot with the smallest dynamicSlotLastUse + // counter is evicted to make room. Without the eviction path, a long + // session with any resolution-changing producer (window-capture, NDI + // source-switch, video file resolution change mid-stream) hit the + // slot cap (2 at the time) after two distinct globalResourceIds and every subsequent + // texture returned -1 → tex_ref_none() (material's dynamic texture + // silently blanks). LRU bumps lastUse on every access so the evicted + // slot is the least recently resolved one; it is unreferenced only as + // long as no more than kMaxDynamicSlots textures are live per channel. 
+ ossia::flat_map dynamicSlotMap; + std::vector dynamicTextures; // slot idx → texture + std::vector dynamicSlotLastUse; // slot idx → access counter at last lookup + uint64_t dynamicSlotCounter{0}; // monotonic, bumped on each resolve + + // Wave-1 shims. Callers that haven't been updated to loop over + // buckets[] go through these for legacy single-bucket semantics. + // Returns null / 0 when no bucket has been allocated yet. + QRhiTexture* primaryArray() const noexcept + { + return buckets.empty() ? nullptr : buckets[0].array; + } + int primaryLayers() const noexcept + { + return buckets.empty() ? 0 : buckets[0].layers; + } + + // Access or lazily create bucket 0 with an owned (format, size). + // Kept for init-time fallback allocation only — production code + // goes through findOrCreateBucket() which selects the right bucket + // for the texture's actual (format, size). + Bucket& ensurePrimary(QRhiTexture::Format fmt, QSize sz) + { + if(buckets.empty()) + buckets.emplace_back(); + auto& b = buckets[0]; + b.format = fmt; + b.pixelSize = sz; + return b; + } + + // Find a bucket matching (fmt, sz); create a new one if none + // matches and we haven't hit kMaxBuckets. Returns `{bucket_index, + // pointer}`. On overflow returns `{-1, nullptr}` — caller must + // handle (typically emits a warning + `tex_ref_none`). + // + // Bucket identity is the exact (format, pixelSize) tuple — no + // rounding. Most real scenes have < 4 distinct tuples per + // channel; a Sponza-size asset mix sits comfortably at 2-3. + std::pair + findOrCreateBucket(QRhiTexture::Format fmt, QSize sz) + { + for(std::size_t i = 0; i < buckets.size(); ++i) + { + if(buckets[i].format == fmt && buckets[i].pixelSize == sz) + return {(int)i, &buckets[i]}; + } + if((int)buckets.size() >= kMaxBuckets) + return {-1, nullptr}; + buckets.emplace_back(); + auto& b = buckets.back(); + b.format = fmt; + b.pixelSize = sz; + return {(int)buckets.size() - 1, &b}; + } + + // Sampler-config-aware variant. 
Bucket key = (format, pixelSize, + // sampler_config). Used by the glTF path so a scene with mixed + // wrap modes (e.g., a tiled floor with REPEAT plus a UI element + // with CLAMP_TO_EDGE) splits across buckets, each with its own + // QRhiSampler. This overload always compares sampler_config, so a + // given config only matches buckets whose sampler_config equals it. + // The 2-tuple overload matches on (format, pixelSize) alone and can + // return a bucket holding a different sampler config — + // NOTE(review): avoid mixing both overloads on one channel. + std::pair + findOrCreateBucket( + QRhiTexture::Format fmt, QSize sz, + const ossia::texture_sampler_config& sampler_cfg) + { + for(std::size_t i = 0; i < buckets.size(); ++i) + { + if(buckets[i].format == fmt && buckets[i].pixelSize == sz + && buckets[i].sampler_config == sampler_cfg) + return {(int)i, &buckets[i]}; + } + if((int)buckets.size() >= kMaxBuckets) + return {-1, nullptr}; + buckets.emplace_back(); + auto& b = buckets.back(); + b.format = fmt; + b.pixelSize = sz; + b.sampler_config = sampler_cfg; + return {(int)buckets.size() - 1, &b}; + } + }; + + /** + * @brief Shared state for one of the PBR texture channels + * (TextureChannel has five entries, including Occlusion). + * Preprocessors / producers read-modify this in place; contents are + * view-independent (asset identity drives layer assignment) so + * sharing across preprocessors is correct. + */ + TextureChannelState& textureChannel(TextureChannel ch) noexcept + { + return m_textureChannels[(std::size_t)ch]; + } + const TextureChannelState& textureChannel(TextureChannel ch) const noexcept + { + return m_textureChannels[(std::size_t)ch]; + } + + /** + * @brief Shader-visible aux-texture name for a channel's static array + * (`baseColorArray`, `metalRoughArray`, `normalArray`, `emissiveArray`). + */ + static const char* textureChannelArrayName(TextureChannel ch) noexcept; + + /** + * @brief Shader-visible aux-texture name base for a channel's dynamic + * slots (`baseColorDyn`, `metalRoughDyn`, `normalDyn`, `emissiveDyn`). + * Full name is `<base><slot_index>` (NOTE(review): placeholder + * reconstructed, confirm the exact format), slot_index < kMaxDynamicSlots. 
+ */ + static const char* textureChannelDynBaseName(TextureChannel ch) noexcept; + + /** + * @brief QRhiTexture creation flags for a channel. sRGB channels + * (base color, emissive) get hardware sRGB→linear on sample; MR and + * normal stay linear. + */ + static QRhiTexture::Flags textureChannelFlags(TextureChannel ch) noexcept; + + /** + * @brief Register a runtime GPU texture handle for this channel's + * dynamic-slot set. Returns the slot index (0 .. kMaxDynamicSlots-1). + * + * A handle that is already in the channel's dynamic slot map keeps + * its slot across frames, so multiple producers AND the + * preprocessor can call resolveDynamicSlot in any order within a + * frame and agree on the same answer for the same handle. + * + * NOTE(review): the comments on TextureChannelState describe LRU + * eviction once all kMaxDynamicSlots (4, bumped from 2) slots of a + * channel are taken. The previous wording of this doc (-1 on + * exhaustion, slot kept until the registry is destroyed, LRU as a + * future concern) predates that — confirm against the + * implementation whether -1 can still be returned. + */ + int resolveDynamicSlot(TextureChannel channel, void* native_handle) noexcept; + + // ─── Mesh arena manager (Plan 09 S4, post-fix) ─────────────────── + // + // Per-mesh slab allocator over the attribute streams of the MDI + // concatenated geometry (see MeshStream: positions, normals, + // texcoords, tangents, colors, texcoords1, indices). Each stream is + // a single growth-capped QRhiBuffer. + // + // CRITICAL invariant for indirect-draw correctness: a single + // `baseVertex` value is applied to ALL vertex bindings by the GPU + // (see VkDrawIndexedIndirectCommand::vertexOffset). So per-mesh + // byte offsets across vertex streams MUST satisfy + // pos_byte_off = baseVertex * 16 + // nrm_byte_off = baseVertex * 16 + // uv_byte_off = baseVertex * 8 + // tan_byte_off = baseVertex * 16 + // (colors / texcoords1 presumably follow the same rule with strides + // 16 / 8 — NOTE(review): confirm in meshSlabOffsetBytes.) + // Original design used 5 INDEPENDENT OffsetAllocators (one per + // stream). 
For sequential allocations from a fresh pool that holds, + // but as soon as alloc/free traffic fragments the streams the + // per-stream allocators pick free blocks of different size-bins and + // the offsets diverge → vertex shader reads attribute[v] from the + // wrong slab → garbage normals (back-face cull → mesh disappears), + // 1-pixel-wide texcoord smear, etc. + // + // Fixed design: TWO shared allocators — + // * `m_vertexAllocator` in VERTEX units (cap = 8M vertex slots) + // * `m_indexAllocator` in INDEX units (cap = 8M index slots) + // Each slab carries one `vertex_slot` and one `index_slot`. Per- + // stream byte offsets are derived as `vertex_slot.offset * stride` + // and `index_slot.offset * 4`. Lockstep is structurally guaranteed. + // + // Cache: stable_id hit → reuse slab, skip upload. Miss → fresh + // allocation. Sweep frees slabs unseen for `grace` frames. + // + // Backing buffer sizes (pointer-stable across the registry's + // lifetime; downstream bindings resolve once): + // positions / normals / tangents 128 MB (8M verts × 16 B) + // texcoords 64 MB (8M verts × 8 B) + // indices 32 MB (8M idx × 4 B) + // + // Indirect draw: `baseVertex = vertex_slot.offset`, + // `firstIndex = index_slot.offset`. + + enum class MeshStream : uint8_t + { + Positions = 0, + Normals = 1, + Texcoords = 2, // TEXCOORD_0 (primary UV). + Tangents = 3, + Colors = 4, // glTF COLOR_0, vec4 (vec3 sources padded with alpha=1). + Texcoords1 = 5, // glTF TEXCOORD_1 (lightmap / secondary UV). + Indices = 6, + Count_ = 7 + }; + + // Bytes per element per stream. Matches the MDI output layout + // the existing rasterizer presets consume: + // positions/normals = vec3 padded to vec4 (std430 alignment). + // tangents = vec4. + // colors = vec4 (vec3 sources padded with alpha=1). + // texcoords[_1] = vec2. + // indices = uint32. 
+ static constexpr uint32_t kMeshStride[(std::size_t)MeshStream::Count_] + = {16, 16, 8, 16, 16, 8, 4}; + + // Bytes of capacity reserved per stream at init time. These are the + // "kMinCap" pre-sizing budgets — generous enough to avoid realloc + // churn on normal scene growth. If a scene exceeds these, allocate() + // returns a sentinel allocation and the caller skips the mesh. + // + // 128 MB positions × 16B stride = 8M verts. + // 128 MB normals/tangents/colors matches. + // 64 MB texcoords (8B) = 8M verts. + // 64 MB texcoords1 matches. + // 32 MB indices (4B) = 8M indices. + static constexpr uint32_t kMeshCapBytes[(std::size_t)MeshStream::Count_] + = { + 128u * 1024u * 1024u, + 128u * 1024u * 1024u, + 64u * 1024u * 1024u, + 128u * 1024u * 1024u, + 128u * 1024u * 1024u, // colors + 64u * 1024u * 1024u, // texcoords1 + 32u * 1024u * 1024u, + }; + + /** + * @brief Slab handle returned by MeshArenaManager::acquire. + * + * One per mesh (keyed on stable_id). Holds ONE vertex-unit allocation + * (shared across positions / normals / texcoords / tangents) and ONE + * index-unit allocation. Per-stream byte offsets are derived in + * meshSlabOffsetBytes() as `vertex_slot.offset * stride` / + * `index_slot.offset * 4`. This guarantees baseVertex consistency + * across all vertex bindings even after fragmentation — see the + * "CRITICAL invariant" block above. + * + * `last_seen_frame` is bumped each frame the owner calls + * markSeen(); sweep() frees slabs whose last_seen is older than + * `current_frame - grace`. Grace = FramesInFlight + 1 is the + * safe default (let in-flight draws finish). 
+ */ + struct MeshSlab + { + uint64_t stable_id{}; + OffsetAllocator::Allocation vertex_slot{}; // offset/size in vertex units + OffsetAllocator::Allocation index_slot{}; // offset/size in index units + uint32_t vertex_count{}; + uint32_t index_count{}; + uint32_t last_seen_frame{}; + bool freshly_allocated{}; // true on the frame the slab was created + }; + + /// Acquire a slab for a mesh. Returns an existing slab on stable_id + /// hit (zero-cost, no upload needed); allocates fresh on miss. + /// Returns nullptr on allocator exhaustion. + /// + /// `freshly_allocated` on the returned slab signals "caller must + /// upload the mesh's bytes via uploadMeshStream(...)". + /// + /// `current_frame` is required so that the count-mismatch grace-queue + /// enqueue stamps a real release frame (not 0). Without it, after the + /// first `grace` frames of the session every count-mismatch deferred + /// release is freed instantly on the very next sweep, defeating the + /// guard against in-flight GPU draws referencing the old offset. + MeshSlab* acquireMeshSlab( + uint64_t stable_id, + uint32_t vertex_count, + uint32_t index_count, + uint32_t current_frame) noexcept; + + /// Mark a slab as seen this frame so sweep() doesn't reclaim it. + void markMeshSlabSeen(uint64_t stable_id, uint32_t current_frame) noexcept; + + /// Release slabs whose `last_seen_frame < current_frame - grace`. + /// Grace defaults to 2 (covers FramesInFlight+1 on typical backends). + void sweepMeshSlabs(uint32_t current_frame, uint32_t grace = 2) noexcept; + + /// Free pending-release slabs whose `released_frame + grace <= current_frame` + /// from the OffsetAllocator. Called by `sweepMeshSlabs` (phase-2) and by + /// `acquireMeshSlab` *before* its fresh allocate, so a count-mismatch whose + /// previous slot has served its grace can recycle that capacity in the same + /// `update()` instead of triggering a spurious "pool exhausted" warning. 
+ /// Does not touch the *SlotsUsed trackers — those are decremented eagerly at + /// logical-release time (enqueue) so phase-2 free is purely allocator + /// bookkeeping. + void drainExpiredPendingReleases( + uint32_t current_frame, uint32_t grace = 2) noexcept; + + /// Explicit release of a slab by stable_id (used on scene teardown). + /// The release is enqueued into the pending-releases grace queue and freed + /// from the OffsetAllocator only after `grace` frames have elapsed, matching + /// the same contract as sweepMeshSlabs. Pass the current render-frame counter + /// so the sweep can determine when it is safe to reclaim the sub-allocation. + void releaseMeshSlab(uint64_t stable_id, uint32_t current_frame) noexcept; + + /// Byte offset of a stream within its backing buffer. Use directly + /// as `uploadStaticBuffer(buf, offset, size, data)`. + uint32_t meshSlabOffsetBytes( + const MeshSlab& slab, MeshStream stream) const noexcept; + + /// Backing QRhiBuffer for a stream. Stable pointer across the + /// registry's lifetime (pre-sized, never grown). + QRhiBuffer* meshStreamBuffer(MeshStream s) const noexcept; + + /// Upload CPU bytes into a slab's stream. Thin wrapper around + /// QRhiResourceUpdateBatch::uploadStaticBuffer at the slab's + /// computed offset. + void uploadMeshStream( + QRhiResourceUpdateBatch& res, const MeshSlab& slab, + MeshStream s, const void* data, uint32_t size) noexcept; + + /// Total bytes in use per stream (for S6 telemetry panel). + uint32_t meshStreamUsedBytes(MeshStream s) const noexcept; + uint32_t meshStreamFreeBytes(MeshStream s) const noexcept; + +private: + struct ArenaState + { + QRhiBuffer* buffer{}; + uint32_t slot_stride{0}; // bytes per slot (arena layout is a packed + // std430-compatible array of this stride) + uint32_t slot_count{0}; // total slots (capacity_bytes = stride × count) + QRhiBuffer::UsageFlags usage{}; + QRhiBuffer::Type type{QRhiBuffer::Dynamic}; + + // LIFO stack of free slot indices. 
Push on free, pop on allocate. + // O(1) alloc / free, no fragmentation (every slot is the same size). + std::vector free_slots; + + // Per-slot generation, indexed by slot_index. Sized to slot_count + // at init() and bumped on every allocate()/free() to that slot. + // Consumers check the stamped generation in their gpu_slot_ref via + // isLive(). + std::vector slot_generations; + }; + + std::array m_arenas{}; + + std::array + m_textureChannels{}; + + // Per-stream backing buffers (one QRhiBuffer per attribute). + // Allocations are NOT per-stream anymore: a single shared + // m_vertexAllocator hands out vertex-unit slots that all four + // vertex streams (positions/normals/texcoords/tangents) interpret + // through their own stride, and m_indexAllocator handles indices. + // This keeps per-stream byte offsets in lockstep — required for + // indirect-draw baseVertex correctness across fragmentation. + struct MeshStreamState + { + QRhiBuffer* buffer{}; + uint32_t capacity_bytes{}; + QRhiBuffer::UsageFlags usage{}; + }; + std::array m_meshStreams{}; + + // Shared vertex / index allocators (slot units, not bytes). + // capacity_slots = min(stream_capacity_bytes / stream_stride) across + // the four vertex streams = 8M for the default sizes; index pool + // capacity = 8M slots. + std::unique_ptr m_vertexAllocator; + std::unique_ptr m_indexAllocator; + uint32_t m_vertexSlotsCapacity{}; + uint32_t m_indexSlotsCapacity{}; + uint32_t m_vertexSlotsUsed{}; + uint32_t m_indexSlotsUsed{}; + + ossia::hash_map m_meshSlabs; + + // Slabs whose `released_frame` is set are waiting out the grace + // period before their OffsetAllocator allocations return to the + // free list. Prevents use-after-free when an in-flight draw still + // references the old offset. 
+ struct PendingRelease + { + uint64_t stable_id{}; + uint32_t released_frame{}; + OffsetAllocator::Allocation vertex_slot{}; + OffsetAllocator::Allocation index_slot{}; + }; + std::vector m_pendingReleases; + + QRhi* m_rhi{}; + + // Set by seedDefaults() after writing the default-MaterialGPU bytes + // into Material arena slot 0. Idempotent guard so repeated calls are + // free. + bool m_defaults_seeded{false}; +}; + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.cpp new file mode 100644 index 0000000000..0a65ef5e9f --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.cpp @@ -0,0 +1,111 @@ +#include + +#include + +namespace score::gfx +{ + +void GpuTimings::record(std::string_view name, double ms) noexcept +{ + // Samples of 0 typically mean "backend doesn't support timestamps" or + // "resolved value not yet available" — don't pollute the rolling + // mean with those. An explicit clear happens via reset(). + if(ms <= 0.0) + return; + + std::lock_guard lk{m_mutex}; + + auto it = std::find_if( + m_entries.begin(), m_entries.end(), + [&](const Entry& e) { return e.name == name; }); + + if(it == m_entries.end()) + { + Entry e; + e.name.assign(name); + e.history.fill(0.0); + e.last_ms = ms; + e.mean_ms = ms; + e.max_ms = ms; + e.history[0] = ms; + e.history_index = 1 % kHistorySize; + e.sample_count = 1; + e.frames_since_observed = 0; + m_entries.push_back(std::move(e)); + return; + } + + // Ring-buffer update + rolling mean + max over the window. 
+ it->last_ms = ms; + it->history[it->history_index] = ms; + it->history_index = (it->history_index + 1) % kHistorySize; + if(it->sample_count < kHistorySize) + ++it->sample_count; + it->frames_since_observed = 0; + + double sum = 0.0; + double m = 0.0; + for(int i = 0; i < it->sample_count; ++i) + { + const double v = it->history[i]; + sum += v; + m = std::max(m, v); + } + it->mean_ms = sum / double(it->sample_count); + it->max_ms = m; +} + +void GpuTimings::tickFrame() noexcept +{ + std::lock_guard lk{m_mutex}; + for(auto& e : m_entries) + ++e.frames_since_observed; + + // Drop entries not observed for a while — nodes get reconfigured, + // passes come and go, keeping stale ghosts in the panel is noise. + m_entries.erase( + std::remove_if( + m_entries.begin(), m_entries.end(), + [](const Entry& e) { + return e.frames_since_observed > kStaleThreshold; + }), + m_entries.end()); +} + +std::vector GpuTimings::snapshot() const +{ + std::lock_guard lk{m_mutex}; + return m_entries; +} + +void GpuTimings::reset() noexcept +{ + std::lock_guard lk{m_mutex}; + m_entries.clear(); +} + +ScopedGpuTimer::ScopedGpuTimer( + QRhiCommandBuffer& cb, GpuTimings& timings, std::string_view name) + : m_cb{cb} + , m_timings{timings} + , m_name{name} +{ + // QRhi only exposes a CB-wide timestamp via lastCompletedGpuTime() — + // there is no per-pass sub-range API. Recording that value here (under + // a per-pass name) would cause every ScopedGpuTimer in the same frame + // to write the identical number under different names, making the S6 + // panel show the full-frame cost against every individual pass. + // + // The frame-total is recorded once per frame in RenderList::renderInternal + // under the "frame" bucket. ScopedGpuTimer's job is to emit the debug + // marker brackets (visible in RenderDoc / Nsight) without duplicating + // the timing attribution. 
+ m_cb.debugMarkBegin(QByteArray::fromRawData(m_name.data(), (qsizetype)m_name.size())); +} + +ScopedGpuTimer::~ScopedGpuTimer() +{ + m_cb.debugMarkEnd(); +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.hpp new file mode 100644 index 0000000000..14c736413d --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/GpuTiming.hpp @@ -0,0 +1,126 @@ +#pragma once +#include + +#include + +#include +#include +#include +#include +#include + +namespace score::gfx +{ +/** + * @brief Named GPU timing collector (Plan 09 S0 / S6). + * + * QRhi exposes only a single `QRhiCommandBuffer::lastCompletedGpuTime()` + * value — the elapsed GPU time of the most recently COMPLETED frame on + * that CB. Qt documents that value in seconds — NOTE(review): confirm + * callers convert to milliseconds before calling record(). There is + * no per-pass sub-range API, so `ScopedGpuTimer` only emits debug + * markers and never calls record(); per its constructor comment the + * whole-CB time is recorded once per frame under the "frame" name in + * RenderList::renderInternal. + * + * Results are always one frame late (the GPU must complete, then the + * CPU reads back the resolved timestamp). Callers expecting live + * numbers should treat the read as "previous frame's time". + * + * The collector is per-RenderList. For each name, record() keeps the + * last sample plus the mean and max over the last kHistorySize + * positive samples; `snapshot()` returns a copy of all entries + * for the S6 observability panel. + * + * Thread model: record(), tickFrame(), snapshot() and reset() all lock + * the same `std::mutex` (there is no shared/exclusive split), so the + * panel's snapshot() may run while the render side records. + * Lock contention is negligible (one update/frame, + * one read/ui-tick). 
+ */ +class SCORE_PLUGIN_GFX_EXPORT GpuTimings +{ +public: + static constexpr int kHistorySize = 64; + + struct Entry + { + std::string name; + double last_ms{0.0}; + double mean_ms{0.0}; + double max_ms{0.0}; + std::array history{}; + int history_index{0}; + int sample_count{0}; // capped at kHistorySize; used to avoid cold-start bias + int frames_since_observed{0}; + }; + + GpuTimings() = default; + GpuTimings(const GpuTimings&) = delete; + GpuTimings& operator=(const GpuTimings&) = delete; + + /** + * @brief Record an observation for a named pass. + * + * @p ms may be 0 when caps.timestamps is false or when the backend + * hasn't resolved a timestamp yet. Zero samples skip the rolling + * mean update. + */ + void record(std::string_view name, double ms) noexcept; + + /** + * @brief Tick once per frame. Entries not observed for more than + * `kStaleThreshold` frames are dropped. + */ + void tickFrame() noexcept; + + /** + * @brief Snapshot of all entries for the observability panel. + * + * Returns a copy so the caller doesn't need to hold a lock while + * iterating. Cost: O(n_entries); typical n ≤ 32. + */ + std::vector snapshot() const; + + /** + * @brief Reset all state. Called on RenderList re-init. + */ + void reset() noexcept; + +private: + static constexpr int kStaleThreshold = 120; // drop entries after 2s at 60fps + + mutable std::mutex m_mutex; + std::vector m_entries; +}; + +/** + * @brief RAII helper that brackets a named pass region for GPU frame-debug. + * + * Emits `debugMarkBegin` / `debugMarkEnd` around the enclosed code so + * RenderDoc, Nsight, and Metal Frame Debugger show pass boundaries in + * captures. Does NOT record timing data — `QRhiCommandBuffer::lastCompletedGpuTime()` + * returns a CB-wide delta with no per-pass resolution, so attributing it + * to individual passes would print the same full-frame cost against every + * named region. 
+ * + * The whole-CB frame time is recorded once per frame in + * `RenderList::renderInternal` under the `"frame"` bucket. Per-pass + * sub-range timestamps require explicit QRhi timestamp queries, which + * are not yet exposed by the RHI abstraction layer. + */ +class SCORE_PLUGIN_GFX_EXPORT ScopedGpuTimer +{ +public: + ScopedGpuTimer( + QRhiCommandBuffer& cb, GpuTimings& timings, std::string_view name); + ~ScopedGpuTimer(); + + ScopedGpuTimer(const ScopedGpuTimer&) = delete; + ScopedGpuTimer& operator=(const ScopedGpuTimer&) = delete; + +private: + QRhiCommandBuffer& m_cb; + GpuTimings& m_timings; + std::string m_name; +}; + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp index ef95a59a27..1d18057118 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.cpp @@ -1,5 +1,6 @@ #include "ISFNode.hpp" +#include #include #include #include @@ -71,25 +72,25 @@ struct no_delay_edges static void graphwalk( score::gfx::Node* node, std::vector& list, GraphImpl& g, - VertexMap& m) + VertexMap& m, ossia::flat_set& visited) { auto sink_desc = m[node]; for(auto inputs : node->input) { for(auto edge : inputs->edges) { - if(!edge->source->node->addedToGraph) + auto* src_node = edge->source->node; + if(visited.insert(src_node).second) { - list.push_back(edge->source->node); + list.push_back(src_node); - auto src_desc = boost::add_vertex(edge->source->node, g); - m[edge->source->node] = src_desc; - edge->source->node->addedToGraph = true; + auto src_desc = boost::add_vertex(src_node, g); + m[src_node] = src_desc; boost::add_edge(src_desc, sink_desc, edge->type, g); } else { - auto src_desc = m[edge->source->node]; + auto src_desc = m[src_node]; boost::add_edge(src_desc, sink_desc, edge->type, g); } } @@ -100,14 +101,16 @@ static void graphwalk(std::vector& model_nodes) { GraphImpl g; VertexMap m; + ossia::flat_set visited; + auto k = 
boost::add_vertex(model_nodes.front(), g); m[model_nodes.front()] = k; - model_nodes.front()->addedToGraph = true; + visited.insert(model_nodes.front()); std::size_t processed = 0; while(processed != model_nodes.size()) { - graphwalk(model_nodes[processed], model_nodes, g, m); + graphwalk(model_nodes[processed], model_nodes, g, m, visited); processed++; } @@ -236,6 +239,62 @@ void Graph::recreateOutputRenderList(OutputNode& output) std::shared_ptr& renderer = *it; if(renderer.get() == output.renderer()) { + // Pre-condition: recreateOutputRenderList MUST be called outside + // any active beginFrame/endFrame block. The Window::resize -> + // resizeSwapChain -> onResize -> here chain is invoked at the + // top of Window::render BEFORE beginFrame (Window.cpp:148-151), + // so this should always hold. Assert it to catch any future + // path that triggers the resize from inside a render frame. + if(auto rs = output.renderState(); rs && rs->rhi) + SCORE_ASSERT(!rs->rhi->isRecordingFrame()); + + // Drain the GPU before tearing down the old RenderList. release() + // walks every renderer and triggers a torrent of delete / + // deleteLater on QRhi objects (textures, samplers, buffers, + // SRBs, pipelines). On Vulkan, sibling outputs (BackgroundNode's + // beginOffscreenFrame, MultiWindowNode per-window CBs, the + // resizing window's own previous-frame CB) may still hold those + // resources in pending state. Without this drain, the next time + // ScenePreprocessor's runInitialPasses records vkCmdCopyBuffer / + // vkCmdPipelineBarrier into a CB the rhi believes is fresh, + // validation fires (-recording / -in-use), eventual device loss. + // + // FIX-A added rhi->finish() inside ScreenNode::destroyOutput and + // BackgroundNode::destroyOutput, but the + // `Window::resize → onResize → recreateOutputRenderList` path + // never enters those — it tears down the RenderList directly. 
+ if(auto rs = output.renderState(); rs && rs->rhi) + { + auto* rhi = rs->rhi; + rhi->finish(); + + // Force a no-op offscreen frame on each frame slot so BOTH + // cmdPools are reset symmetrically. QRhi-Vulkan's finish() + // resets only `cmdPool[currentFrameSlot]` + // (qrhivulkan.cpp:2617-2629); the OTHER slot's pool stays + // untouched. If a sibling output (BackgroundNode / + // PreviewNode / MultiWindowNode) drives its own + // beginOffscreenFrame on a separate timer, its + // ensureCommandPoolForNewFrame on the un-reset slot finds + // CBs still in pending state from the pre-resize era → + // vkResetCommandPool VUID-00040, then vkBeginCommandBuffer + // on active CB, eventual device loss in vkQueueSubmit. + // The cascade fires ~16 frames after resize because that's + // when the sibling timer happens to phase-align with the + // un-drained slot. + // + // beginOffscreenFrame advances currentFrameSlot + // (qrhivulkan.cpp:3025-3031) and resets the new slot's pool; + // endOffscreenFrame waits on ofr.cmdFence (drains every + // queued CB before the fence signals). Two iterations cover + // QVK_FRAMES_IN_FLIGHT=2. + for(int i = 0; i < 2; ++i) + { + QRhiCommandBuffer* cb{}; + if(rhi->beginOffscreenFrame(&cb) == QRhi::FrameOpSuccess) + rhi->endOffscreenFrame(); + } + } auto old_renderer = renderer; old_renderer->release(); old_renderer.reset(); @@ -252,7 +311,6 @@ void Graph::recreateOutputRenderList(OutputNode& output) } else { - qDebug("???"); } } } @@ -268,7 +326,25 @@ void Graph::initializeOutput(OutputNode* output, GraphicsApi graphicsApi) }; auto onResize = [this, output] { - // FIXME optimize if size did not change? + // FAST-PATH: pure viewport resize. Skip the full RL rebuild + // (release+createRenderList) — its cost (pipeline compiles, + // ScenePreprocessor REBUILD, mesh slab + texture array + // re-upload, every preprocessor SSBO from cap=0) is wasted + // when only the framebuffer size changed. 
Instead, mark every + // renderer's RT specs as dirty so the existing rt_changed + // surgical block in renderInternal recreates only the + // swapchain-sized RTs + rebinds the downstream samplers. + // Persistent GpuResourceRegistry + persistent ScenePreprocessor + // caches mean none of the heavier work is needed for a pure + // size change. + // + // Returns false if it cannot handle the change (no renderers + // yet, invalid size); the fallback below covers initial setup + // and any future "format / sample-count change" path. + if(auto* rl = output->renderer()) + if(auto rs = output->renderState(); rs) + if(rl->resizeSwapchainSizedTargets(rs->outputSize)) + return; recreateOutputRenderList(*output); }; @@ -287,8 +363,6 @@ void Graph::relinkGraph() for(auto r_it = m_renderers.begin(); r_it != m_renderers.end();) { auto& r = **r_it; - for(auto& node : m_nodes) - node->addedToGraph = false; assert(!r.nodes.empty()); @@ -306,11 +380,21 @@ void Graph::relinkGraph() if(model_nodes.size() > 1) { bool invalid_renderlist = false; + // Acquire a resource update batch for both brand-new renderers + // (whose init() uploads material UBOs, creates samplers, etc.) and + // reused renderers that we just released (whose init() must recreate + // freed resources). Without reinitialising the reused path, a + // second execution after stop/start leaves every reused renderer + // in its released state forever. + QRhiResourceUpdateBatch* batch = r.state.rhi + ? 
r.state.rhi->nextResourceUpdateBatch() + : nullptr; for(auto node : model_nodes) { score::gfx::NodeRenderer* rn{}; auto it = node->renderedNodes.find(&r); - if(it == node->renderedNodes.end()) + const bool is_new = (it == node->renderedNodes.end()); + if(is_new) { if((rn = node->createRenderer(r))) { @@ -318,7 +402,6 @@ void Graph::relinkGraph() node->renderedNodes.emplace(&r, rn); node->renderedNodesChanged(); - //rn->init(r); } else { @@ -331,12 +414,31 @@ void Graph::relinkGraph() rn = it->second; SCORE_ASSERT(rn); rn->release(r); - //rn->init(r); } SCORE_ASSERT(rn); + if(batch) + rn->init(r, *batch); r.renderers.push_back(rn); } + // Fold the batch into the RenderList's initial batch so its uploads + // (vertex buffers, placeholder UBOs, samplers) land before the first + // render frame. `merge` copies entries but doesn't release `batch` + // back to the pool — release it explicitly, or we leak a pool slot + // per relinkGraph call and eventually exhaust the 64-slot pool. + if(batch) + { + if(r.initialBatch()) + { + r.initialBatch()->merge(batch); + batch->release(); + } + else + { + r.setInitialBatch(batch); + } + } + // If a node couldn't be recreated, we skip the whole thing if(invalid_renderlist) { @@ -344,11 +446,6 @@ void Graph::relinkGraph() r_it = m_renderers.erase(r_it); break; } - - // for(auto node : r.renderers) - // { - // node->init(r); - // } } else if(model_nodes.size() == 1) { @@ -406,10 +503,12 @@ std::shared_ptr Graph::createRenderList(OutputNode* output, std::shared_ptr state) { auto ptr = std::make_shared(*output, state); + // Forward the session-wide AssetTable (if any) so ScenePreprocessor + // and other renderers can hit the content-hash decode cache + // instead of decoding every texture per-RenderList. Plan 09 S1. 
+ ptr->setAssetTable(m_assetTable); state->renderer = ptr; output->setRenderer(ptr); - for(auto& node : m_nodes) - node->addedToGraph = false; #if 0 for(auto& model : m_nodes) qDebug() << "Model: " << typeid(*model).name(); @@ -463,22 +562,511 @@ Graph::createRenderList(OutputNode* output, std::shared_ptr state) { r.init(); - if(model_nodes.size() > 1) + // Compute m_requiresDepth from the node graph BEFORE + // createAllInputRenderTargets — RT creation reads it. Mirrors + // maybeRebuild's recompute at RenderList.cpp:484-486. { - // Create all input render targets centrally before any node init(). - // This ensures RTs are available regardless of init order - // (matches what maybeRebuild does). - r.createAllInputRenderTargets(); + bool requiresDepth = false; + for(auto node : r.nodes) + requiresDepth |= node->requiresDepth; + r.markRequiresDepth(requiresDepth); + } - auto batch = r.initialBatch(); - for(auto node : r.renderers) - node->init(r, *batch); + // Create all input render targets centrally before any node init(). + // This ensures RTs are available regardless of init order + // (matches what maybeRebuild does). + r.createAllInputRenderTargets(); + + // Always init all renderers, even when only the output node exists. + // This ensures the output renderer's internal render target (e.g. + // ScaledRenderer::m_inputTarget) is created and available for + // incremental edge additions later. + auto batch = r.initialBatch(); + for(auto node : r.renderers) + { + node->init(r, *batch); + // Sync change indices so the first render frame doesn't see + // a spurious rt_changed. Between init and the first render, + // update_inputs() can deliver render_target_spec messages that + // increment the node's counter. 
Without syncing, the renderer's + // stale index (-1) mismatches → rt_changed triggers → release+init + // Sync change indices to prevent spurious rt_changed, then set + // materialChanged and geometryChanged so the first update() uploads + // data and processes geometry. This matches what the old maybeRebuild() + // did. renderTargetSpecsChanged is left false (synced) to prevent + // the destructive rt_changed block from triggering. + node->checkForChanges(); + node->materialChanged = true; + node->geometryChanged = true; + node->renderTargetSpecsChanged = false; } + + // Mark built. Skips the wasteful + previously-dangerous mid-frame + // release()+init() that maybeRebuild(false) would otherwise fire on + // the first render frame. Without this, every viewport resize did + // a full RenderList teardown TWICE in quick succession (once here, + // once on the next frame in maybeRebuild) -- multi-second resizes + // for non-trivial scenes. The mid-frame teardown was also the root + // of the CB-cascade chased through commits 51400fc37 / 5b2da1d48 / + // 7f9f1e36a. The safety net (C2 drain in maybeRebuild) stays in + // place for forced rebuilds and the actual size-change cycle in + // maybeRebuild on subsequent frames. + // + // The historical concerns the previous comment cited (null + // processUBO in MRT blit passes, feedback ISF persistent textures, + // surgical rt_changed handling) were all fixed in their respective + // commits. The two missing pieces vs maybeRebuild's release+init + // (m_requiresDepth recompute, markBuilt) are now done here. + r.markBuilt(); } return ptr; } +void Graph::removeNodeFromRenderLists(Node* node) +{ + for(auto& [rl, renderer] : node->renderedNodes) + { + renderer->releaseState(*rl); + delete renderer; + + ossia::remove_erase(rl->renderers, renderer); + ossia::remove_erase(rl->nodes, node); + } + + node->renderedNodes.clear(); + node->renderedNodesChanged(); +} + +void Graph::removeNodeAndEdges(Node* node) +{ + // 1. 
For each edge involving this node, notify the render lists + // so that upstream/downstream renderers clean up their passes. + // Must happen BEFORE edge deletion (onEdgeRemoved reads the edge). + for(auto* edge : m_edges) + { + if(edge->source->node == node || edge->sink->node == node) + { + // Notify affected render lists + Node* other = (edge->source->node == node) + ? edge->sink->node + : edge->source->node; + + for(auto& rl : m_renderers) + { + if(ossia::contains(rl->nodes, other) + || ossia::contains(rl->nodes, node)) + { + rl->onEdgeRemoved(*edge); + } + } + } + } + + // 2. Delete all edges involving this node from m_edges. + // Edge destructor removes from source->edges and sink->edges. + for(auto it = m_edges.begin(); it != m_edges.end();) + { + Edge* edge = *it; + if(edge->source->node == node || edge->sink->node == node) + { + delete edge; + it = m_edges.erase(it); + } + else + { + ++it; + } + } + + // 3. Release the node's own renderers from all render lists. + removeNodeFromRenderLists(node); + + // 4. Retopological sort all affected render lists and notify outputs. + for(auto& rl : m_renderers) + { + retopologicalSort(*rl); + rl->output.onRendererChange(); + } + + // Note: does NOT remove from m_nodes — the caller (GfxContext::remove_node) + // handles that via Graph::removeNode(). +} + +void Graph::onEdgeRemoved( + Edge& edge, const ossia::hash_set* preserveSinks) +{ + Node* source = edge.source->node; + + for(auto& rl : m_renderers) + { + // Only act on render lists that contain the source node + if(!ossia::contains(rl->nodes, source)) + continue; + + // Delegate to the render list (must happen before edge destruction) + rl->onEdgeRemoved(edge, preserveSinks); + + // Do NOT retopological-sort or destroy unreachable renderers here. + // Removals are processed before additions in incrementalEdgeUpdate. + // A node that becomes temporarily unreachable during removal may become + // reachable again when additions are processed. 
Destroying its renderer + // would lose runtime state (mesh data, video frames, etc.) that can't + // be trivially recreated. + // + // reconcileAllRenderLists() runs after all adds/removes and handles + // the final reachability check, renderer cleanup, and retopo sort. + } +} + +void Graph::createPassForEdgeIfMissing(Edge& edge) +{ + Node* source = edge.source->node; + + for(auto& rl : m_renderers) + { + // Check if the source node has a renderer in this render list + auto rn_it = source->renderedNodes.find(rl.get()); + if(rn_it == source->renderedNodes.end()) + continue; + + auto* renderer = rn_it->second; + + // Check if the sink node is also in this render list + if(!ossia::contains(rl->nodes, edge.sink->node)) + continue; + + // Check if a pass already exists for this edge + if(renderer->hasOutputPassForEdge(edge)) + continue; + + // Ensure the sink port has a render target (if needed) + Port* sink = edge.sink; + if(sink->type == Types::Image + && (sink->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource + && sink->node != &rl->output) + { + if(rl->renderTargetForInputPort(*sink).renderTarget == nullptr) + { + int cur_port = 0; + for(auto* in : sink->node->input) + { + if(in == sink) + break; + cur_port++; + } + auto spec = sink->node->resolveRenderTargetSpecs(cur_port, *rl); + if(!sink->node->hasExplicitRenderTargetSize(cur_port)) + { + ossia::small_flat_map emptySpecs; + QSize downstream = rl->resolveDownstreamSize(sink->node, emptySpecs); + if(!downstream.isEmpty()) + spec.size = downstream; + } + bool wantsDepth = rl->requiresDepth(*sink); + bool wantsSamplableDepth + = (sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + auto rt = createRenderTarget( + rl->state, spec.format, spec.size, rl->samples(), + wantsDepth || wantsSamplableDepth, wantsSamplableDepth); + rl->m_inputRenderTargets[sink] = std::move(rt); + } + } + + // Create the output pass on the source renderer. 
+ // Allocate a fresh batch, collect `addOutputPass`'s updates, then + // either promote it to the RL's initial batch or merge + release. + // QRhiResourceUpdateBatch::merge does NOT release the source batch + // — without the explicit release() the 64-slot pool exhausts after + // enough edges (e.g. when a live-connected shader triggers + // createAllMissingPasses over a large scene graph) and the next + // nextResourceUpdateBatch() returns null → crash on merge. + auto* batch = rl->state.rhi->nextResourceUpdateBatch(); + if(!batch) + continue; + renderer->addOutputPass(*rl, edge, *batch); + + if(rl->initialBatch()) + { + rl->initialBatch()->merge(batch); + batch->release(); + } + else + { + rl->setInitialBatch(batch); + } + } +} + +void Graph::createAllMissingPasses() +{ + for(auto* edge : m_edges) + createPassForEdgeIfMissing(*edge); +} + +void Graph::updateAllSinkSamplers() +{ + for(auto* edge : m_edges) + updateSinkSampler(*edge); +} + +void Graph::updateSinkSampler(Edge& edge) +{ + Port* sink = edge.sink; + if(sink->type != Types::Image) + return; + + // GrabsFromSource ports don't have a render target — they need the + // upstream's QRhiTexture directly via textureForOutput(). This path + // covers cubemaps, 3D textures, AND texture arrays (e.g. + // ScenePreprocessor's base_color_array feeding classic_pbr_textured). + // Without this, the sink keeps binding emptyTexture (2D, single-layer) + // into what the shader expects as sampler2DArray → Vulkan validation + // error VUID-vkCmdDrawIndexed-viewType-07752, nothing renders. 
+ if((sink->flags & Flag::GrabsFromSource) == Flag::GrabsFromSource) + { + Port* source = edge.source; + if(!source || !source->node) + return; + for(auto& rl : m_renderers) + { + auto sink_rn_it = sink->node->renderedNodes.find(rl.get()); + if(sink_rn_it == sink->node->renderedNodes.end()) + continue; + auto src_rn_it = source->node->renderedNodes.find(rl.get()); + if(src_rn_it == source->node->renderedNodes.end()) + continue; + if(auto* tex = src_rn_it->second->textureForOutput(*source)) + sink_rn_it->second->updateInputTexture(*sink, tex); + } + return; + } + + for(auto& rl : m_renderers) + { + auto sink_rn_it = sink->node->renderedNodes.find(rl.get()); + if(sink_rn_it == sink->node->renderedNodes.end()) + continue; + + // For output nodes, the RT comes from the renderer itself + if(sink->node == &rl->output) + { + auto rt = sink_rn_it->second->renderTargetForInput(*sink); + if(rt.texture) + sink_rn_it->second->updateInputTexture(*sink, rt.texture, rt.depthTexture); + } + else + { + // For intermediate nodes, the RT comes from the centralized map + auto rt = rl->renderTargetForInputPort(*sink); + if(rt.texture) + sink_rn_it->second->updateInputTexture(*sink, rt.texture, rt.depthTexture); + } + } +} + +void Graph::reconcileAllRenderLists() +{ + for(auto& rl : m_renderers) + { + // 1. Re-walk the graph from output to discover all reachable nodes. + auto* outputNode = rl->nodes.front(); + rl->nodes.clear(); + rl->nodes.push_back(outputNode); + graphwalk(rl->nodes); + + // 2. Find nodes that are newly reachable (no renderer yet) + // and nodes that are no longer reachable (have renderer but not in walk). + ossia::flat_set reachable(rl->nodes.begin(), rl->nodes.end()); + // Collect all nodes that have renderers for this RL + std::vector nodesWithRenderers; + for(auto* node : m_nodes) + { + if(node->renderedNodes.find(rl.get()) != node->renderedNodes.end()) + nodesWithRenderers.push_back(node); + } + + // 3. Remove renderers for nodes no longer reachable. 
+ for(auto* node : nodesWithRenderers) + { + if(!reachable.contains(node)) + { + auto rn_it = node->renderedNodes.find(rl.get()); + if(rn_it != node->renderedNodes.end()) + { + auto* renderer = rn_it->second; + BUFTRACE() << "reconcile: releasing unreachable renderer=" + << (void*)renderer + << " node_id=" << node->nodeId + << " (any downstream node still referencing this " + "renderer's buffers via process() caches will see " + "stale pointers → ASan target)"; + renderer->releaseState(*rl); + delete renderer; + node->renderedNodes.erase(rn_it); + node->renderedNodesChanged(); + } + } + } + + // 4. Ensure render targets exist for all input ports BEFORE creating + // renderers. initState() → initInputSamplers() looks up the RT + // texture — if the RT doesn't exist yet, the sampler gets emptyTexture + // and the SRB will have wrong bindings. + for(auto* node : rl->nodes) + { + if(node == &rl->output) + continue; + int cur_port = 0; + for(auto* in : node->input) + { + if(in->type == Types::Image + && (in->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource) + { + if(rl->renderTargetForInputPort(*in).renderTarget == nullptr) + { + // Create the missing render target + auto spec = node->resolveRenderTargetSpecs(cur_port, *rl); + if(!node->hasExplicitRenderTargetSize(cur_port)) + { + ossia::small_flat_map emptySpecs; + QSize downstream = rl->resolveDownstreamSize(node, emptySpecs); + if(!downstream.isEmpty()) + spec.size = downstream; + } + bool wantsDepth = rl->requiresDepth(*in); + bool wantsSamplableDepth + = (in->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + auto rt = createRenderTarget( + rl->state, spec.format, spec.size, rl->samples(), + wantsDepth || wantsSamplableDepth, wantsSamplableDepth); + rl->m_inputRenderTargets[in] = std::move(rt); + } + } + cur_port++; + } + } + + // 5. Create renderers for newly-reachable nodes (AFTER render targets + // exist so that initState → initInputSamplers finds the correct textures). 
+ QRhiResourceUpdateBatch* batch = rl->state.rhi->nextResourceUpdateBatch(); + bool batchUsed = false; + + for(auto* node : rl->nodes) + { + if(node->renderedNodes.find(rl.get()) == node->renderedNodes.end()) + { + if(auto* rn = node->createRenderer(*rl)) + { + rn->nodeId = node->nodeId; + node->renderedNodes.emplace(rl.get(), rn); + node->renderedNodesChanged(); + + // All renderers now implement initState(). Pass creation for + // individual edges is handled by createPassForEdgeIfMissing + // after reconciliation, ensuring all renderers + RTs exist first. + rn->initState(*rl, *batch); + rn->checkForChanges(); + rn->materialChanged = true; + rn->geometryChanged = true; + rn->renderTargetSpecsChanged = false; + + // Seed downstream consumers with this newly-created renderer's + // outputs so live-inserted scene producers (Camera, Environment, + // Light) don't need a full stop/restart to take + // effect. Default no-op for everything else. + rn->seedInitialOutputs(*rl); + + batchUsed = true; + } + } + } + + // 6. Pass creation is now handled entirely by createPassForEdgeIfMissing + // in incrementalEdgeUpdate, after reconciliation completes and all + // renderers + RTs exist. No sweep needed here. + + // 7. Rebuild renderers vector from node order. + // Also sync change indices for ALL renderers (not just newly created) + // to prevent spurious rt_changed on the first render frame. + // Without this, existing renderers whose nodes received process() + // messages (via update_inputs) between reconciliation and rendering + // could have stale indices, triggering a full release+init in the + // rt_changed block — which destroys the feedback ISF's persistent textures. 
+ rl->renderers.clear(); + // Filter nodes to only those with renderers + std::vector validNodes; + validNodes.reserve(rl->nodes.size()); + for(auto* node : rl->nodes) + { + auto rn_it = node->renderedNodes.find(rl.get()); + if(rn_it != node->renderedNodes.end()) + { + validNodes.push_back(node); + auto* rn = rn_it->second; + rl->renderers.push_back(rn); + + // Sync change indices and prevent spurious rt_changed + rn->checkForChanges(); + rn->renderTargetSpecsChanged = false; + } + } + rl->nodes = std::move(validNodes); + + // 8. Submit batch and notify output. `merge()` copies entries but + // does NOT release the source batch, so we have to do it ourselves + // — otherwise the 64-slot pool leaks one slot per reconcile. + if(batchUsed) + { + if(rl->initialBatch()) + { + rl->initialBatch()->merge(batch); + batch->release(); + } + else + { + rl->setInitialBatch(batch); + } + } + else + { + batch->release(); + } + + rl->output.onRendererChange(); + } +} + +void Graph::retopologicalSort(RenderList& rl) +{ + // Save the output node (always first in the list) + auto* outputNode = rl.nodes.front(); + + // Clear and re-walk + rl.nodes.clear(); + rl.nodes.push_back(outputNode); + graphwalk(rl.nodes); + + // Rebuild renderers vector from the new node order. + // Only include nodes that actually have a renderer for this RenderList. + // Nodes discovered by the graph walk but without renderers (e.g. just + // added to the graph but not yet processed by reconcileAllRenderLists) are excluded + // from both lists to prevent the render loop from asserting. 
+ rl.renderers.clear(); + std::vector valid_nodes; + valid_nodes.reserve(rl.nodes.size()); + for(auto* node : rl.nodes) + { + auto it = node->renderedNodes.find(&rl); + if(it != node->renderedNodes.end()) + { + valid_nodes.push_back(node); + rl.renderers.push_back(it->second); + } + } + rl.nodes = std::move(valid_nodes); +} + Graph::Graph() { } Graph::~Graph() @@ -493,6 +1081,19 @@ Graph::~Graph() out->destroyOutput(); } + // Belt-and-braces: any OutputNode registered via addNode but not yet + // promoted into m_outputs (e.g. preview outputs added via + // createSingleRenderList without a subsequent createAllRenderLists) + // would otherwise leak its swapchain / RPD on shutdown. + for(auto* n : m_nodes) + { + if(auto* out = dynamic_cast(n)) + { + if(!ossia::contains(m_outputs, out)) + out->destroyOutput(); + } + } + clearEdges(); } @@ -545,25 +1146,6 @@ void Graph::removeEdge(Port* source, Port* sink) } } -void Graph::addAndLinkEdge(Port* source, Port* sink, Process::CableType t) -{ - addEdge(source, sink, t); - - auto output = dynamic_cast(sink->node); - SCORE_ASSERT(output); - - recreateOutputRenderList(*output); -} - -void Graph::unlinkAndRemoveEdge(Port* source, Port* sink) -{ - removeEdge(source, sink); - auto output = dynamic_cast(sink->node); - SCORE_ASSERT(output); - - recreateOutputRenderList(*output); -} - void Graph::destroyOutputRenderList(score::gfx::OutputNode& output) { auto it = ossia::find_if( @@ -584,7 +1166,6 @@ void Graph::destroyOutputRenderList(score::gfx::OutputNode& output) } else { - qDebug("???"); } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp index 6431b0d412..20f202d0ef 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Graph.hpp @@ -7,6 +7,10 @@ #include #include +namespace Gfx +{ +class AssetTable; +} namespace score::gfx { class OutputNode; @@ -43,15 +47,42 @@ struct SCORE_PLUGIN_GFX_EXPORT Graph */ void 
removeEdge(Port* source, Port* sink); - /** - * @brief Add an edge between two nodes and creates relevant pipelines. - */ - void addAndLinkEdge(Port* source, Port* sink, Process::CableType t); - - /** - * @brief Remove an edge between two nodes and free the pipelines - */ - void unlinkAndRemoveEdge(Port* source, Port* sink); + /// Remove a node's renderers from all render lists. + void removeNodeFromRenderLists(Node* node); + + /// Incrementally remove a non-output node: notify renderers of each + /// edge being removed, delete edges from m_edges, release the node's + /// renderers, retopological sort affected render lists, remove from m_nodes. + void removeNodeAndEdges(Node* node); + + /// Called when an edge is removed from the graph. + /// + /// @param preserveSinks Optional set of sink Ports whose input render + /// target should be kept alive even if this edge was their only feed. + /// GfxContext::incrementalEdgeUpdate uses this to bridge the brief + /// "sink has 0 edges" window that appears during a mid-batch filter + /// insertion (A→B removed, A→F and F→B added in the same batch). + /// Without this, B's input RT would be destroyed and immediately + /// re-allocated with the same spec. + void + onEdgeRemoved(Edge& edge, const ossia::hash_set* preserveSinks = nullptr); + + /// For an added edge, update the sink renderer's input sampler + /// to point to the (possibly new) render target texture. + void updateSinkSampler(Edge& edge); + + /// Create missing passes and update samplers for ALL edges in ALL render lists. + void createAllMissingPasses(); + void updateAllSinkSamplers(); + + /// For an added edge, create the output pass on the source renderer + /// if it exists but doesn't already have a pass for this edge. 
+ void createPassForEdgeIfMissing(Edge& edge); + + /// After all edges have been added/removed, reconcile all render lists: + /// retopological sort, create renderers for newly-reachable nodes, + /// create render targets and passes, remove unreachable nodes. + void reconcileAllRenderLists(); /** * @brief Remove all edges. @@ -93,7 +124,24 @@ struct SCORE_PLUGIN_GFX_EXPORT Graph return m_outputs; } + /** + * @brief Inject the session-wide AssetTable (Plan 09 S1). + * + * GfxContext owns the AssetTable and calls this once at graph + * construction. All RenderLists subsequently created by this + * Graph receive the pointer via their constructor, so the + * preprocessor can hit the content-hash cache when decoding + * texture_source / buffer_resource payloads. + * + * Null is allowed (tests, early teardown) — consumers guard. + */ + void setAssetTable(Gfx::AssetTable* a) noexcept { m_assetTable = a; } + Gfx::AssetTable* assetTable() const noexcept { return m_assetTable; } + private: + /// Re-run topological sort for a render list and rebuild renderer ordering. + void retopologicalSort(RenderList& rl); + void initializeOutput(OutputNode* output, GraphicsApi graphicsApi); void createOutputRenderList(OutputNode& output); void recreateOutputRenderList(OutputNode& output); @@ -107,5 +155,9 @@ struct SCORE_PLUGIN_GFX_EXPORT Graph std::vector m_edges; std::vector m_outputs; + + // Session-wide decode cache. Non-owning; GfxContext owns the + // actual AssetTable. May be null in tests or during teardown. 
+ Gfx::AssetTable* m_assetTable{}; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp index 6a82673425..e7de2544c1 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.cpp @@ -31,7 +31,24 @@ struct isf_input_port_vis void operator()(const isf::long_input& in) noexcept { - *reinterpret_cast(data) = in.def; + // Enum mode (VALUES/LABELS set): in.def is the *index* into VALUES, but + // the shader and the UI pipeline downstream consume the numeric VALUE at + // that index. Look it up here so the initial UBO state matches what the + // ComboBox emits after any user interaction. String-valued VALUES fall + // back to the index (GLSL can't receive strings). + int initial = (int)in.def; + if(!in.values.empty()) + { + auto idx = std::min(in.def, in.values.size() - 1); + const auto& v = in.values[idx]; + if(auto i = ossia::get_if(&v)) + initial = (int)*i; + else if(auto d = ossia::get_if(&v)) + initial = (int)*d; + else + initial = (int)idx; + } + *reinterpret_cast(data) = initial; self.input.push_back(new Port{&self, data, Types::Int, {}}); data += 4; sz += 4; @@ -105,15 +122,38 @@ struct isf_input_port_vis void operator()(const isf::image_input& in) noexcept { - auto flags = in.dimensions == 3 ? Flag::GrabsFromSource : Flag{}; + // GrabsFromSource = "fetch the QRhiTexture* straight from the upstream + // renderer's textureForOutput() instead of allocating our own render + // target". Required for: + // - 3D textures (volumes): no render-target path exists for them. + // - Texture arrays: consumers (e.g. classic_pbr_textured sampling a + // per-material base_color_array from ScenePreprocessor) need the + // producer's actual QRhiTexture array, not an empty render-target + // texture created on their side. 
+ // - "STATIC: true" image inputs (shader-author opt-in): the upstream + // is a CPU producer that publishes a long-lived QRhiTexture + // (precomputed LUTs, IBL bakes, asset caches). Without this opt-in + // the consumer would silently allocate an unused render target and + // bind that empty texture instead of the producer's real one, + // making the input read all zeros. + auto flags = (in.dimensions == 3 || in.is_array || in.is_static) + ? Flag::GrabsFromSource + : Flag{}; if(in.depth) flags = flags | Flag::SamplableDepth; + if(in.is_array) + flags = flags | Flag::TextureArray; + if(in.dimensions == 3) + flags = flags | Flag::ThreeDimensional; self.input.push_back(new Port{&self, {}, Types::Image, flags, {}}); } void operator()(const isf::cubemap_input& in) noexcept { - self.input.push_back(new Port{&self, {}, Types::Image, Flag::GrabsFromSource, {}}); + auto flags = Flag::GrabsFromSource | Flag::Cubemap; + if(in.depth) + flags = flags | Flag::SamplableDepth; + self.input.push_back(new Port{&self, {}, Types::Image, flags, {}}); } void operator()(const isf::audio_input& audio) noexcept @@ -121,6 +161,8 @@ struct isf_input_port_vis self.m_audio_textures.push_back({}); auto& data = self.m_audio_textures.back(); data.fixedSize = audio.max; + data.filter = audio.sampler.filter; + data.wrap = audio.sampler.wrap; self.input.push_back(new Port{&self, &data, Types::Audio, {}}); } @@ -130,6 +172,8 @@ struct isf_input_port_vis auto& data = self.m_audio_textures.back(); data.fixedSize = audio.max; data.mode = data.Histogram; + data.filter = audio.sampler.filter; + data.wrap = audio.sampler.wrap; self.input.push_back(new Port{&self, &data, Types::Audio, {}}); } @@ -139,6 +183,8 @@ struct isf_input_port_vis auto& data = self.m_audio_textures.back(); data.fixedSize = audio.max; data.mode = AudioTexture::Mode::FFT; + data.filter = audio.sampler.filter; + data.wrap = audio.sampler.wrap; self.input.push_back(new Port{&self, &data, Types::Audio, {}}); } @@ -149,16 +195,24 @@ 
struct isf_input_port_vis // - read_only: input port // - write_only: output port // - read_write: output port only, buffer is persistent + // + // BUFFER_USAGE="indirect_draw[_indexed]": port additionally carries the + // IndirectDraw flag so renderers can route it to the indirect-draw + // mechanism on MeshBuffers. + + auto extra_flags = Flag{}; + if(in.buffer_usage == "indirect_draw" || in.buffer_usage == "indirect_draw_indexed") + extra_flags = extra_flags | Flag::IndirectDraw; if(in.access == "read_only") { // Create input port for read-only storage buffer - self.input.push_back(new Port{&self, {}, Types::Buffer, {}}); + self.input.push_back(new Port{&self, {}, Types::Buffer, extra_flags, {}}); } else if(in.access.contains("write")) { // Create output port for write-only storage buffer - self.output.push_back(new Port{&self, {}, Types::Buffer, {}}); + self.output.push_back(new Port{&self, {}, Types::Buffer, extra_flags, {}}); // Check for flexible array member if(!in.layout.empty()) @@ -172,9 +226,18 @@ struct isf_input_port_vis } } + void operator()(const isf::uniform_input& in) noexcept + { + // Read-only UBO sourced from upstream Buffer port. Renderers bind it via + // QRhiShaderResourceBinding::uniformBuffer (not bufferLoad). + self.input.push_back(new Port{&self, {}, Types::Buffer, Flag::UniformBuffer, {}}); + } + void operator()(const isf::texture_input& in) noexcept { - auto flags = in.dimensions == 3 ? Flag::GrabsFromSource : Flag{}; + auto flags = in.dimensions == 3 + ? (Flag::GrabsFromSource | Flag::ThreeDimensional) + : Flag{}; self.input.push_back(new Port{&self, {}, Types::Image, flags, {}}); } @@ -229,7 +292,9 @@ struct isf_input_port_vis if(in.access == "read_only") { // Input port for read-only image; 3D textures use GrabsFromSource - auto flags = in.is3D() ? Flag::GrabsFromSource : Flag{}; + auto flags = in.is3D() + ? 
(Flag::GrabsFromSource | Flag::ThreeDimensional) + : Flag{}; self.input.push_back(new Port{&self, {}, Types::Image, flags, {}}); } else if(in.access == "write_only" || in.access == "read_write") diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp index 805208ae0a..aa74aaa621 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFNode.hpp @@ -45,5 +45,32 @@ class ISFNode : public score::gfx::ProcessNode std::vector m_event_ports; int m_materialSize{}; + + // Reset all `event` input ports to 0 so they pulse true for exactly one + // frame after the upstream producer writes 1. Called at the end of each + // frame's update() — AFTER the material UBO has been staged via + // updateDynamicBuffer (which captures the value at call time), so + // resetting the CPU memory here doesn't affect what the shader reads + // this frame, only what would leak into the next frame if we didn't + // reset. + // + // Returns true if any port was actually firing. Callers should then set + // their NodeRenderer::materialChanged flag so the next frame re-uploads + // the now-zero event value — otherwise the gate-on-materialChanged + // upload path would skip the re-upload and leave the stale 1 in the GPU + // UBO indefinitely. 
+ [[nodiscard]] bool resetEventPortsAfterFrame() noexcept + { + bool any_fired = false; + for(int* p : m_event_ports) + { + if(p && *p != 0) + { + *p = 0; + any_fired = true; + } + } + return any_fired; + } }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp index 637269a244..c8e519c70a 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ISFVisitors.hpp @@ -1,8 +1,152 @@ #pragma once #include +#include + namespace score::gfx { +// --------------------------------------------------------------------------- +// Descriptor port walker (diagnostic 097, refactor R3) +// --------------------------------------------------------------------------- +// +// SINGLE source of truth for "how many input ports / output ports / samplers +// does each desc.inputs entry produce?". Every prior call site (CSF +// port_indices, RawRaster port_idx, RawRaster bindAuxTexturesInit, ISF +// IsfBindingsBuilder) had its own copy of this rule — and they had drifted +// (e.g. CSF over-counted inlets for write-only storage_input without a +// flex-array sizing field; IsfBindingsBuilder added a phantom inlet for every +// write-only csf_image_input). Mirrors `isf_input_port_vis` in ISFNode.cpp, +// which is the actual port-creation code. +// +// When a new isf::*_input variant is added, update isf_input_port_vis AND +// the matching `operator()` here — keep them in lockstep. 
+struct port_counts +{ + int inlets{}; //!< score input ports created by this desc.inputs entry + int outlets{}; //!< score output ports created + int samplers{}; //!< sampler slots in initInputSamplers (1 per image-like; + //!< +1 for image_input.depth on a non-GrabsFromSource port) + + port_counts& operator+=(const port_counts& o) noexcept + { + inlets += o.inlets; + outlets += o.outlets; + samplers += o.samplers; + return *this; + } +}; + +// Returns the port_counts contributed by a single input variant. Mirrors +// isf_input_port_vis (ISFNode.cpp) one-to-one. +struct isf_input_port_count_vis +{ + port_counts operator()(const isf::float_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::long_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::event_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::bool_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::point2d_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::point3d_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::color_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::audio_input&) const noexcept { return {1, 0, 0}; } + port_counts operator()(const isf::audioHist_input&) const noexcept{ return {1, 0, 0}; } + port_counts operator()(const isf::audioFFT_input&) const noexcept { return {1, 0, 0}; } + + port_counts operator()(const isf::image_input& in) const noexcept + { + // GrabsFromSource means no own render target → the matching depth sampler + // (image_input.depth==true) is also NOT created in initInputSamplers. + const bool grabs = (in.dimensions == 3 || in.is_array || in.is_static); + const int extra_depth_sampler = (in.depth && !grabs) ? 
1 : 0; + return {1, 0, 1 + extra_depth_sampler}; + } + port_counts operator()(const isf::cubemap_input&) const noexcept { return {1, 0, 1}; } + port_counts operator()(const isf::texture_input&) const noexcept { return {1, 0, 1}; } + + port_counts operator()(const isf::storage_input& in) const noexcept + { + // read_only: 1 input port (no output, no sampler). + // write/read_write: 1 output port; +1 input port if the layout's last + // field is a flexible array (synthesized long_input for sizing). + if(in.access == "read_only") + return {1, 0, 0}; + port_counts c{0, 1, 0}; + if(!in.layout.empty() + && in.layout.back().type.find("[]") != std::string::npos) + c.inlets = 1; + return c; + } + + port_counts operator()(const isf::uniform_input&) const noexcept + { + return {1, 0, 0}; + } + + port_counts operator()(const isf::csf_image_input& in) const noexcept + { + // read_only: 1 input port; write/read_write: 1 output port (no input). + if(in.access == "read_only") + return {1, 0, 0}; + return {0, 1, 0}; + } + + port_counts operator()(const isf::geometry_input& in) const noexcept + { + port_counts c{}; + if(in.attributes.empty()) + { + // Pass-through: 1 inlet + 1 outlet + c.inlets = 1; + c.outlets = 1; + } + else + { + for(const auto& attr : in.attributes) + if(attr.access == "read_only" || attr.access == "read_write") + { c.inlets = 1; break; } + for(const auto& attr : in.attributes) + if(attr.access == "write_only" || attr.access == "read_write") + { c.outlets = 1; break; } + } + // $USER ports → synthesized long_input each (1 inlet) + if(in.vertex_count.find("$USER") != std::string::npos) c.inlets++; + if(in.instance_count.find("$USER") != std::string::npos) c.inlets++; + for(const auto& aux : in.auxiliary) + if(aux.size.find("$USER") != std::string::npos) + c.inlets++; + return c; + } +}; + +// Walk desc.inputs once. 
For each input, the visitor receives: +// - the isf::input entry +// - the cumulative port_counts BEFORE this input (so cur.inlets is the +// index of the first input port this entry creates, if any) +// - the per-input port_counts delta (how many ports this entry creates) +// Cumulative state is then advanced before moving on. +// +// Callers needing a non-zero starting offset (e.g. RawRaster's port 0 is +// the implicit Geometry input) can pass it in `start` — its inlets/outlets +// are accumulated upfront. +template +inline void walk_descriptor_inputs( + const isf::descriptor& desc, port_counts start, F&& fn) +{ + port_counts cur = start; + for(const auto& inp : desc.inputs) + { + port_counts delta = ossia::visit(isf_input_port_count_vis{}, inp.data); + fn(inp, cur, delta); + cur += delta; + } +} + +// Convenience overload: zero starting offset. +template +inline void walk_descriptor_inputs(const isf::descriptor& desc, F&& fn) +{ + walk_descriptor_inputs(desc, port_counts{}, std::forward(fn)); +} + struct isf_input_size_vis { int sz{}; @@ -55,21 +199,32 @@ struct isf_input_size_vis // CSF-specific input handlers void operator()(const isf::storage_input& in) noexcept { - if(in.access.contains("write")) + // Must match what isf_input_port_vis (ISFNode.cpp) actually writes into the + // blob — and the synthesized "size" int it creates: ONLY a writable buffer + // whose layout ends in a flexible-array member. Reserving for every write + // buffer over-allocated the UBO (harmless, but desynced from the port + // visitor and the generated GLSL Params/material_t block). + if(in.access.contains("write") && !in.layout.empty() + && in.layout.back().type.find("[]") != std::string::npos) { (*this)(isf::long_input{}); } } + void operator()(const isf::uniform_input&) noexcept + { + // UBO inputs are bound from an upstream Buffer port; they do not + // contribute to the material UBO size. 
+ } + void operator()(const isf::texture_input in) noexcept { } void operator()(const isf::csf_image_input& in) noexcept { - if(in.access.contains("write")) - { - (*this)(isf::point2d_input{}); - (*this)(isf::long_input{}); - } + // isf_input_port_vis does NOT write anything into the material blob for + // write csf_image inputs (its point2d/long synthesis is commented out), so + // reserve nothing here — keep the size visitor and the port visitor (and + // hence the generated uniform block) in agreement. } void operator()(const isf::geometry_input& in) noexcept diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp index d7d6608329..8906d14a3f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ImageNode.cpp @@ -9,6 +9,7 @@ #include #endif +#include #include #include #include @@ -207,6 +208,11 @@ void ImagesNode::process(Message&& msg) case 5: // Images { + // getImages() acquires every image from Gfx::ImageCache (refcount + // bumped per image). Without a matching release on the no-change + // branch below, the cache refcount accumulated by one acquire per + // re-emit of the same control value — long sessions that re-fed + // the same image list every tick bled cache memory until quit. auto new_images = Gfx::getImages(*val, this->ctx); auto diff = [](const score::gfx::Image& lhs, const score::gfx::Image& rhs) { return lhs.path != rhs.path; @@ -245,6 +251,14 @@ void ImagesNode::process(Message&& msg) ++this->imagesChanged; } + else + { + // Same image set as before — release the freshly-acquired + // copy so the cache refcount returns to baseline. Without + // this, every re-emit on the same control value bumped + // ImageCache::m_refcounts by one per image and never paired. 
+ Gfx::releaseImages(new_images); + } break; } @@ -381,13 +395,13 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer } TextureRenderTarget renderTargetForInput(const Port& p) override { return {}; } - void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override { auto& n = static_cast(this->node); const auto& rs = renderer.state; - const Mesh& mesh = renderer.defaultQuad(); + m_mesh = &renderer.defaultQuad(); - defaultMeshInit(renderer, mesh, res); + defaultMeshInit(renderer, *m_mesh, res); processUBOInit(renderer); m_material.init(renderer, node.input, m_samplers); @@ -398,9 +412,15 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer recreateTextures(rhi); tile = n.tileMode; + + // Compile shaders for the "single" case std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders( rs, images_single_vertex_shader, images_single_fragment_shader); + // Compile shaders for the "tiled" case + std::tie(m_tiledVertexS, m_tiledFragmentS) = score::gfx::makeShaders( + rs, images_tiled_vertex_shader, images_tiled_fragment_shader); + // Create the sampler in which we are going to put the texture { auto sampler = createSampler(tile, rhi); @@ -408,34 +428,62 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer m_samplers.push_back({sampler, tex}); } - // Initialize the passes for the "single" case - defaultPassesInit(renderer, mesh); + m_initialized = true; + } - // Initialize the passes for the "tiled" case + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override + { + if(!m_mesh) + return; + if(this->node.output[0]->type != score::gfx::Types::Image) + return; + + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) { - auto [v, f] = score::gfx::makeShaders( - rs, images_tiled_vertex_shader, images_tiled_fragment_shader); - for(Edge* edge : this->node.output[0]->edges) + // Pass for the 
"single" case { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) - { - m_altPasses.emplace_back( - edge, score::gfx::buildPipeline( - renderer, mesh, v, f, rt, m_processUBO, m_material.buffer, - m_samplers)); - } + auto pip = score::gfx::buildPipeline( + renderer, *m_mesh, m_vertexS, m_fragmentS, rt, m_processUBO, + m_material.buffer, m_samplers); + if(pip.pipeline) + m_p.emplace_back(&edge, Pass{rt, pip, nullptr}); + } + + // Pass for the "tiled" case + { + auto pip = score::gfx::buildPipeline( + renderer, *m_mesh, m_tiledVertexS, m_tiledFragmentS, rt, m_processUBO, + m_material.buffer, m_samplers); + if(pip.pipeline) + m_altPasses.emplace_back(&edge, Pass{rt, pip, nullptr}); } } } + void removeOutputPass(RenderList& renderer, Edge& edge) override + { + // Remove from the single passes + GenericNodeRenderer::removeOutputPass(renderer, edge); + + // Remove from the tiled passes + auto it + = ossia::find_if(m_altPasses, [&](const auto& p) { return p.first == &edge; }); + if(it != m_altPasses.end()) + { + it->second.release(); + m_altPasses.erase(it); + } + } + void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override { auto& n = (static_cast(this->node)); if(n.tileMode != tile) { tile = n.tileMode; - auto [s, tex] = m_samplers[0]; + auto [s, tex, fb_] = m_samplers[0]; m_samplers.clear(); // Create a new sampler @@ -445,7 +493,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer // Replace it in the render passes auto replace_sampler = [](PassMap& passes, QRhiSampler* oldS, QRhiSampler* newS) { for(auto& pass : passes) - score::gfx::replaceSampler(*pass.second.srb, oldS, newS); + score::gfx::replaceSampler(*pass.second.p.srb, oldS, newS); }; replace_sampler(m_p, s, new_sampler); @@ -539,7 +587,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer auto replace_texture = [](PassMap& passes, QRhiSampler* sampler, QRhiTexture* tex) { for(auto& pass : passes) - 
score::gfx::replaceTexture(*pass.second.srb, sampler, tex); + score::gfx::replaceTexture(*pass.second.p.srb, sampler, tex); }; currentImageIndex = imageIndex(n.ubo.currentImageIndex, m_textures.size()); @@ -639,6 +687,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer { res.updateDynamicBuffer(m_material.buffer, 0, m_material.size, &m_ubo); } + materialChanged = false; } } @@ -651,7 +700,7 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer defaultRenderPass(renderer, mesh, cb, edge, m_altPasses); } - void release(RenderList& r) override + void releaseState(RenderList& r) override { for(auto tex : m_textures) { @@ -659,17 +708,17 @@ class ImagesNode::PreloadedRenderer : public GenericNodeRenderer } m_textures.clear(); - defaultRelease(r); + for(auto& pass : m_altPasses) + pass.second.release(); + m_altPasses.clear(); - { - for(auto& pass : m_altPasses) - pass.second.release(); - m_altPasses.clear(); - } + GenericNodeRenderer::releaseState(r); } struct ImagesNode::UBO m_ubo; - ossia::small_vector, 2> m_altPasses; + QShader m_tiledVertexS; + QShader m_tiledFragmentS; + ossia::small_vector, 2> m_altPasses; std::vector m_textures; bool m_uploaded = false; }; @@ -755,9 +804,9 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer if(rt.renderTarget) { m_altPasses.emplace_back( - edge, score::gfx::buildPipeline( + edge, Pass{rt, score::gfx::buildPipeline( renderer, mesh, v, f, rt, m_processUBO, m_material.buffer, - m_samplers)); + m_samplers), nullptr}); } } } @@ -770,7 +819,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer if(n.tileMode != tile) { tile = n.tileMode; - auto [s, tex] = m_samplers[0]; + auto [s, tex, fb_] = m_samplers[0]; m_samplers.clear(); @@ -781,7 +830,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer // Replace it in the render passes auto replace_sampler = [](PassMap& passes, QRhiSampler* oldS, QRhiSampler* newS) { for(auto& pass : passes) - 
score::gfx::replaceSampler(*pass.second.srb, oldS, newS); + score::gfx::replaceSampler(*pass.second.p.srb, oldS, newS); }; replace_sampler(m_p, s, new_sampler); @@ -803,7 +852,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer auto replace_texture = [](PassMap& passes, QRhiSampler* sampler, QRhiTexture* tex) { for(auto& pass : passes) - score::gfx::replaceTexture(*pass.second.srb, sampler, tex); + score::gfx::replaceTexture(*pass.second.p.srb, sampler, tex); }; auto sampler = m_samplers[0].sampler; @@ -854,7 +903,7 @@ class ImagesNode::OnTheFlyRenderer : public GenericNodeRenderer } struct ImagesNode::UBO m_prev_ubo; - ossia::small_vector, 2> m_altPasses; + ossia::small_vector, 2> m_altPasses; QRhiTexture* m_texture{}; bool m_uploaded = false; }; @@ -929,10 +978,10 @@ class FullScreenImageNode::Renderer : public GenericNodeRenderer ~Renderer() { } TextureRenderTarget renderTargetForInput(const Port& p) override { return {}; } - void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override { - const auto& mesh = renderer.defaultTriangle(); - defaultMeshInit(renderer, mesh, res); + m_mesh = &renderer.defaultTriangle(); + defaultMeshInit(renderer, *m_mesh, res); processUBOInit(renderer); m_material.init(renderer, node.input, m_samplers); std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders( @@ -962,7 +1011,7 @@ class FullScreenImageNode::Renderer : public GenericNodeRenderer m_samplers.push_back({sampler, m_texture}); } - defaultPassesInit(renderer, mesh); + m_initialized = true; } void update(RenderList& renderer, QRhiResourceUpdateBatch& res, score::gfx::Edge* edge) @@ -985,12 +1034,15 @@ class FullScreenImageNode::Renderer : public GenericNodeRenderer defaultRenderPass(renderer, mesh, cb, edge); } - void release(RenderList& r) override + void releaseState(RenderList& r) override { - m_texture->deleteLater(); - m_texture = nullptr; + if(m_texture) + { + 
m_texture->deleteLater(); + m_texture = nullptr; + } - defaultRelease(r); + GenericNodeRenderer::releaseState(r); } QRhiTexture* m_texture{}; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.cpp new file mode 100644 index 0000000000..40f882e04c --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.cpp @@ -0,0 +1,1016 @@ +#include "IsfBindingsBuilder.hpp" + +#include +#include +#include +#include +#include + +#include + +namespace score::gfx +{ + +// Centralized GLSL type → size table; see header comment for conventions. +int64_t glslTypeSizeBytes(std::string_view type) noexcept +{ + if(type == "float" || type == "int" || type == "uint" || type == "bool") + return 4; + if(type == "vec2" || type == "ivec2" || type == "uvec2") + return 8; + if(type == "vec3" || type == "ivec3" || type == "uvec3") + return 12; + if(type == "vec4" || type == "ivec4" || type == "uvec4") + return 16; + if(type == "mat2") + return 16; + if(type == "mat3") + return 48; + if(type == "mat4") + return 64; + return 16; +} + +int64_t std430ArrayStride(std::string_view type) noexcept +{ + // std430 keeps the vec4-aligned base alignment for vec3 array elements, + // so the per-element stride is 16 (4 bytes of trailing padding). Every + // other primitive shrinks to its packed size in std430. + if(type == "vec3" || type == "ivec3" || type == "uvec3") + return 16; + return glslTypeSizeBytes(type); +} + +} + +namespace score::gfx +{ + +int64_t std430LayoutSize( + const std::vector& layout) noexcept +{ + int64_t sz = 0; + for(const auto& f : layout) + { + auto type = f.type; + int64_t count = 1; + auto lbr = type.find('['); + if(lbr != std::string::npos) + { + auto rbr = type.find(']', lbr + 1); + if(rbr != std::string::npos && rbr > lbr + 1) + { + auto inner = type.substr(lbr + 1, rbr - lbr - 1); + if(!inner.empty()) + { + try { count = std::stoll(inner); } catch(...) 
{ count = 1; } + } + // else: empty '[]' means runtime-length — counted as 1 element for + // sizing the fixed part of the struct; the renderer sizes the buffer + // based on actual data. + } + type = type.substr(0, lbr); + } + int64_t element = glslTypeSizeBytes(type); + // std430: elements align to 16 bytes for vec3/mat arrays; keep it simple + // and align each field to 16 bytes to match the CSF renderer's convention. + element = (element + 15) & ~15; + sz += element * count; + } + if(sz == 0) + sz = 16; + return sz; +} + +int64_t glslTypeSizeBytes(std::string_view type, const isf::descriptor& d) noexcept +{ + // Built-in primitives go through the authoritative size table. + if(type == "float" || type == "int" || type == "uint" || type == "bool") + return 4; + if(type == "vec2" || type == "ivec2" || type == "uvec2") + return 8; + if(type == "vec3" || type == "ivec3" || type == "uvec3") + return 12; + if(type == "vec4" || type == "ivec4" || type == "uvec4") + return 16; + if(type == "mat2") return 16; + if(type == "mat3") return 48; + if(type == "mat4") return 64; + + // User-defined struct from the descriptor's TYPES section. We sum + // each field's natural size (no per-field 16-byte padding) so the + // result matches the actual GLSL std430 size of the emitted struct + // for scalar/vector-only layouts. This is what producers compare + // against when binding a struct-typed ATTRIBUTE (the AUXILIARY path + // uses `std430LayoutSize` instead, which over-pads each field for + // legacy reasons). For mixed-alignment layouts the producer should + // populate `element_byte_size` explicitly; the runtime trusts that + // value over this estimate. 
+ for(const auto& tdef : d.types) + { + if(tdef.name != type) + continue; + int64_t sz = 0; + for(const auto& f : tdef.layout) + { + auto fty = f.type; + int64_t count = 1; + auto lbr = fty.find('['); + if(lbr != std::string::npos) + { + auto rbr = fty.find(']', lbr + 1); + if(rbr != std::string::npos && rbr > lbr + 1) + { + auto inner = fty.substr(lbr + 1, rbr - lbr - 1); + if(!inner.empty()) + { + try { count = std::stoll(inner); } catch(...) { count = 1; } + } + } + fty = fty.substr(0, lbr); + } + sz += glslTypeSizeBytes(fty) * count; + } + return sz > 0 ? sz : 16; + } + + // Unknown — match the lenient default of the no-descriptor overload. + return 16; +} + +int64_t std430ArrayStride(std::string_view type, const isf::descriptor& d) noexcept +{ + // Only built-in vec3 needs the std430 padding promotion; user-defined + // structs already pad their fields at declaration time and their array + // stride is just the struct's std430 size. + if(type == "vec3" || type == "ivec3" || type == "uvec3") + return 16; + return glslTypeSizeBytes(type, d); +} + +} + +namespace +{ +// Internal alias for the existing AUXILIARY size sites that imported the old +// name from this translation unit; defer to the public helper. +inline int64_t isf_ssbo_elem_size( + const std::vector& layout) noexcept +{ + return score::gfx::std430LayoutSize(layout); +} +} + +namespace score::gfx +{ + +QRhiShaderResourceBinding::StageFlags visibilityToStages(std::string_view v) noexcept +{ + using Stage = QRhiShaderResourceBinding; + if(v == "fragment") + return Stage::FragmentStage; + if(v == "vertex") + return Stage::VertexStage; + if(v == "vertex+fragment" || v == "both" || v == "graphics" || v == "all") + return Stage::VertexStage | Stage::FragmentStage; + if(v == "compute") + return Stage::ComputeStage; + if(v == "none") + return {}; + // Default fallback: fragment visibility (matches the default in isf.hpp). 
+ return Stage::FragmentStage; +} + +void collectGraphicsStorageResources( + const isf::descriptor& desc, int firstBinding, GraphicsStorageResources& out) +{ + out.ssbos.clear(); + out.images.clear(); + out.indirectDrawBuffer = nullptr; + out.indirectDrawIndexed = false; + out.indirectDrawSsboIndex = -1; + + int binding = firstBinding; + + // walk_descriptor_inputs() advances port_idx in lockstep with + // isf_input_port_vis (ISFNode.cpp / ISFVisitors.hpp). Pre-refactor, this + // function had its own bookkeeping that did `port_idx++` for every + // desc.inputs entry — wrong for write-only storage_input (no input port + // unless flex-array sizing) and for write-only csf_image_input (no + // input port at all). Now port_idx == cur.inlets, which matches the + // actual ports created by ISFNode. + walk_descriptor_inputs( + desc, [&](const isf::input& inp, const port_counts& cur, const port_counts&) { + const int port_idx = cur.inlets; + if(auto* s = ossia::get_if(&inp.data)) + { + // Indirect-draw argument buffers don't need a shader-visible binding + // (the GPU reads them via cb.drawIndirect), but we still track them to + // refresh pointers from upstream ports. + if(!s->buffer_usage.empty()) + { + GraphicsSSBO e; + e.name = inp.name; + e.access = s->access; + e.buffer_usage = s->buffer_usage; + e.persistent = false; + e.owned = false; // Pointer comes from upstream + e.layout = s->layout; + e.stages = QRhiShaderResourceBinding::StageFlags{}; // No shader binding + e.binding = -1; + // Only read-only indirect-draw buffers come from an upstream + // input port; write variants are produced by an output port. + e.input_port_index = (s->access == "read_only") ? 
port_idx : -1; + out.ssbos.push_back(std::move(e)); + out.indirectDrawSsboIndex = (int)out.ssbos.size() - 1; + out.indirectDrawIndexed = (s->buffer_usage == "indirect_draw_indexed"); + return; + } + auto stages = visibilityToStages(s->visibility); + if(stages == QRhiShaderResourceBinding::StageFlags{}) + return; + GraphicsSSBO e; + e.name = inp.name; + e.access = s->access; + e.persistent = s->persistent; + e.owned = true; + e.size = isf_ssbo_elem_size(s->layout); + e.layout = s->layout; + e.stages = stages; + e.binding = binding++; + // Only read-only storage_inputs have a matching input port; write + // variants put the buffer on an OUTPUT port (no upstream rebind). + e.input_port_index = (s->access == "read_only") ? port_idx : -1; + if(s->persistent) + e.prev_binding = binding++; + out.ssbos.push_back(std::move(e)); + } + else if(auto* img = ossia::get_if(&inp.data)) + { + auto stages = visibilityToStages(img->visibility); + if(stages == QRhiShaderResourceBinding::StageFlags{} + || stages == QRhiShaderResourceBinding::ComputeStage) + return; + GraphicsStorageImage e; + e.name = inp.name; + e.access = img->access; + e.format = img->format; + e.is3D = img->is3D(); + // Cubemap / array shape flags must propagate from the parser to + // the runtime allocator AND to the GLSL emit; otherwise the + // descriptor type bound at SRB-create disagrees with the GLSL + // declaration (parser accepts CUBEMAP / IS_ARRAY at isf.cpp:1411 + // / :1426 but earlier versions of this collector kept only is3D, + // forcing the allocator into a flat 2D texture and the emit into + // `image2D`, triggering Vulkan VUID-VkGraphicsPipelineCreateInfo- + // layout-07990 at pipeline build). + e.cubemap = img->isCube(); + e.is_array = img->is_array; + e.persistent = img->persistent; + if(e.is3D && !img->depth_expression.empty()) + { + try + { + e.depth = std::stoi(img->depth_expression); + } + catch(...) + { + // Non-literal expression (e.g. 
"$DEPTH"): leave 0, use default at alloc time + } + } + if(e.is_array && !img->layers_expression.empty()) + { + try + { + e.layers = std::stoi(img->layers_expression); + } + catch(...) + { + // Non-literal expression (e.g. "$LAYERS"): leave 0; allocator picks default + } + } + e.owned = true; + e.stages = stages; + e.binding = binding++; + // Only read-only csf_image_inputs have a matching input port. + e.input_port_index = (img->access == "read_only") ? port_idx : -1; + if(img->persistent) + e.prev_binding = binding++; + out.images.push_back(std::move(e)); + } + else if(auto* uni = ossia::get_if(&inp.data)) + { + auto stages = visibilityToStages(uni->visibility); + if(stages == QRhiShaderResourceBinding::StageFlags{} + || stages == QRhiShaderResourceBinding::ComputeStage) + return; + GraphicsUBO e; + e.name = inp.name; + e.owned = false; // sourced from upstream port each frame + e.stages = stages; + e.binding = binding++; + e.input_port_index = port_idx; + out.ubos.push_back(std::move(e)); + } + }); +} + +// --- SSBO allocation ------------------------------------------------------ + +static QRhiBuffer* allocateSsbo( + QRhi& rhi, const std::string& name, const std::string& buffer_usage, + int64_t size) +{ + QRhiBuffer::UsageFlags flags = QRhiBuffer::StorageBuffer; +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + if(buffer_usage == "indirect_draw" || buffer_usage == "indirect_draw_indexed") + flags = flags | QRhiBuffer::IndirectBuffer; +#else + (void)buffer_usage; +#endif + auto* buf = rhi.newBuffer(QRhiBuffer::Static, flags, size); + buf->setName(QByteArray("ISF_SSBO_") + name.c_str()); + if(!buf->create()) + { + qWarning() << "Failed to create SSBO" << name.c_str(); + delete buf; + return nullptr; + } + return buf; +} + +static QRhiTexture::Format parseImageFormat(const std::string& fmt) +{ + std::string f = fmt; + for(auto& c : f) c = (char)std::tolower((unsigned char)c); + if(f == "rgba8") return QRhiTexture::RGBA8; + if(f == "bgra8") return QRhiTexture::BGRA8; 
+ if(f == "r8") return QRhiTexture::R8; + if(f == "rg8") return QRhiTexture::RG8; + if(f == "r16") return QRhiTexture::R16; + if(f == "rg16") return QRhiTexture::RG16; + if(f == "r16f") return QRhiTexture::R16F; + if(f == "r32f") return QRhiTexture::R32F; +// if(f == "rg16f") return QRhiTexture::RG16F; +// if(f == "rg32f") return QRhiTexture::RG32F; + if(f == "rgba16f") return QRhiTexture::RGBA16F; + if(f == "rgba32f") return QRhiTexture::RGBA32F; + + // Integer storage image formats — required for atomic image ops + // (imageAtomicOr / Add / Min / Max / Exchange / CompareExchange). + // Aliasing an integer SPIR-V OpTypeImage Format operand onto a float + // QRhiTexture::Format violates VUID-RuntimeSpirv-OpTypeImage-07752 + // and VUID-RuntimeSpirv-OpImageWrite-04469 (numeric-class mismatch + // between Sampled operand and the bound storage image's format). + // Mirror RenderedCSFNode.cpp's pattern: gate on Qt 6.10+ (when + // QRhiTexture exposed R{8,32}{UI,SI} and {RG,RGBA}{32}{UI,SI}). +#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0) + if(f == "r8ui") return QRhiTexture::R8UI; + if(f == "r32ui") return QRhiTexture::R32UI; + if(f == "rg32ui") return QRhiTexture::RG32UI; + if(f == "rgba32ui") return QRhiTexture::RGBA32UI; + if(f == "r8si" || f == "r8i") return QRhiTexture::R8SI; + if(f == "r32si" || f == "r32i") return QRhiTexture::R32SI; + if(f == "rg32si") return QRhiTexture::RG32SI; + if(f == "rgba32si") return QRhiTexture::RGBA32SI; +#endif + // r16ui / r16si / r16i are not exposed by QRhiTexture::Format at all, + // and on older Qt the wider integer formats are also absent. Refuse + // the binding rather than silently aliasing onto a float/UNORM format + // — atomics and integer image ops have undefined behaviour otherwise. 
+ if(f == "r8ui" || f == "r8si" || f == "r8i" + || f == "r16ui" || f == "r16si" || f == "r16i" + || f == "r32ui" || f == "r32si" || f == "r32i" + || f == "rg32ui" || f == "rg32si" + || f == "rgba32ui" || f == "rgba32si") + { + qWarning() << "ISF storage image format" << fmt.c_str() + << "requires Qt 6.10+ integer formats; refusing binding " + "(VUID-RuntimeSpirv-OpTypeImage-07752)."; + return QRhiTexture::UnknownFormat; + } + return QRhiTexture::RGBA8; +} + +// Sentinel zero-buffer used when an upstream SSBO/UBO port disconnects +// mid-session. Vulkan requires every SRB binding to point at a valid +// resource — without a sentinel, a disconnect leaves the binding +// pointing at a deleteLater'd QRhiBuffer (the prior upstream's, freed +// when its owning node was destroyed), and the next setShaderResources +// dereferences the freed pointer. +// +// 64 KiB is generous for any realistic UBO/SSBO layout that a graphics +// shader actually reads from (Vulkan UBO max is at least 16 KiB; SSBOs +// can be larger but disconnect-fallback reads return zeros regardless of +// the buffer's actual size, only its descriptor validity matters). One +// buffer covers both SSBO and UBO disconnects since QRhi accepts both +// usage flags on a single buffer; the descriptor type is set on the +// SRB binding side, not the buffer side. +static constexpr uint32_t kSentinelBufferSize = 64u * 1024u; + +// Allocate (and zero-fill) the sentinel disconnect-fallback buffer. +// Called from ensureStorageResources so the resource-update batch is in +// hand. Idempotent — store.sentinelBuffer is non-null after first call. 
+static void ensureSentinelBuffer( + QRhi& rhi, QRhiResourceUpdateBatch& res, GraphicsStorageResources& store) +{ + if(store.sentinelBuffer) + return; + auto* buf = rhi.newBuffer( + QRhiBuffer::Static, + QRhiBuffer::StorageBuffer | QRhiBuffer::UniformBuffer, + kSentinelBufferSize); + buf->setName("ISF_SentinelDisconnectBuffer"); + if(!buf->create()) + { + qWarning() << "Failed to create sentinel disconnect buffer"; + delete buf; + return; + } + // Zero-fill so disconnected SSBO/UBO reads return predictable zeros + // rather than uninitialised memory. + static const std::vector zeros(kSentinelBufferSize, 0); + res.uploadStaticBuffer(buf, 0, kSentinelBufferSize, zeros.data()); + store.sentinelBuffer = buf; + store.sentinelSize = kSentinelBufferSize; +} + +void ensureStorageResources( + QRhi& rhi, QRhiResourceUpdateBatch& res, const RenderList& renderer, + const isf::descriptor& /*desc*/, GraphicsStorageResources& store, + QSize renderSize) +{ + // Sentinel disconnect-fallback buffer: only allocate when the node has + // at least one upstream-bound SSBO or UBO. ensureSentinelBuffer is + // idempotent, so subsequent calls (per-frame ensure passes) are + // no-ops once the sentinel exists. Allocating here (rather than + // lazily inside bindUpstreamBuffers) lets us fold the zero-fill upload + // into the same resource-update batch as the rest of the storage + // initialisation, instead of needing a per-call res in the bind path. + bool needsSentinel = false; + for(const auto& s : store.ssbos) + if(s.input_port_index >= 0) { needsSentinel = true; break; } + if(!needsSentinel) + for(const auto& u : store.ubos) + if(u.input_port_index >= 0) { needsSentinel = true; break; } + if(needsSentinel) + ensureSentinelBuffer(rhi, res, store); + // SSBOs + for(auto& e : store.ssbos) + { + // owned==false: buffer comes from upstream, nothing to allocate here. 
+ // size derived from layout when persistent; otherwise the user sets + // it externally (typically matching upstream geometry). + if(!e.owned) + continue; + int64_t target_size = e.size > 0 ? e.size : 16; + if(!e.buffer) + { + e.buffer = allocateSsbo(rhi, e.name, e.buffer_usage, target_size); + // Zero-fill the placeholder. Vulkan does NOT initialise VkBuffer + // memory; on a fresh RenderList the new placeholder lands on a + // device-memory page with whatever the previous owner left there. + // For shader inputs that have no producer in the user's graph + // (e.g. cluster_light_counts / cluster_light_lists when no + // clustered-lighting compute pass is wired) this placeholder IS + // the buffer the shader reads from — and the read returns + // device-memory garbage (e.g. a huge cluster_light_count value + // makes openpbr's light loop iterate thousands of slots, each + // returning garbage indices into scene_lights → wildly different + // colours per resize). Mirrors the sentinel-buffer zero-fill at + // line 432. + if(e.buffer) + RhiClearBuffer::clearBuffer( + rhi, res, e.buffer, 0, (quint32)target_size); + } + if(e.persistent && !e.prev) + { + e.prev = allocateSsbo(rhi, e.name + "_prev", "", target_size); + if(e.prev) + RhiClearBuffer::clearBuffer( + rhi, res, e.prev, 0, (quint32)target_size); + } + } + + // Uniform buffers (UBOs sourced from upstream Buffer ports). The upstream's + // real buffer is swapped in at runtime by bindUpstreamBuffers — but we need + // a valid placeholder allocated here so the SRB binding slot exists at + // pipeline-build time. Without it, Vulkan complains about an invalid + // descriptor for binding N when the shader reads `camera`. + for(auto& e : store.ubos) + { + if(e.buffer) // already borrowed from upstream, or previously allocated + continue; + // 256 bytes covers the camera UBO (240 B) and most other small UBOs. + // If the upstream provides a larger buffer we'll replace this at bind time. 
+ auto* buf = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 256); + buf->setName(QByteArray("ISF_UBO_placeholder_") + e.name.c_str()); + if(!buf->create()) + { + qWarning() << "Failed to create placeholder UBO" << e.name.c_str(); + delete buf; + continue; + } + // Zero-fill the placeholder. Same Vulkan-doesn't-zero-VkBuffers + // rationale as the SSBO placeholder above. UBOs have a smaller + // attack surface (256 B) but a single garbage value here can flip + // a feature bit in scene_counts or fog params, producing the + // openpbr-only intermittent lighting glitch on resize. + RhiClearBuffer::clearBuffer(rhi, res, buf, 0, 256u); + e.buffer = buf; + e.owned = true; // we own this placeholder; bindUpstreamBuffers drops ownership when it swaps. + } + + // Storage images. Allocator must honor every shape flag the parser + // captured (is3D / cubemap / is_array) so the bound texture matches the + // GLSL declaration emitted by isf_emit_image_decl. Cube + array combos + // are rejected at parse time; this code therefore picks one shape via + // priority order: cubemap > 3D > array > 2D. + for(auto& e : store.images) + { + if(!e.owned) + continue; + + QSize sz = renderSize.isValid() ? renderSize : QSize(256, 256); + QRhiTexture::Format fmt = parseImageFormat(e.format); + if(fmt == QRhiTexture::UnknownFormat) + continue; // parseImageFormat already warned + QRhiTexture::Flags flags = QRhiTexture::UsedWithLoadStore; + if(e.is3D) + flags |= QRhiTexture::ThreeDimensional; + if(e.cubemap) + flags |= QRhiTexture::CubeMap; + if(e.is_array) + flags |= QRhiTexture::TextureArray; + + // Cubes use the size-only newTexture overload; QRhi infers face_count=6 + // from the CubeMap flag. width must equal height (cube face is square) + // — we size both axes to the smaller of renderSize for safety. + if(e.cubemap) + { + const int edge = std::min(sz.width(), sz.height()); + sz = QSize(edge, edge); + } + const int arrayLayers = e.layers > 0 ? 
e.layers : 4; // matches doc default + + auto make_tex = [&](const char* suffix) -> QRhiTexture* { + QRhiTexture* t = nullptr; + if(e.cubemap) + t = rhi.newTexture(fmt, sz, 1, flags); + else if(e.is3D) + t = rhi.newTexture( + fmt, sz.width(), sz.height(), + e.depth > 0 ? e.depth : 16, 1, flags); + else if(e.is_array) + t = rhi.newTextureArray(fmt, arrayLayers, sz, 1, flags); + else + t = rhi.newTexture(fmt, sz, 1, flags); + t->setName( + QByteArray("ISF_StorageImage_") + e.name.c_str() + suffix); + if(!t->create()) + { + qWarning() << "Failed to create storage image" << e.name.c_str() << suffix; + delete t; + return nullptr; + } + return t; + }; + + if(!e.texture) + e.texture = make_tex(""); + if(e.persistent && !e.prev) + e.prev = make_tex("_prev"); + } +} + +QVarLengthArray buildExtraBindings( + const GraphicsStorageResources& store) +{ + QVarLengthArray out; + + for(const auto& e : store.ssbos) + { + if(!e.buffer || e.binding < 0) + continue; + + const auto stages = e.stages; + if(stages == QRhiShaderResourceBinding::StageFlags{}) + continue; + + if(e.access == "read_only") + { + out.append(QRhiShaderResourceBinding::bufferLoad(e.binding, stages, e.buffer)); + } + else if(e.access == "write_only") + { + out.append(QRhiShaderResourceBinding::bufferStore(e.binding, stages, e.buffer)); + } + else + { + out.append(QRhiShaderResourceBinding::bufferLoadStore(e.binding, stages, e.buffer)); + } + + if(e.persistent && e.prev && e.prev_binding >= 0) + { + out.append( + QRhiShaderResourceBinding::bufferLoad(e.prev_binding, stages, e.prev)); + } + } + + for(const auto& e : store.images) + { + if(!e.texture || e.binding < 0) + continue; + const auto stages = e.stages; + if(stages == QRhiShaderResourceBinding::StageFlags{}) + continue; + + if(e.access == "read_only") + out.append(QRhiShaderResourceBinding::imageLoad(e.binding, stages, e.texture, 0)); + else if(e.access == "write_only") + out.append(QRhiShaderResourceBinding::imageStore(e.binding, stages, e.texture, 0)); + 
else + out.append(QRhiShaderResourceBinding::imageLoadStore(e.binding, stages, e.texture, 0)); + + if(e.persistent && e.prev && e.prev_binding >= 0) + { + out.append( + QRhiShaderResourceBinding::imageLoad(e.prev_binding, stages, e.prev, 0)); + } + } + + for(const auto& e : store.ubos) + { + if(!e.buffer || e.binding < 0) + continue; + const auto stages = e.stages; + if(stages == QRhiShaderResourceBinding::StageFlags{}) + continue; + out.append(QRhiShaderResourceBinding::uniformBuffer(e.binding, stages, e.buffer)); + } + + return out; +} + +void bindUpstreamBuffers( + RenderList& renderer, const std::vector& inputPorts, + GraphicsStorageResources& store, + QRhiShaderResourceBindings* srb) +{ + // Upstream renderers (halp-based nodes like ExtractBuffer2, RenderedCSFNode, + // ScenePreprocessorNode aux extractors, ...) publish their output buffer via + // the virtual NodeRenderer::bufferForOutput() — never by writing + // Port::value. RenderList::bufferForInput(edge) is the right lookup: it + // resolves the source node's renderer and calls bufferForOutput on it. + auto fetchUpstream = [&](Port* port) -> QRhiBuffer* { + for(Edge* edge : port->edges) + { + if(!edge || !edge->source) + continue; + if(edge->source->type != Types::Buffer) + continue; + if(auto view = renderer.bufferForInput(*edge); view.handle) + return view.handle; + } + return nullptr; + }; + // For each SSBO that has an input_port_index and is either read-only or an + // indirect-draw buffer, try to fetch the buffer from the upstream port. + for(auto& e : store.ssbos) + { + if(e.input_port_index < 0) + continue; + if(e.input_port_index >= (int)inputPorts.size()) + continue; + + Port* port = inputPorts[e.input_port_index]; + if(!port) + continue; + + // Only ports of Type::Buffer carry SSBO pointers. 
+ if(port->type != Types::Buffer) + continue; + + if(auto* buf = fetchUpstream(port)) + { + if(buf == e.buffer) + continue; // unchanged — nothing to do + + if(!e.owned) + { + e.buffer = buf; + if(srb && e.binding >= 0) + replaceBuffer(*srb, e.binding, buf); + } + else if(e.access == "read_only") + { + if(e.owned && e.buffer) + e.buffer->deleteLater(); + e.owned = false; + e.buffer = buf; + if(srb && e.binding >= 0) + replaceBuffer(*srb, e.binding, buf); + } + } + else if(!e.owned && store.sentinelBuffer && !port->edges.empty()) + { + // Disconnect: we were borrowing an upstream buffer (!e.owned), the + // user had wired the port (port->edges non-empty), and the upstream + // is now gone (fetchUpstream returned nullptr). The prior upstream's + // QRhiBuffer was deleteLater'd when its node tore down, so the SRB + // binding now points at a dangling pointer. Adopt the sentinel + // zero-buffer so reads return zeros and the descriptor remains + // valid (Vulkan validation requires a live resource at every + // binding slot). Stays !owned — sentinel lifetime is owned by + // GraphicsStorageResources::release(). + // + // The port->edges.empty() guard is critical for entries that are + // bound from the upstream geometry's auxiliary_buffers list (the + // pattern ScenePreprocessor uses for scene_lights / world_transforms + // / per_draws / scene_materials / scene_counts / scene_light_indices + // / camera UBO / env UBO into flattened-scene shaders). Those have + // input_port_index >= 0 but no port edges — bindUpstreamBuffersFrom- + // Geometry restores the binding immediately after this function. + // Without the guard, the sentinel temporarily clobbered them and + // (worse) flipped their state in a way that confused subsequent + // frames. 
+ if(e.buffer != store.sentinelBuffer) + { + e.buffer = store.sentinelBuffer; + if(srb && e.binding >= 0) + replaceBuffer(*srb, e.binding, store.sentinelBuffer); + } + } + } + + // UBOs: borrow the upstream buffer when one is published on the Buffer port. + // If the SRB is provided, patch its binding to point at the new buffer so + // the draw call binds the right descriptor. A per-frame "placeholder" UBO + // was allocated in ensureStorageResources so the binding slot exists even + // when no upstream is connected. + bool ubo_srb_changed = false; + for(auto& e : store.ubos) + { + if(e.input_port_index < 0) + continue; + if(e.input_port_index >= (int)inputPorts.size()) + continue; + Port* port = inputPorts[e.input_port_index]; + if(!port || port->type != Types::Buffer) + continue; + QRhiBuffer* found = fetchUpstream(port); + if(found == e.buffer) + continue; // unchanged — nothing to do + + if(found) + { + // An upstream is now providing a different buffer than what's currently + // bound. Drop any placeholder we owned and retarget the binding. + if(e.owned && e.buffer) + e.buffer->deleteLater(); + e.owned = false; + e.buffer = found; + + if(srb && e.binding >= 0) + { + replaceBuffer(*srb, e.binding, found); + ubo_srb_changed = true; + } + } + else if(!e.owned && store.sentinelBuffer && !port->edges.empty()) + { + // Disconnect path mirroring the SSBO loop above: the upstream UBO + // went away (e.g. its producer node was deleted), and we were + // borrowing its buffer. Bind the sentinel so the SRB descriptor + // stays valid; reads return predictable zeros. Note that any + // owned placeholder allocated in ensureStorageResources is kept + // — we don't destroy it here, since the next reconnect will adopt + // the new upstream and we'd just have to re-create the + // placeholder. The sentinel takeover is transient. 
+ // + // The port->edges.empty() guard mirrors the SSBO branch above: + // entries bound via the geometry name-match path (the camera UBO + // and env UBO from ScenePreprocessor) have no port edges; the + // sentinel must not fire for them — bindUpstreamBuffersFrom- + // Geometry restores them immediately after this function returns. + if(e.buffer != store.sentinelBuffer) + { + e.buffer = store.sentinelBuffer; + if(srb && e.binding >= 0) + { + replaceBuffer(*srb, e.binding, store.sentinelBuffer); + ubo_srb_changed = true; + } + } + } + } + // No trailing srb->create() — replaceBuffer() now uses the + // updateResources() fast path, which already rebuilds the backend + // descriptor set. Re-creating here would tear down the pool slot + // we just refreshed. + (void)ubo_srb_changed; +} + +void bindUpstreamImagesFromGeometry( + GraphicsStorageResources& store, const ossia::geometry& geometry, + QRhiShaderResourceBindings* srb) +{ + // Symmetric to bindUpstreamBuffers' read-only SSBO branch, but for + // storage images. When a downstream csf_image_input is read_only and the + // upstream geometry publishes a storage image with the same name on its + // auxiliary_textures list (e.g. an upstream CSF or RawRaster wrote to it + // via csf_image_input ACCESS:write_only / read_write), swap our + // texture pointer to the upstream's published handle and free the + // auto-allocated placeholder. + // + // Without this, every read_only csf_image_input INPUTS reads from its + // OWN zero-initialised texture instead of the upstream's actual contents + // — silently broken. The downstream typically wants imageLoad on the + // upstream's writes (e.g. tile-render output sampled by a composite FS + // via imageLoad rather than texture()). + for(auto& e : store.images) + { + // Only read_only entries can adopt an upstream texture. write_only and + // read_write own their textures (the CSF / RawRaster IS the producer). 
+ if(e.access != "read_only") + continue; + if(e.binding < 0) + continue; + + const auto* aux = geometry.find_auxiliary_texture(e.name); + if(!aux) + continue; // No upstream publishing this name — keep placeholder. + auto* upstream_tex = static_cast(aux->native_handle); + if(!upstream_tex) + continue; + + // Swap the underlying texture pointer when it actually changed — + // first time the upstream connects, or whenever the producer + // reallocates (resize, format change, …). Drop the auto-allocated + // placeholder we owned, adopt the upstream handle. Mark non-owned + // so later release() / persistent swap don't touch the upstream's + // lifetime. + if(upstream_tex != e.texture) + { + if(e.owned && e.texture) + e.texture->deleteLater(); + e.owned = false; + e.texture = upstream_tex; + } + + // Patch the SRB unconditionally when provided. Lets a multi-pass / + // multi-SRB caller invoke this helper once per SRB without + // re-running the upstream lookup (the early-out above guarantees + // idempotence). Pairs with the m_passes-per-pass loop in + // RenderedRawRasterPipelineNode::update. + if(srb) + replaceTexture(*srb, e.binding, e.texture); + } +} + +void bindUpstreamBuffersFromGeometry( + QRhi& rhi, QRhiResourceUpdateBatch& res, + GraphicsStorageResources& store, const ossia::geometry& geometry, + QRhiShaderResourceBindings* srb) +{ + // SSBO/UBO sibling of bindUpstreamImagesFromGeometry. INPUTS-declared + // storage_input / uniform_input may carry the upstream buffer either via + // a dedicated Buffer port edge (handled by bindUpstreamBuffers) OR + // name-matched against the upstream geometry's auxiliary_buffers list + // — exactly the pattern ScenePreprocessor uses to publish scene_lights / + // world_transforms / per_draws / scene_materials / scene_counts / + // scene_light_indices / camera UBO / env UBO into a flattened scene + // shader (classic_pbr et al.). 
+ // + // Without this name-match path, those bindings stayed at the 16-byte + // placeholder ensureStorageResources allocates for owned SSBOs: + // vertices read pd.transform_slot from a zero PerDraw, multiply by a + // zero world_transforms[0] matrix, collapse to origin → black scene. + // + // `geometry` is already a single ossia::geometry (the caller — typically + // RenderedRawRasterPipelineNode — unwraps from geometry.meshes->meshes[0] + // at the call site). Same convention as bindUpstreamImagesFromGeometry. + const auto& mesh = geometry; + + // Look up the GPU/CPU buffer behind a named aux on the geometry. + // Returns {handle, byte_size, owned?} — owned means we just allocated + + // uploaded a CPU buffer (caller must release the prior owned handle). + struct ResolvedBuffer + { + QRhiBuffer* handle{}; + int64_t byte_size{0}; + bool owned{false}; + }; + auto resolve_aux = [&](const std::string& name, bool is_uniform) -> ResolvedBuffer { + auto* geo_aux = mesh.find_auxiliary(name); + if(!geo_aux || geo_aux->buffer < 0 + || geo_aux->buffer >= (int)mesh.buffers.size()) + return {}; + const auto& geo_buf = mesh.buffers[geo_aux->buffer]; + if(auto* gpu = ossia::get_if(&geo_buf.data)) + { + if(!gpu->handle) + return {}; + return {static_cast(gpu->handle), + geo_aux->byte_size > 0 ? geo_aux->byte_size : gpu->byte_size, + false}; + } + else if(auto* cpu = ossia::get_if(&geo_buf.data)) + { + if(!cpu->raw_data || cpu->byte_size <= 0) + return {}; + const int64_t sz + = geo_aux->byte_size > 0 ? geo_aux->byte_size : cpu->byte_size; + const auto usage + = is_uniform ? 
QRhiBuffer::UniformBuffer : QRhiBuffer::StorageBuffer; + auto* buf = rhi.newBuffer(QRhiBuffer::Immutable, usage, sz); + buf->setName(QByteArray("ISF_aux_geom_") + name.c_str()); + if(!buf->create()) + { + delete buf; + return {}; + } + res.uploadStaticBuffer(buf, 0, sz, cpu->raw_data.get()); + return {buf, sz, true}; + } + return {}; + }; + + for(auto& e : store.ssbos) + { + if(e.binding < 0) + continue; + // Indirect-draw SSBOs carry no shader binding; handled elsewhere. + if(!e.buffer_usage.empty()) + continue; + auto resolved = resolve_aux(e.name, /*is_uniform=*/false); + if(!resolved.handle || resolved.handle == e.buffer) + continue; + // Drop the prior owned placeholder (or prior owned CPU upload) before + // adopting the new handle. + if(e.owned && e.buffer) + e.buffer->deleteLater(); + e.buffer = resolved.handle; + e.size = resolved.byte_size; + e.owned = resolved.owned; + if(srb) + replaceBuffer(*srb, e.binding, e.buffer); + } + + for(auto& e : store.ubos) + { + if(e.binding < 0) + continue; + auto resolved = resolve_aux(e.name, /*is_uniform=*/true); + if(!resolved.handle || resolved.handle == e.buffer) + continue; + if(e.owned && e.buffer) + e.buffer->deleteLater(); + e.buffer = resolved.handle; + e.owned = resolved.owned; + if(srb) + replaceBuffer(*srb, e.binding, e.buffer); + } +} + +void swapPersistentSSBOsState(GraphicsStorageResources& store) +{ + for(auto& e : store.ssbos) + if(e.persistent && e.buffer && e.prev) + std::swap(e.buffer, e.prev); + for(auto& e : store.images) + if(e.persistent && e.texture && e.prev) + std::swap(e.texture, e.prev); +} + +void reapplyStorageBindings( + const GraphicsStorageResources& store, QRhiShaderResourceBindings& srb) +{ + for(const auto& e : store.ssbos) + { + if(!e.persistent || !e.buffer || !e.prev) + continue; + replaceBuffer(srb, e.binding, e.buffer); + replaceBuffer(srb, e.prev_binding, e.prev); + } + for(const auto& e : store.images) + { + if(!e.persistent || !e.texture || !e.prev) + continue; + 
replaceTexture(srb, e.binding, e.texture); + replaceTexture(srb, e.prev_binding, e.prev); + } + // No trailing srb.create() — the replace*() helpers use updateResources() + // which already refreshes the backend descriptor state. A create() here + // would re-allocate the descriptor set pool slot and defeat the + // fast-path swap (qrhivulkan.cpp:8707, updateResources). +} + +void swapPersistentSSBOs( + GraphicsStorageResources& store, QRhiShaderResourceBindings& srb) +{ + swapPersistentSSBOsState(store); + reapplyStorageBindings(store, srb); +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.hpp new file mode 100644 index 0000000000..b933d4ebc7 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/IsfBindingsBuilder.hpp @@ -0,0 +1,423 @@ +#pragma once + +// Shared infrastructure for binding `storage_input` and `csf_image_input` +// declarations into a graphics pipeline's shader resource bindings. +// +// Mirrors the pattern established by RenderedCSFNode (for compute) but wired +// to Vertex|Fragment stages for ISF / Raw Raster Pipeline / Scene Pass nodes. + +#include +#include + +#include + +#include + +#include +#include +#include +#include + +namespace score::gfx +{ + +/** + * @brief One SSBO attached to a graphics pipeline. + * + * Covers: + * - user-declared storage_input's (shader-visible read-only / read-write SSBOs) + * - persistent ping-pong pairs (current + previous frame) + * - indirect-draw argument buffers (BUFFER_USAGE="indirect_draw") + * - auxiliary buffers that travel with the geometry (forwarded from upstream) + */ +struct GraphicsSSBO +{ + std::string name; //!< Base GLSL identifier (e.g. 
"particles") + std::string access; //!< "read_only" / "write_only" / "read_write" + std::string buffer_usage;//!< "", "indirect_draw", "indirect_draw_indexed" + bool persistent{false}; //!< Ping-pong swapped every frame + bool owned{true}; //!< This SSBO owns `buffer` and `prev` (releases them) + int64_t size{0}; //!< Buffer size in bytes (0 = auto from layout) + + // Layout fields (for size computation + validation). May be empty for auxiliaries. + std::vector layout; + + // Buffer handles. `buffer` is the currently-written slot (R/W for persistent). + // `prev` is only set when persistent — holds the previous frame's data (R/O). + QRhiBuffer* buffer{}; + QRhiBuffer* prev{}; + + // Resolved SRB binding slots. + int binding{-1}; //!< Binding of `buffer` + int prev_binding{-1}; //!< Binding of `prev` (only set when persistent) + + // Stages that see this binding (fragment / vertex / both). + QRhiShaderResourceBinding::StageFlags stages{}; + + // Optional: indices into the Node's input/output port vectors. -1 = not + // connected to a port (e.g. private aux buffer or persistent-only). + int input_port_index{-1}; + int output_port_index{-1}; +}; + +/** + * @brief One storage image attached to a graphics pipeline. + */ +struct GraphicsStorageImage +{ + std::string name; + std::string access; //!< "read_only" / "write_only" / "read_write" + std::string format; //!< e.g. 
"rgba8", "r32f", "r32ui" + bool is3D{false}; + bool cubemap{false}; //!< imageCube — 6-layer cubemap storage image + bool is_array{false}; //!< image2DArray — N-layer array texture + bool persistent{false}; //!< Ping-pong two textures swapped every frame + int depth{0}; //!< Explicit Z dimension for 3D textures; 0 = use default (16) + int layers{0}; //!< Layer count for is_array (0 = use parser-supplied default) + + QRhiTexture* texture{}; //!< Current (write / read_write) slot + QRhiTexture* prev{}; //!< Previous frame (read-only); only set when persistent + bool owned{true}; + + int binding{-1}; + int prev_binding{-1}; //!< Binding of `prev` (only set when persistent) + QRhiShaderResourceBinding::StageFlags stages{}; + + int input_port_index{-1}; + int output_port_index{-1}; +}; + +/** + * @brief One UBO sourced from an upstream Buffer port (uniform_input). + * + * Bound via QRhiShaderResourceBinding::uniformBuffer (std140) rather than + * the SSBO bufferLoad/bufferStore used for storage_input. + */ +struct GraphicsUBO +{ + std::string name; + QRhiBuffer* buffer{}; + bool owned{false}; //!< Always false for now: borrowed from upstream. + int binding{-1}; + QRhiShaderResourceBinding::StageFlags stages{}; + int input_port_index{-1}; +}; + +/** + * @brief Aggregate of all graphics-visible storage resources for a node. + */ +struct GraphicsStorageResources +{ + std::vector ssbos; + std::vector images; + std::vector ubos; + + // Quick aliases: first SSBO with BUFFER_USAGE="indirect_draw*". Populated + // by collectGraphicsStorageResources. Updated by callers when the underlying + // SSBO's buffer pointer changes (e.g. when an upstream CSF rebuilds it). + QRhiBuffer* indirectDrawBuffer{}; + bool indirectDrawIndexed{false}; + int indirectDrawSsboIndex{-1}; + + // Sentinel zero-buffer bound when an SSBO/UBO upstream port disconnects + // mid-session. 
QRhi (especially Vulkan) requires every SRB binding to + // point at a valid resource — without a sentinel, a disconnect leaves + // the binding pointing at a dangling QRhiBuffer* (the prior upstream's + // buffer, which was deleteLater'd when the upstream node was destroyed). + // Lazily allocated on first disconnect, sized to the largest binding + // observed (kSentinelSize). Single buffer reused for both SSBO and UBO + // disconnects since the descriptor type is set on the SRB binding side, + // not the buffer side; QRhi accepts a buffer with both StorageBuffer and + // UniformBuffer usage flags. owned=true; freed in release(). + QRhiBuffer* sentinelBuffer{}; + uint32_t sentinelSize{0}; + + void release() + { + for(auto& s : ssbos) + { + if(s.owned) + { + if(s.buffer) s.buffer->deleteLater(); + if(s.prev) s.prev->deleteLater(); + } + s.buffer = nullptr; + s.prev = nullptr; + } + ssbos.clear(); + + for(auto& i : images) + { + if(i.owned) + { + if(i.texture) i.texture->deleteLater(); + if(i.prev) i.prev->deleteLater(); + } + i.texture = nullptr; + i.prev = nullptr; + } + images.clear(); + + for(auto& u : ubos) + { + if(u.owned && u.buffer) + u.buffer->deleteLater(); + u.buffer = nullptr; + } + ubos.clear(); + + if(sentinelBuffer) + { + sentinelBuffer->deleteLater(); + sentinelBuffer = nullptr; + } + sentinelSize = 0; + + indirectDrawBuffer = nullptr; + indirectDrawSsboIndex = -1; + } +}; + +// --- API ------------------------------------------------------------------ + +/** + * @brief Walk desc.inputs once and populate `out` with the storage buffers + * and images declared by the shader. + * + * Bindings are assigned sequentially starting from `firstBinding`. Persistent + * SSBOs consume TWO consecutive bindings. + * + * No GPU resources are allocated here — call ensureStorageResources() later. 
+ */ +SCORE_PLUGIN_GFX_EXPORT +void collectGraphicsStorageResources( + const isf::descriptor& desc, int firstBinding, GraphicsStorageResources& out); + +/** + * @brief Create missing buffers and textures. + * + * Safe to call every frame — idempotent. Resizes buffers when they don't match + * the current layout. For persistent SSBOs, allocates both the current and + * prev buffers. For indirect-draw buffers, adds the IndirectBuffer usage flag. + */ +SCORE_PLUGIN_GFX_EXPORT +void ensureStorageResources( + QRhi& rhi, QRhiResourceUpdateBatch& res, const RenderList& renderer, + const isf::descriptor& desc, GraphicsStorageResources& store, + QSize renderSize); + +/** + * @brief Produce the QRhiShaderResourceBinding list for the graphics pipeline. + * + * Call this from inside addOutputPass() after buildPipeline() has been set up. + * The result is concatenated with the standard bindings (sampler, material, + * processUBO, etc.) via the `additionalBindings` span in createDefaultBindings. + */ +SCORE_PLUGIN_GFX_EXPORT +QVarLengthArray buildExtraBindings( + const GraphicsStorageResources& store); + +/** + * @brief Wire read-only SSBOs to upstream geometry buffers. + * + * When a storage_input is declared as `read_only` AND the upstream node + * supplies a buffer on the port, the binding is rewired to point at the + * upstream's QRhiBuffer (no allocation needed). Called each frame to track + * port changes. + */ +SCORE_PLUGIN_GFX_EXPORT +void bindUpstreamBuffers( + RenderList& renderer, const std::vector& inputPorts, + GraphicsStorageResources& store, + QRhiShaderResourceBindings* srb = nullptr); + +/** + * @brief Swap current/prev for all persistent SSBOs and storage images, + * then update the SRB. + * + * Call at end of frame, after all passes have run. Symmetric to the existing + * texture ping-pong in RenderedISFNode (the `swap(passes, altPasses)` at + * RenderedISFNode.cpp:782). 
+ */ +SCORE_PLUGIN_GFX_EXPORT +void swapPersistentSSBOs( + GraphicsStorageResources& store, QRhiShaderResourceBindings& srb); + +/** + * @brief Swap current/prev pointers in `store` without touching any SRB. + * + * Used by multi-pass / multi-SRB renderers that need to apply the same + * post-swap state to many descriptor sets: call this once per frame, then + * call reapplyStorageBindings on every affected SRB. Calling + * swapPersistentSSBOs per-SRB would double-swap and cancel out. + */ +SCORE_PLUGIN_GFX_EXPORT +void swapPersistentSSBOsState(GraphicsStorageResources& store); + +/** + * @brief Re-apply the current persistent-storage state to a single SRB. + * + * Pairs with swapPersistentSSBOsState: after swapping `store` once, call + * this on every SRB that references the persistent bindings so the + * descriptor set matches the new pointers. Uses replaceBuffer's + * updateResources() fast path — no srb->create() rebuild — to avoid + * thrashing the SRB pool slot every frame on a static scene (the + * cf4b7d6f5 / diag-211 fix removed the trailing create() that earlier + * versions of this function called). + */ +SCORE_PLUGIN_GFX_EXPORT +void reapplyStorageBindings( + const GraphicsStorageResources& store, QRhiShaderResourceBindings& srb); + +/** + * @brief Wire read-only csf_image_input storage images to an upstream + * geometry's published auxiliary_textures. + * + * Symmetric to `bindUpstreamBuffers` for SSBOs: when a csf_image_input is + * declared `read_only` AND the upstream geometry publishes a storage image + * with the same name (e.g. an upstream CSF wrote to it via image_input + * with `write_only`/`read_write`), this swaps the storage image's texture + * pointer to the upstream's published handle and frees the auto-allocated + * placeholder we created in `ensureStorageResources`. 
+ * + * Without this, every read_only csf_image_input INPUTS in a downstream + * RawRaster / ISF stage reads from its OWN zero-initialised texture instead + * of the upstream's actual contents — silently broken. + * + * Called per-frame; idempotent. When `srb` is non-null, patches the binding + * in-place via `replaceTexture`. The lookup is purely by name match against + * `geometry.auxiliary_textures` (the same name-match pattern used by + * RawRaster's `m_auxTextureSamplers` rebind path). + */ +SCORE_PLUGIN_GFX_EXPORT +void bindUpstreamImagesFromGeometry( + GraphicsStorageResources& store, const ossia::geometry& geometry, + QRhiShaderResourceBindings* srb = nullptr); + +/** + * @brief Wire INPUTS storage_input / uniform_input bindings to upstream + * geometry's published auxiliary_buffers list (name-match). + * + * SSBO/UBO sibling of `bindUpstreamImagesFromGeometry`. ScenePreprocessor + * publishes scene_lights / world_transforms / per_draws / scene_materials / + * scene_counts / scene_light_indices / camera UBO / env UBO as named aux + * buffers travelling along the geometry edge — flattened-scene shaders + * (classic_pbr et al.) declare matching INPUTS storage_input/uniform_input + * blocks and the runtime resolves them by name. + * + * Without this, INPUTS storage_input/uniform_input that go through the + * m_storage path stay at the 16-byte placeholder allocated by + * `ensureStorageResources` for owned SSBOs — vertices read a zero + * PerDraw, multiply by a zero world_transforms matrix, and collapse to + * origin. (Indirect-draw storage_inputs are skipped — they have no shader + * binding.) + * + * For CPU-backed aux buffers a fresh QRhiBuffer is allocated and the data + * uploaded immediately into `res`; the entry's `owned` flag is updated so + * `release()` cleans up correctly. For GPU-backed aux buffers we just + * adopt the upstream handle (`owned=false`). 
+ * + * Patches the SRB in-place when a target SRB is provided; idempotent so + * multi-SRB callers can invoke once per SRB without re-running the lookup. + */ +SCORE_PLUGIN_GFX_EXPORT +void bindUpstreamBuffersFromGeometry( + QRhi& rhi, QRhiResourceUpdateBatch& res, + GraphicsStorageResources& store, const ossia::geometry& geometry, + QRhiShaderResourceBindings* srb = nullptr); + +/** + * @brief Decode an isf::storage_input::visibility string to Qt RHI stage flags. + * + * "fragment" → FragmentStage + * "vertex" → VertexStage + * "vertex+fragment" / "both" / "graphics" → Vertex | Fragment + * "compute" → ComputeStage + * "none" → 0 + */ +SCORE_PLUGIN_GFX_EXPORT +QRhiShaderResourceBinding::StageFlags visibilityToStages(std::string_view visibility) noexcept; + +/** + * @brief Byte size of a single GLSL primitive type as used for SSBO element + * strides in this codebase. + * + * Coverage: scalars (`float`, `int`, `uint`, `bool`), vectors (`vec[234]`, + * `ivec[234]`, `uvec[234]`), and matrices (`mat2`, `mat3`, `mat4`). Sampler / + * image / opaque types are not covered (return the fallback). Returns 16 as a + * fallback for unknown / unsupported types. + * + * Conventions: + * - Returns 12 for `vec3`/`ivec3`/`uvec3` (the bare component size). Consumers + * that need std140 / std430 array stride must align to 16 themselves; for + * that case prefer `std430ArrayStride` below, which encapsulates the rule + * and keeps the two domains (bare type size vs. stride-in-SSBO) from + * drifting at call sites. ISF auxiliary layouts continue to align at the + * field level via `std430LayoutSize`. + * - `mat2` is reported as 16 (two `vec2` columns, no per-column padding). + * - `mat3` is reported as 48 (three `vec4`-padded columns); this matches both + * std140 and std430 column-major layout for `mat3` in storage blocks. + * - `mat4` is reported as 64. 
+ * + * This is the single source of truth for GLSL type → element size in + * `score-plugin-gfx`; do not introduce private copies (see diagnostic 095). + * + * Note: For the vertex-attribute format → byte-size mapping + * (`ossia::geometry::attribute` enum), see the unrelated helper inside + * `RenderedCSFNode.cpp`; it operates on a different domain (binary attribute + * formats, not GLSL type strings). + */ +SCORE_PLUGIN_GFX_EXPORT +int64_t glslTypeSizeBytes(std::string_view type) noexcept; + +/** + * @brief Same as glslTypeSizeBytes, but resolves user-defined types from + * the descriptor's TYPES section. Falls back to the built-in size table + * for primitives, then to descriptor.types lookup for struct names. The + * std430 size of a struct is the sum of its fields' sizes, each rounded + * up to a 16-byte boundary (matching the array-of-struct alignment rule + * already used by `std430LayoutSize` for AUXILIARY blocks). Returns 16 + * (the lenient default) for unresolved names. + */ +SCORE_PLUGIN_GFX_EXPORT +int64_t glslTypeSizeBytes(std::string_view type, const isf::descriptor& d) noexcept; + +/** + * @brief Compute the std430 element size of a layout (vector of + * `{name,type}` field entries), each field rounded up to 16 bytes per + * the array-of-struct alignment rule. Used by AUXILIARY blocks and by + * the user-defined struct lookup in glslTypeSizeBytes. + */ +SCORE_PLUGIN_GFX_EXPORT +int64_t std430LayoutSize( + const std::vector& layout) noexcept; + +/** + * @brief std430 array stride for a GLSL primitive type when laid out as + * `T array[]` inside a shader storage block. + * + * Differs from `glslTypeSizeBytes` only for vec3-shaped vectors: per the + * std430 layout rules, an array of `vec3` (or `ivec3` / `uvec3`) keeps + * the element's vec4-aligned base alignment, so the per-element stride + * is 16 bytes — the trailing 4 bytes are padding the GPU does not write + * but consumer reads must skip. 
For scalars, vec2, vec4 and matrices, + * the stride equals the bare type size, so this returns + * `glslTypeSizeBytes(type)` unchanged. + * + * Use this — never `glslTypeSizeBytes` — when sizing a CSF SoA output + * SSBO buffer or setting a downstream vertex binding stride that mirrors + * the SSBO's std430 layout. Mixing the two is the source of the silent + * vec3 corruption diagnosed in the 3DGS pipeline. + */ +SCORE_PLUGIN_GFX_EXPORT +int64_t std430ArrayStride(std::string_view type) noexcept; + +/** + * @brief Same as `std430ArrayStride`, but resolves user-defined struct + * names against the descriptor's TYPES section. Falls back to + * `glslTypeSizeBytes(type, d)` for non-vec3 primitives and structs. + */ +SCORE_PLUGIN_GFX_EXPORT +int64_t std430ArrayStride(std::string_view type, const isf::descriptor& d) noexcept; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.cpp new file mode 100644 index 0000000000..b561aa05b1 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.cpp @@ -0,0 +1,141 @@ +#include +#include +#include + +#include + +namespace score::gfx +{ + +struct RenderedMergeGeometriesNode final : NodeRenderer +{ + const MergeGeometriesNode& m_node; + ossia::geometry_spec m_outputSpec; + std::array m_cachedInputs; + + RenderedMergeGeometriesNode(const MergeGeometriesNode& n) + : NodeRenderer{n} + , m_node{n} + { + } + + void init(RenderList&, QRhiResourceUpdateBatch&) override { m_initialized = true; } + void release(RenderList&) override + { + m_outputSpec = {}; + for(auto& c : m_cachedInputs) + c = {}; + m_initialized = false; + } + + // Since m_portGeometries is now keyed by (port, source), look up the first + // entry matching the requested port. MergeGeometriesNode wires one input + // per port, so multi-source convergence on a single port isn't expected + // here; take the first match. 
+ const ossia::geometry_spec* findFirstByPort(int32_t port) const + { + for(const auto& [k, v] : m_portGeometries) + if(k.first == port) + return &v; + return nullptr; + } + + bool anyInputChanged() const + { + for(int i = 0; i < MergeGeometriesNode::kMaxInputs; ++i) + { + const auto* found = findFirstByPort((int32_t)i); + const ossia::geometry_spec& cur + = found ? *found : ossia::geometry_spec{}; + if(!(cur == m_cachedInputs[i])) + return true; + } + return false; + } + + void rebuild() + { + auto list = std::make_shared(); + auto filters = std::make_shared(); + int64_t maxDirty = 0; + int64_t maxFilterDirty = 0; + for(int i = 0; i < MergeGeometriesNode::kMaxInputs; ++i) + { + const auto* found = findFirstByPort((int32_t)i); + if(!found || !found->meshes) + { + m_cachedInputs[i] = {}; + continue; + } + const auto& in = *found; + list->meshes.insert( + list->meshes.end(), + in.meshes->meshes.begin(), + in.meshes->meshes.end()); + maxDirty = std::max(maxDirty, in.meshes->dirty_index); + if(in.filters) + { + filters->filters.insert( + filters->filters.end(), + in.filters->filters.begin(), + in.filters->filters.end()); + maxFilterDirty = std::max(maxFilterDirty, in.filters->dirty_index); + } + m_cachedInputs[i] = in; + } + list->dirty_index = maxDirty + 1; + filters->dirty_index = maxFilterDirty + 1; + + m_outputSpec.meshes = std::move(list); + m_outputSpec.filters = std::move(filters); + } + + void update(RenderList&, QRhiResourceUpdateBatch&, Edge*) override + { + if(!m_outputSpec.meshes || this->geometryChanged || anyInputChanged()) + { + rebuild(); + this->geometryChanged = false; + } + } + + void runInitialPasses( + RenderList& renderer, QRhiCommandBuffer&, QRhiResourceUpdateBatch*&, + Edge& edge) override + { + if(!m_outputSpec.meshes) + return; + auto* sink = edge.sink; + if(!sink || !sink->node) + return; + auto rn_it = sink->node->renderedNodes.find(&renderer); + if(rn_it == sink->node->renderedNodes.end()) + return; + auto it = 
std::find(sink->node->input.begin(), sink->node->input.end(), sink); + if(it == sink->node->input.end()) + return; + int port_idx = (int)(it - sink->node->input.begin()); + rn_it->second->process(port_idx, m_outputSpec, edge.source); + } + + void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { } + + // Data-only renderer — no per-edge GPU pass state to release. + void removeOutputPass(RenderList&, Edge&) override { } +}; + +MergeGeometriesNode::MergeGeometriesNode() +{ + for(int i = 0; i < kMaxInputs; ++i) + input.push_back(new Port{this, {}, Types::Geometry, {}}); + output.push_back(new Port{this, {}, Types::Geometry, {}}); +} + +MergeGeometriesNode::~MergeGeometriesNode() = default; + +NodeRenderer* MergeGeometriesNode::createRenderer(RenderList&) const noexcept +{ + return new RenderedMergeGeometriesNode{*this}; +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.hpp new file mode 100644 index 0000000000..a219e8039d --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/MergeGeometriesNode.hpp @@ -0,0 +1,36 @@ +#pragma once +#include + +namespace score::gfx +{ + +/** + * @brief Concatenates up to N upstream geometry_specs into one. + * + * Intended use: combine independently-flattened scene partitions (static + * environment + animated characters + CSF-produced particles) into a + * single geometry_spec that a single downstream renderer can draw in one + * pass. All underlying GPU buffers are shared via `shared_ptr`; only the + * top-level mesh_list is rebuilt. + * + * For v1, up to 8 input geometry ports are exposed. Unconnected ports + * contribute nothing. 
+ * + * Inputs: + * - Port 0..7: Geometry (Types::Geometry) + * + * Outputs: + * - Port 0: Geometry (Types::Geometry) + */ +class SCORE_PLUGIN_GFX_EXPORT MergeGeometriesNode : public ProcessNode +{ +public: + static constexpr int kMaxInputs = 8; + + MergeGeometriesNode(); + ~MergeGeometriesNode() override; + + score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override; +}; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp index 1cb2a3c8b3..63e4b8331f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.cpp @@ -40,6 +40,8 @@ void BasicMesh::preparePipeline(QRhiGraphicsPipeline& pip) const noexcept { pip.setDepthTest(true); pip.setDepthWrite(true); + // Reverse-Z project rule. + pip.setDepthOp(QRhiGraphicsPipeline::Greater); } pip.setTopology(this->topology); @@ -61,6 +63,32 @@ void BasicMesh::draw(const MeshBuffers& bufs, QRhiCommandBuffer& cb) const noexc SCORE_ASSERT(buf->usage().testFlag(QRhiBuffer::VertexBuffer)); setupBindings(bufs, cb); + if(bufs.useIndirectDraw && bufs.indirectDrawBuffer) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + if(bufs.gpuIndirectSupported) + { + if(bufs.indirectDrawIndexed) + cb.drawIndexedIndirect( + bufs.indirectDrawBuffer, bufs.indirectDrawOffset, + bufs.indirectDrawCount, bufs.indirectDrawStride); + else + cb.drawIndirect( + bufs.indirectDrawBuffer, bufs.indirectDrawOffset, + bufs.indirectDrawCount, bufs.indirectDrawStride); + return; + } +#endif + if(!bufs.cpuDrawCommands.empty()) + { + for(const auto& cmd : bufs.cpuDrawCommands) + cb.draw(cmd.index_or_vertex_count, cmd.instance_count, + cmd.first_index_or_vertex, cmd.first_instance); + return; + } + return; // skip — no commands available yet + } + cb.draw(vertexCount); } @@ -211,4 +239,15 @@ void TexturedQuad::setupBindings( cb.setVertexInput(0, 2, bindings); } + +void drawMeshWithOptionalIndirect( + const Mesh& mesh, const 
MeshBuffers& bufs, QRhiCommandBuffer& cb) noexcept +{ + // All Mesh subclasses (BasicMesh, CustomMesh) now handle useIndirectDraw + // internally — they check bufs.useIndirectDraw after binding vertex inputs + // and dispatch to cb.drawIndirect/drawIndexedIndirect when set. So this + // helper just forwards to mesh.draw(). It exists as an explicit opt-in + // marker for renderers that intend to support indirect multi-draw. + mesh.draw(bufs, cb); +} } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp index 64f235cc36..6cadfa43fe 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Mesh.hpp @@ -27,16 +27,44 @@ struct BufferView Usage usage{Usage::Direct}; #endif + // False for borrowed buffers — e.g., gpu_buffer handles the caller + // owns (scene preprocessor's MDI arena buffers, registry arena + // buffers). RenderList::release only `delete`s when owned=true; owners + // outside the RenderList's m_vertexBuffers destroy their own handles. + bool owned{true}; + inline operator bool() const noexcept { return handle; } }; struct MeshBuffers { ossia::small_vector buffers; -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + // --- Multi-draw indirect state --- + // Always tracked regardless of Qt version. At draw time the path is: + // gpuIndirectSupported && indirectDrawBuffer → drawIndirect (GPU, Qt 6.12+) + // !gpuIndirectSupported && cpuDrawCommands → per-command drawIndexed loop + // neither → single drawIndexed QRhiBuffer* indirectDrawBuffer{}; bool useIndirectDraw{false}; bool indirectDrawIndexed{false}; + bool gpuIndirectSupported{false}; // set from RenderState::caps at init + quint32 indirectDrawOffset{0}; + quint32 indirectDrawCount{1}; + quint32 indirectDrawStride{0}; + + // CPU-side draw commands. 
Populated either: + // a) directly by the producer (ScenePreprocessor has CPU data), or + // b) via GPU readback when the indirect buffer is GPU-generated (CSF) + // and gpuIndirectSupported is false. + ossia::small_vector cpuDrawCommands; + + // Readback result storage for the synchronous GPU→CPU fallback in + // RenderedRawRasterPipelineNode::runInitialPasses. + // Qt < 6.6 has a separate type for buffer readbacks. +#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) + QRhiReadbackResult readbackResult; +#else + QRhiBufferReadbackResult readbackResult; #endif }; /** @@ -222,4 +250,19 @@ struct SCORE_PLUGIN_GFX_EXPORT TexturedQuad final : TexturedMesh setupBindings(const MeshBuffers& bufs, QRhiCommandBuffer& cb) const noexcept override; }; +/** + * @brief Draw a mesh, using indirect multi-draw when available in MeshBuffers. + * + * When `bufs.useIndirectDraw` is true (and Qt >= 6.12), dispatches to + * `cb.drawIndexedIndirect` / `cb.drawIndirect` with the offset/count/stride + * stored in `bufs`. Otherwise falls back to the mesh's standard `draw()`. + * + * This is the main draw entry point for ISF / RawRaster / Scene renderers so + * that they can transparently support multi-draw indirect just by wiring an + * indirect buffer into MeshBuffers. 
+ */ +SCORE_PLUGIN_GFX_EXPORT +void drawMeshWithOptionalIndirect( + const Mesh& mesh, const MeshBuffers& bufs, QRhiCommandBuffer& cb) noexcept; + } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp index ec739044ca..1c2b60eebf 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/MultiWindowNode.cpp @@ -435,7 +435,13 @@ class MultiWindowRenderer final : public score::gfx::OutputNodeRenderer if(windowIndex < 0 || windowIndex >= (int)m_perWindow.size()) return; - auto* res = renderer.state.rhi->nextResourceUpdateBatch(); + // Don't pre-allocate a batch here: renderSubRegion has early-return + // paths before any consumer (beginPass), and pre-allocating leaks + // one pool slot per discarded window on every render. The three + // UBO blocks inside renderSubRegion lazily allocate via + // `if(!res) res = ...->nextResourceUpdateBatch()`, and beginPass + // accepts a null batch — so passing nullptr here is safe. + QRhiResourceUpdateBatch* res = nullptr; renderSubRegion(windowIndex, renderer, cb, res); } @@ -503,7 +509,7 @@ class MultiWindowRenderer final : public score::gfx::OutputNodeRenderer res->updateDynamicBuffer(pw.warpUBO, 0, sizeof(warpData), warpData); } - cb.beginPass(rt, Qt::black, {1.0f, 0}, res); + cb.beginPass(rt, Qt::black, {0.0f, 0}, res); res = nullptr; { auto sz = wo.swapChain->currentPixelSize(); @@ -557,15 +563,20 @@ void MultiWindowNode::setRenderSize(QSize sz) m_renderState->renderSize = sz; - // The offscreen target must be recreated BEFORE the render-list - // rebuild so that the new upstream pipelines are built against the - // new RPD and sample from the new offscreen texture. The old - // pipelines briefly reference the deleted RPD, but their destruction - // (inside the upcoming m_onResize) doesn't dereference it. 
- recreateOffscreenTarget(); - + // Tear down the existing render list (and all pipelines built against + // the old offscreen RPD) BEFORE recreating the offscreen target, so + // no pipeline ever references a freed RPD pointer. m_onResize triggers + // recreateOutputRenderList which calls release() on every pass — the + // pipeline destructors enqueue their underlying GPU resources via + // QRhi's deferred-release queue and never dereference the RPD again. + // Only after the render list has released its references is it safe + // to swap the offscreen RT/RPD; the subsequent createOutputRenderList + // (kicked off by the same m_onResize callback) will then build new + // pipelines against the freshly recreated m_offscreenTarget. if(m_onResize) m_onResize(); + + recreateOffscreenTarget(); } void MultiWindowNode::setSourceRect(int windowIndex, QRectF rect) @@ -612,12 +623,24 @@ void MultiWindowNode::setTransform(int windowIndex, int rotation, bool mirrorX, void MultiWindowNode::setSwapchainFlag(Gfx::SwapchainFlag flag) { + if(m_swapchainFlag == flag) + return; m_swapchainFlag = flag; + // Live flag change requires per-window swapchain recreation. Mirrors + // ScreenNode::setSwapchainFlag — destroyOutput tears down all windows; + // the Graph reconciler rebuilds them on next cycle picking up the new + // flag at the swapchain create site. + destroyOutput(); } void MultiWindowNode::setSwapchainFormat(Gfx::SwapchainFormat format) { + if(m_swapchainFormat == format) + return; m_swapchainFormat = format; + // Same rebuild rationale — without it the field updated but the live + // swapchains kept their prior format (HDR↔SDR toggle silently inert). 
+ destroyOutput(); } void MultiWindowNode::startRendering() @@ -657,7 +680,7 @@ void MultiWindowNode::renderBlack() auto cb = wo.swapChain->currentFrameCommandBuffer(); auto batch = rhi->nextResourceUpdateBatch(); - cb->beginPass(wo.swapChain->currentFrameRenderTarget(), Qt::black, {1.0f, 0}, batch); + cb->beginPass(wo.swapChain->currentFrameRenderTarget(), Qt::black, {0.0f, 0}, batch); cb->endPass(); rhi->endFrame(wo.swapChain); @@ -868,10 +891,6 @@ void MultiWindowNode::releaseWindowSwapChain(int index) if(!wo.swapChain && !wo.depthStencil && !wo.renderPassDescriptor) return; - // Wait for any in-flight frames touching this swap chain before tearing - // its resources down. - m_renderState->rhi->finish(); - // Release the renderer's per-window GPU state first, so its pipeline // (built against wo.renderPassDescriptor) is gone before we delete the // RPD itself. @@ -887,16 +906,30 @@ void MultiWindowNode::releaseWindowSwapChain(int index) } } - delete wo.swapChain; - wo.swapChain = nullptr; + // Order matters: clear hasSwapChain BEFORE releasing wo.swapChain so a + // queued expose / resize event landing in the middle of teardown can + // never observe (hasSwapChain == true && swapChain dangling). See + // diagnostic 047. + wo.hasSwapChain = false; - delete wo.depthStencil; + // Use deleteLater() instead of a synchronous rhi->finish() + delete. + // rhi->finish() issues vkQueueWaitIdle which drains ALL in-flight work on + // the graphics queue — stalling every other window. deleteLater() defers + // native-object destruction to the next endFrame() when the relevant frame + // slot is known safe, with no cross-window stall. See diagnostic 048. 
+ auto* sc = wo.swapChain; + wo.swapChain = nullptr; + auto* ds = wo.depthStencil; wo.depthStencil = nullptr; - - delete wo.renderPassDescriptor; + auto* rpd = wo.renderPassDescriptor; wo.renderPassDescriptor = nullptr; - wo.hasSwapChain = false; + if(sc) + sc->deleteLater(); + if(ds) + ds->deleteLater(); + if(rpd) + rpd->deleteLater(); } void MultiWindowNode::createOutput(score::gfx::OutputConfiguration conf) @@ -1034,6 +1067,11 @@ void MultiWindowNode::destroyOutput() // there are still frames in flight when resources are destroyed. m_renderState->rhi->finish(); + // Persist-across-rebuild contract: registry survives RL teardown, + // so its QRhi resources have to be torn down here (BEFORE + // RenderState::destroy below) while the device is still alive. + releaseRegistry(); + // Detach Window callbacks so a close that races with destruction can't // reach back into us while we're tearing things down. for(auto& wo : m_windowOutputs) @@ -1051,6 +1089,11 @@ void MultiWindowNode::destroyOutput() // outlive the rhi's teardown of per-window state. for(auto& wo : m_windowOutputs) { + // Order matters: clear hasSwapChain BEFORE deleting wo.swapChain so a + // queued event cannot observe (hasSwapChain == true && swapChain + // dangling). See diagnostic 047. + wo.hasSwapChain = false; + delete wo.swapChain; wo.swapChain = nullptr; @@ -1059,8 +1102,6 @@ void MultiWindowNode::destroyOutput() delete wo.renderPassDescriptor; wo.renderPassDescriptor = nullptr; - - wo.hasSwapChain = false; } // 2. Release the offscreen target (texture + depth + RT + RPD). This @@ -1090,6 +1131,43 @@ void MultiWindowNode::updateGraphicsAPI(GraphicsApi api) return; if(m_renderState->api != api) + { + destroyOutput(); + return; + } + + // Same API, but the requested sample count may have changed via the + // settings panel. Mirror ScreenNode's clamp-and-compare path: rebuild + // if the resolved sample count no longer matches what the rhi was + // created with. 
+ auto* rhi = m_renderState->rhi; + if(!rhi) + return; + + int samples_request + = score::AppContext().settings().resolveSamples(api); + const auto supported = rhi->supportedSampleCounts(); + if(supported.isEmpty()) + { + samples_request = 1; + } + else + { + int chosen = supported.first(); + for(int v : supported) + { + if(v == samples_request) + { + chosen = v; + break; + } + if(v < samples_request) + chosen = v; + } + samples_request = chosen; + } + + if(m_renderState->samples != samples_request) destroyOutput(); } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp index fa847b03e3..35f26d9018 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Node.hpp @@ -160,7 +160,19 @@ class SCORE_PLUGIN_GFX_EXPORT Node : public QObject int32_t nodeId = score::gfx::invalid_node_index; bool requiresDepth{}; - bool addedToGraph{}; + + /** + * @brief Whether a given port has a user-specified render target size. + * + * Returns true only if the user explicitly set a size via render_target_spec. + * Used by backward size propagation to decide whether to inherit + * the downstream render target size. 
+ */ + bool hasExplicitRenderTargetSize(int32_t port) const noexcept + { + auto it = renderTargetSpecs.find(port); + return it != renderTargetSpecs.end() && it->second.size.has_value(); + } QSize resolveRenderTargetSize(int32_t port, RenderList& renderer) const noexcept; RenderTargetSpecs diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp index 3ebeadc500..272853cf23 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.cpp @@ -4,6 +4,10 @@ #include +#include +#include +#include + #include namespace score::gfx @@ -14,6 +18,70 @@ TextureRenderTarget NodeRenderer::renderTargetForInput(const Port& p) return {}; } +void NodeRenderer::initState(RenderList&, QRhiResourceUpdateBatch&) { } + +void NodeRenderer::releaseState(RenderList&) { } + +void NodeRenderer::addOutputPass(RenderList&, Edge&, QRhiResourceUpdateBatch&) { } + +void NodeRenderer::updateInputSamplerFilter( + const Port& input, const RenderTargetSpecs& spec) +{ + // Default: no-op. Renderers that cache samplers should override. +} + +void NodeRenderer::addInputEdge(RenderList&, Edge&, QRhiResourceUpdateBatch&) { } + +// When an upstream edge is removed (e.g. the user inserts a Transform3D in +// the middle of an existing glTF → ScenePreprocessor wire), drop the cached +// per-(port, source) entry this edge was populating. Without this, the +// last scene/geometry pushed by the now-disconnected producer lingers in +// m_portScenes / m_portGeometries forever and rebuildMergedScene keeps +// merging it in — the user saw the "scene doesn't disappear until +// stop/start" symptom. Also wipe the merge cache so the next merge runs +// fresh. +void NodeRenderer::removeInputEdge(RenderList&, Edge& edge) +{ + if(!edge.sink || !edge.sink->node) + return; + + // Figure out which input port of the sink this edge was landing on. 
+ const auto& inputs = edge.sink->node->input; + int32_t port = -1; + for(std::size_t i = 0; i < inputs.size(); ++i) + { + if(inputs[i] == edge.sink) + { + port = (int32_t)i; + break; + } + } + if(port < 0) + return; + + const void* source_key = edge.source; + const PortSourceKey key{port, source_key}; + + m_portGeometries.erase(key); + m_portScenes.erase(key); + m_wrapCache.erase(key); + + // Also drop the legacy nullptr-keyed slot in case this edge was the sole + // contributor via the 2-arg process() path. + const PortSourceKey legacyKey{port, nullptr}; + m_portGeometries.erase(legacyKey); + m_portScenes.erase(legacyKey); + m_wrapCache.erase(legacyKey); + + // Force rebuildMergedScene to recompute from scratch next time. + m_mergeCacheInputs.clear(); + m_mergeCacheOutput = {}; +} + +bool NodeRenderer::hasOutputPassForEdge(Edge& edge) const { return false; } + +void NodeRenderer::seedInitialOutputs(RenderList&) { } + void defaultPassesInit( PassMap& passes, const std::vector& edges, RenderList& renderer, const Mesh& mesh, const QShader& v, const QShader& f, QRhiBuffer* processUBO, @@ -29,7 +97,7 @@ void defaultPassesInit( auto pip = score::gfx::buildPipeline( renderer, mesh, v, f, rt, processUBO, matUBO, samplers, additionalBindings); if(pip.pipeline) - passes.emplace_back(edge, pip); + passes.emplace_back(edge, Pass{rt, pip, nullptr}); } } } @@ -43,8 +111,8 @@ void defaultRenderPass( if(it != passes.end()) { const auto sz = renderer.renderSize(&edge); - cb.setGraphicsPipeline(it->second.pipeline); - cb.setShaderResources(it->second.srb); + cb.setGraphicsPipeline(it->second.p.pipeline); + cb.setShaderResources(it->second.p.srb); cb.setViewport(QRhiViewport(0, 0, sz.width(), sz.height())); mesh.draw(bufs, cb); @@ -61,11 +129,12 @@ void quadRenderPass( { auto it = ossia::find_if(passes, [ptr = &edge](const auto& p) { return p.first == ptr; }); - SCORE_ASSERT(it != passes.end()); + if(it == passes.end()) + return; { const auto sz = renderer.renderSize(&edge); - 
cb.setGraphicsPipeline(it->second.pipeline); - cb.setShaderResources(it->second.srb); + cb.setGraphicsPipeline(it->second.p.pipeline); + cb.setShaderResources(it->second.p.srb); cb.setViewport(QRhiViewport(0, 0, sz.width(), sz.height())); const auto& mesh = renderer.defaultQuad(); @@ -115,6 +184,14 @@ void GenericNodeRenderer::defaultPassesInit( } void GenericNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + for(Edge* edge : this->node.output[0]->edges) + addOutputPass(renderer, *edge, res); +} + +void GenericNodeRenderer::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { m_mesh = &renderer.defaultTriangle(); auto& mesh = *m_mesh; @@ -122,8 +199,174 @@ void GenericNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& re processUBOInit(renderer); m_material.init(renderer, node.input, m_samplers); + // Upload initial material data + if(m_material.buffer && m_material.size > 0) + { + auto& n = static_cast(this->node); + if(n.m_materialData) + res.updateDynamicBuffer(m_material.buffer, 0, m_material.size, n.m_materialData.get()); + } + + m_initialized = true; +} + +void GenericNodeRenderer::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(!m_mesh) + return; + if(this->node.output[0]->type != score::gfx::Types::Image) + return; + + auto rt = renderer.renderTargetForOutput(edge); + if(!rt.renderTarget) + return; + + // Every edge gets its own SRB. Layout is identical across edges + // (same node, same sampler count, same UBOs) so the SRBs are all + // layout-compatible — a requirement for sharing a pipeline built + // against any one of them. + auto* srb = score::gfx::createDefaultBindings( + renderer, rt, m_processUBO, m_material.buffer, m_samplers); + if(!srb) + return; + + // Reuse an existing pipeline when this renderer already has one built + // against the same QRhiRenderPassDescriptor. 
Same rp-desc pointer ⇒ + // same owning RT ⇒ every pipeline compatibility rule on Vulkan, + // D3D12 and Metal is satisfied. A different rt with an isCompatible + // rp-desc at a *different* pointer is deliberately not matched here: + // it would require tracking which pipelines still have a live rp-desc, + // and the common sharing case (two edges to the same sink port) already + // falls out of the pointer check. + QRhiGraphicsPipeline* pipeline = nullptr; + for(auto& [desc, pipe] : m_pipelineCache) + { + if(desc == rt.renderPass && pipe) + { + pipeline = pipe; + break; + } + } + + if(!pipeline) + { + auto pip = score::gfx::buildPipeline( + renderer, *m_mesh, m_vertexS, m_fragmentS, rt, srb); + if(!pip.pipeline) + { + srb->deleteLater(); + return; + } + pipeline = pip.pipeline; + m_pipelineCache.emplace_back(rt.renderPass, pipeline); + } - defaultPassesInit(renderer, mesh); + // Pass::p.pipeline is non-owning here — the cache owns it. removeOutputPass + // and releaseState null-out pipeline before Pipeline::release() so the + // Pass release path only destroys the SRB. + m_p.emplace_back(&edge, Pass{rt, Pipeline{pipeline, srb}, nullptr}); +} + +void GenericNodeRenderer::removeOutputPass(RenderList& renderer, Edge& edge) +{ + auto it + = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }); + if(it == m_p.end()) + return; + + QRhiGraphicsPipeline* pipeline = it->second.p.pipeline; + + // Determine ownership: the pipeline is cache-owned iff an m_pipelineCache + // entry still points to it. Passes produced by addOutputPass share + // cache-owned pipelines; Passes produced by defaultPassesInit (ImageNode + // and the like, which pre-date this cache) own their own pipeline. + auto cacheIt = ossia::find_if( + m_pipelineCache, [&](const auto& e) { return e.second == pipeline; }); + const bool cacheOwned = (cacheIt != m_pipelineCache.end()); + + if(cacheOwned) + { + // Detach so Pipeline::release() won't deleteLater() the cached + // pipeline. 
The SRB is still per-edge and gets dropped normally. + it->second.p.pipeline = nullptr; + } + it->second.release(); + m_p.erase(it); + + if(!cacheOwned || !pipeline) + return; + + // If no other Pass still references this cached pipeline, evict it. + // Otherwise long-lived renderers would accumulate one cache entry per + // historical rp-desc pointer until releaseState. + for(const auto& entry : m_p) + { + if(entry.second.p.pipeline == pipeline) + return; // still in use — leave the cache entry alone + } + pipeline->deleteLater(); + m_pipelineCache.erase(cacheIt); +} + +bool GenericNodeRenderer::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); +} + +void GenericNodeRenderer::releaseState(RenderList& renderer) +{ + if(!m_initialized) + return; + + // Release any remaining passes. Pipelines stored in m_pipelineCache + // are owned by the renderer itself and must NOT be deleteLater'd via + // Pipeline::release(); any Pass whose p.pipeline is cache-owned gets + // its pipeline zeroed out first so the Pass only drops its SRB. + // Passes whose pipeline is NOT in the cache (produced by + // defaultPassesInit — see ImageNode::PreloadedRenderer) retain the + // original owning release semantics. + for(auto& pass : m_p) + { + auto* pipeline = pass.second.p.pipeline; + if(pipeline) + { + const bool cacheOwned = ossia::any_of( + m_pipelineCache, [&](const auto& e) { return e.second == pipeline; }); + if(cacheOwned) + pass.second.p.pipeline = nullptr; + } + pass.second.release(); + } + m_p.clear(); + + // Now destroy the cached pipelines. 
+ for(auto& [desc, pipeline] : m_pipelineCache) + { + if(pipeline) + pipeline->deleteLater(); + } + m_pipelineCache.clear(); + + for(auto sampler : m_samplers) + { + delete sampler.sampler; + // texture is deleted elsewhere + } + m_samplers.clear(); + + delete m_processUBO; + m_processUBO = nullptr; + + delete m_material.buffer; + m_material.buffer = nullptr; + + // FIXME Check that they get released? + // We should have a refcount for this + m_meshbufs = {}; + + m_initialized = false; } void GenericNodeRenderer::defaultUBOUpdate( @@ -139,6 +382,7 @@ void GenericNodeRenderer::defaultUBOUpdate( char* data = n.m_materialData.get(); res.updateDynamicBuffer(m_material.buffer, 0, m_material.size, data); } + materialChanged = false; } } @@ -176,10 +420,32 @@ void GenericNodeRenderer::update( void GenericNodeRenderer::defaultRelease(RenderList&) { + // Mirror the ownership handling in releaseState — cache-owned pipelines + // are destroyed by the cache, not by Pipeline::release(). + for(auto& pass : m_p) + { + auto* pipeline = pass.second.p.pipeline; + if(pipeline) + { + const bool cacheOwned = ossia::any_of( + m_pipelineCache, [&](const auto& e) { return e.second == pipeline; }); + if(cacheOwned) + pass.second.p.pipeline = nullptr; + } + pass.second.release(); + } + m_p.clear(); + + for(auto& [desc, pipeline] : m_pipelineCache) + { + if(pipeline) + pipeline->deleteLater(); + } + m_pipelineCache.clear(); + for(auto sampler : m_samplers) { delete sampler.sampler; - // texture isdeleted elsewxheree } m_samplers.clear(); @@ -189,13 +455,9 @@ void GenericNodeRenderer::defaultRelease(RenderList&) delete m_material.buffer; m_material.buffer = nullptr; - for(auto& pass : m_p) - pass.second.release(); - m_p.clear(); - - // FIXME Check that they get released? 
- // We should have a refcount for this m_meshbufs = {}; + + m_initialized = false; } void NodeRenderer::runInitialPasses( @@ -206,10 +468,74 @@ void NodeRenderer::runInitialPasses( void NodeRenderer::runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) { } +// Rebuild `this->scene` as the merge of every m_portScenes entry, +// memoized on the set of input scene_state pointers. When unchanged, the +// previous merged scene_spec (and its scene_state shared_ptr) is reused +// verbatim — which is what lets downstream consumers like +// ScenePreprocessorNode keep their version/pointer caches hot instead of +// re-decoding textures and re-uploading vertex/index buffers per frame. +void NodeRenderer::rebuildMergedScene() +{ + ossia::small_vector sig; + ossia::small_vector valid; + for(auto& kv : m_portScenes) + { + const auto& s = kv.second; + // Drop the `!s.state->empty()` filter: env-only producers + // (EnvironmentLoader, CubemapLoader, …) have an empty roots vector + // but still contribute environment fields — dropping them here + // would make their skybox / ambient / fog updates invisible. Empty + // roots are handled gracefully by the downstream merge. 
+ if(s.state) + { + sig.push_back({s.state.get(), s.state->version}); + valid.push_back(&s); + } + } + + if(sig == m_mergeCacheInputs && m_mergeCacheOutput.state) + { + this->scene = m_mergeCacheOutput; + return; + } + m_mergeCacheInputs.assign(sig.begin(), sig.end()); + + if(valid.empty()) + { + this->scene = {}; + m_mergeCacheOutput = {}; + return; + } + if(valid.size() == 1) + { + this->scene = *valid[0]; + m_mergeCacheOutput = this->scene; + return; + } + + ossia::small_vector input_copies; + input_copies.reserve(valid.size()); + for(auto* s : valid) + input_copies.push_back(*s); + this->scene + = ossia::merge_scenes(std::span{ + input_copies.data(), input_copies.size()}); + m_mergeCacheOutput = this->scene; +} + void NodeRenderer::process(int32_t port, const ossia::geometry_spec& v) { - // Store per-port for multi-geometry-port nodes (CSF) - m_portGeometries[port] = v; + process(port, v, nullptr); +} + +void NodeRenderer::process( + int32_t port, const ossia::geometry_spec& v, const void* source_key) +{ + const PortSourceKey key{port, source_key}; + + // Store per-(port,source) for multi-geometry-port nodes (CSF) and for + // multi-producer accumulation on the same port. + m_portGeometries[key] = v; // Backward compat: keep the single geometry field updated // (used by GenericNodeRenderer, RenderedRawRasterPipelineNode, etc.) @@ -218,28 +544,146 @@ void NodeRenderer::process(int32_t port, const ossia::geometry_spec& v) this->geometry = v; geometryChanged = true; } - else + else if(this->geometry.meshes) { - if(this->geometry.meshes) + for(auto& mesh : this->geometry.meshes->meshes) { - for(auto& mesh : this->geometry.meshes->meshes) + for(auto& buf : mesh.buffers) { - for(auto& buf : mesh.buffers) + if(buf.dirty) { - if(buf.dirty) - { - geometryChanged = true; - break; - } - } - if(geometryChanged) + geometryChanged = true; break; + } } + if(geometryChanged) + break; + } + } + + // Auto-wrap into scene for scene-aware renderers. 
The wrap is cached + // per (port,source) keyed on the geometry_spec identity: if the same + // spec is re-pushed (common case — glTF / FBX loaders re-publish every + // frame even when nothing changed) the wrapper's scene_state shared_ptr + // stays stable across frames, which is what the merge memoization + // relies on. + auto& cache_entry = m_wrapCache[key]; + if(cache_entry.first != v || !cache_entry.second.state) + { + cache_entry.first = v; + cache_entry.second = ossia::wrap_geometry_as_scene(v); + } + m_portScenes[key] = cache_entry.second; + sceneChanged = true; + rebuildMergedScene(); +} + +void NodeRenderer::process(int32_t port, const ossia::scene_spec& v) +{ + process(port, v, nullptr); +} + +void NodeRenderer::process( + int32_t port, const ossia::scene_spec& v, const void* source_key) +{ + const PortSourceKey key{port, source_key}; + m_portScenes[key] = v; + sceneChanged = true; + rebuildMergedScene(); + + // For backward compatibility: extract the first geometry from the scene + // so that renderers that only understand geometry_spec still work. + auto geom = ossia::extract_first_geometry(v); + if(geom) + { + m_portGeometries[key] = geom; + if(this->geometry != geom) + { + this->geometry = geom; + geometryChanged = true; } } } -void NodeRenderer::process(int32_t port, const ossia::transform3d& v) { } +void NodeRenderer::process(int32_t port, const ossia::transform3d& v) +{ + // Apply the matrix transform to the last root node in the scene. + // Geometry is always pushed before transform for the same edge. + // We wrap the last root's children under a scene_transform payload. + if(!this->scene.state || this->scene.state->empty()) + return; + + // Convert matrix-based transform3d to TRS scene_transform. + // The matrix is column-major (from QMatrix4x4::data()). 
+ QMatrix4x4 mat(v.matrix, 4, 4); + QVector3D translation = mat.column(3).toVector3D(); + + // Extract rotation (assumes no shear) + QVector3D col0 = mat.column(0).toVector3D(); + QVector3D col1 = mat.column(1).toVector3D(); + QVector3D col2 = mat.column(2).toVector3D(); + QVector3D scale(col0.length(), col1.length(), col2.length()); + + QMatrix3x3 rotMat; + if(scale.x() > 0.f) col0 /= scale.x(); + if(scale.y() > 0.f) col1 /= scale.y(); + if(scale.z() > 0.f) col2 /= scale.z(); + float rot3x3[9] = { + col0.x(), col1.x(), col2.x(), + col0.y(), col1.y(), col2.y(), + col0.z(), col1.z(), col2.z()}; + QQuaternion quat = QQuaternion::fromRotationMatrix(QMatrix3x3(rot3x3)); + + ossia::scene_transform xform; + xform.translation[0] = translation.x(); + xform.translation[1] = translation.y(); + xform.translation[2] = translation.z(); + xform.rotation[0] = quat.x(); + xform.rotation[1] = quat.y(); + xform.rotation[2] = quat.z(); + xform.rotation[3] = quat.scalar(); + xform.scale[0] = scale.x(); + xform.scale[1] = scale.y(); + xform.scale[2] = scale.z(); + + // Rebuild: wrap the last root under a new parent with [transform, old_root] + auto new_roots = std::make_shared>(); + for(auto& root : *this->scene.state->roots) + new_roots->push_back(root); + + if(!new_roots->empty()) + { + auto& last_root = new_roots->back(); + if(last_root) + { + auto new_children = std::make_shared>(); + new_children->push_back(xform); + // Carry over original children + if(last_root->has_children()) + for(auto& child : *last_root->children) + new_children->push_back(child); + + auto new_node = std::make_shared(); + new_node->id = last_root->id; + new_node->children = std::move(new_children); + new_roots->back() = std::move(new_node); + } + } + + auto new_state = std::make_shared(); + new_state->roots = std::move(new_roots); + if(this->scene.state->materials) + new_state->materials = this->scene.state->materials; + if(this->scene.state->animations) + new_state->animations = 
this->scene.state->animations; + + this->scene.state = std::move(new_state); + // transform3d mutates the merged scene in place; republish it on the + // (port, nullptr) slot since there's no single upstream producer identity + // for the transformed result. + m_portScenes[PortSourceKey{port, nullptr}] = this->scene; + sceneChanged = true; +} void GenericNodeRenderer::defaultRenderPass( RenderList& renderer, const Mesh& mesh, QRhiCommandBuffer& cb, Edge& edge) @@ -261,7 +705,7 @@ void GenericNodeRenderer::runRenderPass( defaultRenderPass(renderer, mesh, cb, edge); } -void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex) +void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { int sampler_idx = 0; for(auto* p : node.input) @@ -269,7 +713,12 @@ void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex if(p == &input) break; if(p->type == Types::Image) + { sampler_idx++; + // Skip the depth sampler that follows ports with SamplableDepth + if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) + sampler_idx++; + } } if(sampler_idx < (int)m_samplers.size()) @@ -279,15 +728,30 @@ void GenericNodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex { sampl.texture = tex; for(auto& [e, pass] : m_p) - if(pass.srb) - score::gfx::replaceTexture(*pass.srb, sampl.sampler, tex); + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex); + } + + // Update the depth sampler if the port has SamplableDepth + if(depthTex + && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth + && sampler_idx + 1 < (int)m_samplers.size()) + { + auto& depthSampl = m_samplers[sampler_idx + 1]; + if(depthSampl.texture != depthTex) + { + depthSampl.texture = depthTex; + for(auto& [e, pass] : m_p) + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex); + } } } } void GenericNodeRenderer::release(RenderList& r) { - 
defaultRelease(r); + releaseState(r); } score::gfx::NodeRenderer::~NodeRenderer() { } @@ -307,7 +771,7 @@ QRhiTexture* NodeRenderer::textureForOutput(const Port& output) return nullptr; } -void NodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex) +void NodeRenderer::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp index bec85ba180..71e56dfe38 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/NodeRenderer.hpp @@ -26,9 +26,16 @@ class SCORE_PLUGIN_GFX_EXPORT NodeRenderer //! downstream-provided render target. virtual QRhiTexture* textureForOutput(const Port& output); - //! Updates the sampler texture for a GrabsFromSource input port. - //! Called from the render loop when the upstream texture may have changed. - virtual void updateInputTexture(const Port& input, QRhiTexture* tex); + //! Updates the sampler texture for an input port. + //! Called when the upstream texture may have changed (edge add, RT recreation). + //! If the port has SamplableDepth and depthTex is non-null, the depth + //! sampler (immediately after the color sampler) is also updated. + virtual void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr); + + //! Updates the sampler filter/address settings for an input port. + //! Called when the render target spec changes (e.g. linear → nearest). + virtual void updateInputSamplerFilter( + const Port& input, const RenderTargetSpecs& spec); //! Called when all the inbound nodes to a texture input have finished rendering. //! Mainly useful to slip in a readback. 
@@ -47,17 +54,126 @@ class SCORE_PLUGIN_GFX_EXPORT NodeRenderer virtual void release(RenderList&) = 0; + /** + * @name Incremental lifecycle API + * + * These methods enable dynamic graph editing by splitting the init/release + * lifecycle into edge-independent state and per-edge passes. + * + * Renderers that override these are incrementally updateable: adding or + * removing an output edge only creates/destroys one pass, without touching + * the rest of the renderer's GPU resources. + * + * Default implementations are no-ops for backward compatibility. + * @{ + */ + + /// Initialize edge-independent state: material UBO, samplers, mesh, shaders. + /// Called once when the renderer enters a RenderList. + virtual void initState(RenderList& renderer, QRhiResourceUpdateBatch& res); + + /// Release edge-independent state. + /// Called once when the renderer leaves a RenderList. + virtual void releaseState(RenderList& renderer); + + /// Create a pass for a new output edge (pipeline, SRB, processUBO). + virtual void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res); + + /// Remove the pass for a removed output edge. + /// Pure-virtual: every concrete renderer must explicitly handle edge + /// removal. Sinks (OutputNodeRenderer) and data-only renderers that + /// store no per-edge GPU state can override with an empty body. + virtual void removeOutputPass(RenderList& renderer, Edge& edge) = 0; + + /// Notify the renderer that a new input edge was connected. + /// Typically updates sampler textures or geometry bindings. + virtual void + addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res); + + /// Notify the renderer that an input edge was disconnected. + virtual void removeInputEdge(RenderList& renderer, Edge& edge); + + /// Check if this renderer already has an output pass for the given edge. 
+ virtual bool hasOutputPassForEdge(Edge& edge) const; + + /// Seed downstream consumers once at init-time with this renderer's + /// current outputs. Default no-op. Halp scene/geometry producers (Camera, + /// EnvironmentLoader, Light, …) override this to run their + /// operator()() once during reconciliation and immediately push the + /// result into each downstream sink's per-port scene cache — without + /// this, a live-inserted producer's output wouldn't reach the sink's + /// `m_portScenes` until the next render frame's upstream scan fires the + /// producer's runInitialPasses, which can arrive too late relative to + /// the sink's own frame-start cache snapshot and produce the + /// "Camera inserted live has no effect until stop/restart" symptom. + virtual void seedInitialOutputs(RenderList& renderer); + + /** @} */ + void checkForChanges() { - materialChanged = node.hasMaterialChanged(materialChangedIndex); - renderTargetSpecsChanged - = node.hasRenderTargetChanged(renderTargetSpecsChangedIndex); + // Use |= to preserve flags set externally (e.g. by reconciliation + // or maybeRebuild). The flag is cleared by the renderer's update() + // after processing, preventing infinite re-uploads. + materialChanged |= node.hasMaterialChanged(materialChangedIndex); + renderTargetSpecsChanged |= node.hasRenderTargetChanged(renderTargetSpecsChangedIndex); + } + + /// Sync only the render target spec index without touching materialChanged. + /// Used after initState() so the first render's checkForChanges() sees a + /// material mismatch (triggering initial upload) but not a spurious rt_changed. 
+ void syncRenderTargetIndex() + { + node.hasRenderTargetChanged(renderTargetSpecsChangedIndex); + renderTargetSpecsChanged = false; } - // FIXME this will change when we have a proper scene node void process(int32_t port, const ossia::geometry_spec& v); + void process(int32_t port, const ossia::scene_spec& v); virtual void process(int32_t port, const ossia::transform3d& v); + /// Source-aware overloads. `source_key` is an opaque identity of the + /// upstream output port that produced this data (typically `edge.source`). + /// Multiple producers converging on the same sink port each get their own + /// storage slot, so their scenes accumulate additively instead of + /// overwriting each other. Callers that don't care pass nullptr — all such + /// callers then share a single per-port slot (legacy behavior). + void process(int32_t port, const ossia::geometry_spec& v, const void* source_key); + void process(int32_t port, const ossia::scene_spec& v, const void* source_key); + + /// Find the first geometry stored on the given sink port (across all + /// sources). Legacy single-producer-per-port consumers use this to + /// preserve pre-multi-producer behavior without caring who produced it. + const ossia::geometry_spec* findGeometryByPort(int32_t port) const + { + for(const auto& [k, v] : m_portGeometries) + if(k.first == port) + return &v; + return nullptr; + } + + /// Enumerate every scene_spec published on `port` (across all sources). + /// Populated for ALL geometry/scene edges — raw geometry_spec deliveries + /// are auto-wrapped into scene_specs and cached (see m_wrapCache), so the + /// scene_state_ptr returned here is stable across frames when the input + /// doesn't actually change. Callers doing scene-broadcast iterate this + /// and check scene_state::dirty_index + state pointer for invalidation. 
+ template + void forEachSceneOnPort(int32_t port, F&& fn) const + { + for(const auto& [k, v] : m_portScenes) + if(k.first == port && v.state) + fn(v); + } + +private: + /// Recompute `this->scene` from the current per-port inputs, reusing the + /// memoized merge when the set of input scene_state pointers is unchanged. + void rebuildMergedScene(); + +public: + const Node& node; /** @@ -72,21 +188,100 @@ class SCORE_PLUGIN_GFX_EXPORT NodeRenderer */ ossia::geometry_spec geometry; - /// Per-port geometry storage for nodes with multiple geometry inputs. - /// Key is the input port index. - ossia::small_flat_map m_portGeometries; + /// Per-(port, source) geometry storage. Multi-keyed so multiple upstream + /// producers converging on the same sink port each get their own slot + /// (additive merge rather than overwrite). The source_key is the upstream + /// output Port pointer (opaque void*); nullptr is a valid single-slot key + /// for legacy callers. + using PortSourceKey = std::pair; + ossia::small_flat_map m_portGeometries; + + /** + * @brief The scene to use (when receiving scene_spec data). + * + * When a geometry_spec is received, it is auto-wrapped into a scene_spec + * so that downstream scene-aware renderers can always work with scenes. + * Backward-compat renderers continue reading the `geometry` field. + */ + ossia::scene_spec scene; + + /// Per-(port, source) scene storage. See m_portGeometries comment. + ossia::small_flat_map m_portScenes; + + /// Merge cache: the set of (scene_state pointer, version) pairs we + /// last merged, and the resulting merged scene_spec. Keyed on BOTH + /// pointer and version because halp-style producers (Camera, + /// Environment, Light, …) keep a stable `m_state` + /// shared_ptr and mutate its contents in place — keying on pointer + /// alone would return a stale cached merge even after a slider moved. + /// The version monotonically bumps on each producer update, so + /// (ptr, version) changes whenever content changes. 
+ using MergeCacheKey = std::pair; + ossia::small_vector m_mergeCacheInputs; + ossia::scene_spec m_mergeCacheOutput; + + /// Cache the wrap_geometry_as_scene result per geometry_spec so a + /// geometry source re-pushing the same geometry_spec every frame + /// produces a stable wrapped-scene shared_ptr (otherwise every frame + /// produces a new wrapper → merge cache miss → full re-upload). + ossia::small_flat_map< + PortSourceKey, std::pair, 4> + m_wrapCache; int32_t nodeId{-1}; bool materialChanged{false}; bool geometryChanged{false}; + bool sceneChanged{false}; bool renderTargetSpecsChanged{false}; + /// Guard for idempotent release — prevents double-release of GPU resources. + /// Set to true at end of init(), cleared at start of release(). + bool m_initialized{false}; + private: int64_t materialChangedIndex{-1}; int64_t renderTargetSpecsChangedIndex{-1}; }; -using PassMap = ossia::small_vector, 2>; +struct Pass +{ + // User-declared ctors (including the implicit ones made explicit + // here) suppress -Wmissing-field-initializers on the many call sites + // that brace-init this struct with three arguments — the fallback + // plan is always default-constructed into an empty list, which is + // exactly what non-fallback pipelines need. Removing aggregate-init + // eligibility is intentional; the tradeoff is one line per call + // site (if they want to set fallback_bindings, they assign after). + Pass() = default; + Pass(TextureRenderTarget rt, Pipeline pi, QRhiBuffer* ubo) + : renderTarget{std::move(rt)}, p{pi}, processUBO{ubo} {} + + TextureRenderTarget renderTarget; + Pipeline p; + QRhiBuffer* processUBO{}; + // Bindings for "REQUIRED: false" VERTEX_INPUTS that had no matching + // upstream attribute when this pass's pipeline was built. Empty for + // pipelines where the shader is strict-matched (the common case). + // Consumed by the draw path: each slot's buffer is bound at its + // `binding_index` in the vertex-input array before the draw call. 
+ // The buffers themselves are owned by VertexFallbackPool — the plan + // holds non-owning pointers. + FallbackBindingPlan fallback_bindings; + + void release() + { + p.release(); + if(processUBO) + { + processUBO->deleteLater(); + processUBO = nullptr; + } + fallback_bindings.clear(); + // renderTarget NOT released here — owned by RenderList + } +}; + +using PassMap = ossia::small_vector, 2>; SCORE_PLUGIN_GFX_EXPORT void defaultPassesInit( PassMap& passes, const std::vector& edges, RenderList& renderer, @@ -128,6 +323,26 @@ class SCORE_PLUGIN_GFX_EXPORT GenericNodeRenderer : public score::gfx::NodeRende // Pipeline PassMap m_p; + // Per-renderer pipeline cache, keyed by QRhiRenderPassDescriptor pointer. + // Edges targeting the same QRhiRenderTarget (and therefore the same + // rp-desc pointer) share one QRhiGraphicsPipeline — the pipeline object + // is bound to an rp-desc layout, not to the RT object itself, and QRhi + // guarantees the same pipeline can be used with any RT whose rp-desc + // isCompatible with the pipeline's. Looking up by pointer (rather than + // by serialized format) is the conservative choice: a pointer match + // means "same rp-desc, same owning RT alive" and cannot collide with a + // stale entry because a freshly allocated rp-desc always sits at a + // different address than one that was just destroyed via deleteLater. + // + // Ownership: Pass::p.pipeline is NON-OWNING — the actual QRhiGraphicsPipeline + // lives in this cache. Pass::p.srb is still per-edge and owned by the Pass. + // GenericNodeRenderer::removeOutputPass and releaseState take care of + // nulling Pass::p.pipeline before calling Pipeline::release() so it + // does not try to deleteLater() a pointer we still own here. 
+ ossia::small_vector< + std::pair, 2> + m_pipelineCache; + MeshBuffers m_meshbufs; QRhiBuffer* m_processUBO{}; @@ -147,6 +362,13 @@ class SCORE_PLUGIN_GFX_EXPORT GenericNodeRenderer : public score::gfx::NodeRende void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + void defaultUBOUpdate(RenderList& renderer, QRhiResourceUpdateBatch& res); void defaultMeshUpdate(RenderList& renderer, QRhiResourceUpdateBatch& res); void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; @@ -163,7 +385,7 @@ class SCORE_PLUGIN_GFX_EXPORT GenericNodeRenderer : public score::gfx::NodeRende void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override; - void updateInputTexture(const Port& input, QRhiTexture* tex) override; + void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp index 7275300449..bfddcb57c8 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.cpp @@ -1,3 +1,4 @@ +#include #include namespace score::gfx { @@ -15,4 +16,31 @@ void OutputNodeRenderer::finishFrame( { } +GpuResourceRegistry& OutputNode::acquireRegistry() +{ + // Persist-across-rebuild contract: lazy-allocated once per OutputNode. 
+ // RenderList::init then either calls GpuResourceRegistry::init() (first + // RL on this OutputNode / first RL after a releaseRegistry()) or reuses + // the populated state as-is (every subsequent rebuild — what we want + // for the resize fast path). + if(!m_registry) + m_registry = std::make_unique(); + return *m_registry; +} + +void OutputNode::releaseRegistry() +{ + // Concrete subclasses MUST call this from destroyOutput() BEFORE the + // QRhi is torn down. destroyOwned() `delete`s the QRhiBuffer / + // QRhiTexture / QRhiSampler wrappers directly (no deleteLater path — + // the registry has outlived the RenderList that used to plumb + // releaseBuffer for it), so the QRhi must still be alive to honour the + // QRhiResource destructors. + if(m_registry) + { + m_registry->destroyOwned(); + m_registry.reset(); + } +} + } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp index 5618ae07d7..0059662cc1 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/OutputNode.hpp @@ -5,8 +5,12 @@ #include #include + +#include + namespace score::gfx { +class GpuResourceRegistry; struct OutputConfiguration { GraphicsApi graphicsApi{}; @@ -21,6 +25,12 @@ class SCORE_PLUGIN_GFX_EXPORT OutputNodeRenderer : public score::gfx::NodeRender virtual ~OutputNodeRenderer(); virtual void finishFrame(RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res); + + // Sinks have no output edges, so there is nothing to release per-edge. + // Concrete sinks may still override (e.g. to drop per-input bookkeeping + // routed through addOutputPass), but the default is a true no-op rather + // than the dangerous silent base-class no-op. 
+ void removeOutputPass(RenderList&, Edge&) override { } }; class Window; @@ -69,7 +79,55 @@ class SCORE_PLUGIN_GFX_EXPORT OutputNode : public score::gfx::Node virtual Configuration configuration() const noexcept = 0; + /** + * @brief Persistent GPU resource registry for this output. + * + * Persist-across-rebuild contract: this used to live on the + * RenderList (created in RenderList::init, destroyed in + * RenderList::release), so every viewport-resize-driven RL rebuild + * threw away ~100 MiB of texture-array data, the mesh slabs, and + * the producer arena slot indices — all of which describe scene + * content, not framebuffer state. Hoisting ownership to the + * OutputNode lets these survive across `Graph::recreateOutputRenderList`. + * + * Lifetime: lazy-allocated on first acquireRegistry() call (typically + * from RenderList::init), tied to the OutputNode's QRhi. Concrete + * outputs MUST call releaseRegistry() inside their destroyOutput() + * BEFORE tearing down the QRhi (via RenderState::destroy or + * setSwapchainFormat-style replacement) — otherwise the registry's + * QRhi resources would be freed against a destroyed device. + * + * Returns a non-null reference. Always allocates if the slot is empty. + */ + GpuResourceRegistry& acquireRegistry(); + + /** + * @brief Non-owning accessor. Returns null if no registry has been + * acquired yet (e.g. queried before the first RenderList::init). + */ + GpuResourceRegistry* registry() const noexcept { return m_registry.get(); } + + /** + * @brief Tear down the registry's QRhi resources directly. Idempotent. + * + * MUST be called by concrete subclasses' destroyOutput() before they + * tear down the QRhi. Calls GpuResourceRegistry::destroyOwned() which + * `delete`s the buffer / texture / sampler wrappers (the QRhi is + * still alive at that point — the caller's responsibility), then + * resets the unique_ptr so a subsequent acquireRegistry() rebuilds + * fresh against the new QRhi. 
+ * + * Safe to call when no registry exists (no-op). + */ + void releaseRegistry(); + protected: explicit OutputNode(); + + // Persistent across RenderList rebuilds. See acquireRegistry() docs. + // unique_ptr is opaque-typed in this header (forward-declared above); + // its destructor needs the full type, hence the out-of-line ~OutputNode + // implementation in OutputNode.cpp. + std::unique_ptr m_registry; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.cpp new file mode 100644 index 0000000000..ac58cefc93 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.cpp @@ -0,0 +1,370 @@ +#include "PipelineStateHelpers.hpp" + +#include +#include + +namespace +{
+// Case- and separator-insensitive comparison of an identifier against a
+// literal: "lessOrEqual", "less_or_equal" and "LESS-OR-EQUAL" all match
+// "lessorequal". '_', '-' and ' ' are skipped on BOTH sides, so literals
+// that themselves contain a separator ("triangle_strip", "1-srccolor")
+// match both their separated and their separator-free spellings.
+// Different spellings such as "lequal" are aliases listed by the callers.
+static bool ieq(std::string_view a, const char* b)
+{
+  const auto is_sep = [](char c) { return c == '_' || c == '-' || c == ' '; };
+  std::size_t bi = 0;
+  for(std::size_t i = 0; i < a.size(); ++i)
+  {
+    const char ca = (char)std::tolower((unsigned char)a[i]);
+    if(is_sep(ca))
+      continue;
+    while(is_sep(b[bi]))
+      ++bi;
+    if(b[bi] == '\0')
+      return false;
+    const char cb = (char)std::tolower((unsigned char)b[bi]);
+    if(ca != cb)
+      return false;
+    ++bi;
+  }
+  // Separators left at the end of the literal are not unmatched input.
+  while(is_sep(b[bi]))
+    ++bi;
+  return b[bi] == '\0';
+}
+} + +namespace score::gfx +{ + +QRhiGraphicsPipeline::CompareOp toCompareOp(std::string_view s) noexcept +{ + if(ieq(s, "never")) return QRhiGraphicsPipeline::Never; + if(ieq(s, "less") || ieq(s, "l")) return QRhiGraphicsPipeline::Less; + if(ieq(s, "equal") || ieq(s, "eq")) return QRhiGraphicsPipeline::Equal; + if(ieq(s, "lessorequal") || ieq(s, "lessequal") || ieq(s, "lequal")) + return QRhiGraphicsPipeline::LessOrEqual; + if(ieq(s, "greater") || ieq(s, "g") || ieq(s, "gt")) + return QRhiGraphicsPipeline::Greater; + if(ieq(s, "notequal") || ieq(s, "neq") || ieq(s, "ne")) + return QRhiGraphicsPipeline::NotEqual; + if(ieq(s, "greaterorequal") || ieq(s, "greaterequal") ||
ieq(s, "gequal")) + return QRhiGraphicsPipeline::GreaterOrEqual; + if(ieq(s, "always")) return QRhiGraphicsPipeline::Always; + return QRhiGraphicsPipeline::Less; +} + +QRhiGraphicsPipeline::CullMode toCullMode(std::string_view s) noexcept +{ + if(ieq(s, "none")) return QRhiGraphicsPipeline::None; + if(ieq(s, "front")) return QRhiGraphicsPipeline::Front; + if(ieq(s, "back")) return QRhiGraphicsPipeline::Back; + return QRhiGraphicsPipeline::None; +} + +QRhiGraphicsPipeline::FrontFace toFrontFace(std::string_view s) noexcept +{ + if(ieq(s, "ccw") || ieq(s, "counterclockwise")) + return QRhiGraphicsPipeline::CCW; + if(ieq(s, "cw") || ieq(s, "clockwise")) + return QRhiGraphicsPipeline::CW; + return QRhiGraphicsPipeline::CCW; +} + +QRhiGraphicsPipeline::PolygonMode toPolygonMode(std::string_view s) noexcept +{ + if(ieq(s, "fill") || ieq(s, "solid")) return QRhiGraphicsPipeline::Fill; + if(ieq(s, "line") || ieq(s, "wireframe")) return QRhiGraphicsPipeline::Line; + return QRhiGraphicsPipeline::Fill; +} + +QRhiGraphicsPipeline::Topology toTopology(std::string_view s) noexcept +{ + if(ieq(s, "triangles") || ieq(s, "triangle_list")) + return QRhiGraphicsPipeline::Triangles; + if(ieq(s, "triangle_strip")) return QRhiGraphicsPipeline::TriangleStrip; + if(ieq(s, "triangle_fan")) return QRhiGraphicsPipeline::TriangleFan; + if(ieq(s, "lines") || ieq(s, "line_list")) + return QRhiGraphicsPipeline::Lines; + if(ieq(s, "line_strip")) return QRhiGraphicsPipeline::LineStrip; + if(ieq(s, "points")) return QRhiGraphicsPipeline::Points; + return QRhiGraphicsPipeline::Triangles; +} + +QRhiGraphicsPipeline::BlendFactor toBlendFactor(std::string_view s) noexcept +{ + using B = QRhiGraphicsPipeline; + if(ieq(s, "zero")) return B::Zero; + if(ieq(s, "one")) return B::One; + if(ieq(s, "srccolor")) return B::SrcColor; + if(ieq(s, "oneminussrccolor") || ieq(s, "1-srccolor")) return B::OneMinusSrcColor; + if(ieq(s, "dstcolor")) return B::DstColor; + if(ieq(s, "oneminusdstcolor") || ieq(s, 
"1-dstcolor")) return B::OneMinusDstColor; + if(ieq(s, "srcalpha")) return B::SrcAlpha; + if(ieq(s, "oneminussrcalpha") || ieq(s, "1-srcalpha")) return B::OneMinusSrcAlpha; + if(ieq(s, "dstalpha")) return B::DstAlpha; + if(ieq(s, "oneminusdstalpha") || ieq(s, "1-dstalpha")) return B::OneMinusDstAlpha; + if(ieq(s, "constantcolor")) return B::ConstantColor; + if(ieq(s, "oneminusconstantcolor") || ieq(s, "1-constantcolor")) return B::OneMinusConstantColor; + if(ieq(s, "constantalpha")) return B::ConstantAlpha; + if(ieq(s, "oneminusconstantalpha") || ieq(s, "1-constantalpha")) return B::OneMinusConstantAlpha; + if(ieq(s, "srcalphasaturate")) return B::SrcAlphaSaturate; + if(ieq(s, "src1color")) return B::Src1Color; + if(ieq(s, "oneminussrc1color")) return B::OneMinusSrc1Color; + if(ieq(s, "src1alpha")) return B::Src1Alpha; + if(ieq(s, "oneminussrc1alpha")) return B::OneMinusSrc1Alpha; + return B::One; +} + +QRhiGraphicsPipeline::BlendOp toBlendOp(std::string_view s) noexcept +{ + using B = QRhiGraphicsPipeline; + if(ieq(s, "add")) return B::Add; + if(ieq(s, "subtract") || ieq(s, "sub")) return B::Subtract; + if(ieq(s, "reversesubtract") || ieq(s, "revsub")) return B::ReverseSubtract; + if(ieq(s, "min")) return B::Min; + if(ieq(s, "max")) return B::Max; + return B::Add; +} + +QRhiGraphicsPipeline::StencilOp toStencilOp(std::string_view s) noexcept +{ + using S = QRhiGraphicsPipeline; + if(ieq(s, "zero")) return S::StencilZero; + if(ieq(s, "keep")) return S::Keep; + if(ieq(s, "replace")) return S::Replace; + if(ieq(s, "incrementandclamp") || ieq(s, "incclamp") || ieq(s, "increment")) + return S::IncrementAndClamp; + if(ieq(s, "decrementandclamp") || ieq(s, "decclamp") || ieq(s, "decrement")) + return S::DecrementAndClamp; + if(ieq(s, "invert")) return S::Invert; + if(ieq(s, "incrementandwrap") || ieq(s, "incwrap")) + return S::IncrementAndWrap; + if(ieq(s, "decrementandwrap") || ieq(s, "decwrap")) + return S::DecrementAndWrap; + return S::Keep; +} + 
+QRhiGraphicsPipeline::ColorMask toColorMask(std::string_view s) noexcept +{ + using M = QRhiGraphicsPipeline; + M::ColorMask out = M::ColorMask(0); + for(char c : s) + { + switch(std::tolower((unsigned char)c)) + { + case 'r': out |= M::R; break; + case 'g': out |= M::G; break; + case 'b': out |= M::B; break; + case 'a': out |= M::A; break; + default: break; + } + } + if(out == M::ColorMask(0)) + out = M::R | M::G | M::B | M::A; + return out; +} + +QRhiGraphicsPipeline::TargetBlend toTargetBlend(const isf::blend_attachment& b) noexcept +{ + QRhiGraphicsPipeline::TargetBlend out; + out.enable = b.enable; + out.srcColor = toBlendFactor(b.src_color); + out.dstColor = toBlendFactor(b.dst_color); + out.opColor = toBlendOp(b.op_color); + out.srcAlpha = toBlendFactor(b.src_alpha); + out.dstAlpha = toBlendFactor(b.dst_alpha); + out.opAlpha = toBlendOp(b.op_alpha); + out.colorWrite = toColorMask(b.color_write); + return out; +} + +QRhiGraphicsPipeline::StencilOpState toStencilOpState(const isf::stencil_op_state& s) noexcept +{ + QRhiGraphicsPipeline::StencilOpState out; + out.failOp = toStencilOp(s.fail_op); + out.depthFailOp = toStencilOp(s.depth_fail_op); + out.passOp = toStencilOp(s.pass_op); + out.compareOp = toCompareOp(s.compare_op); + return out; +} + +// --- pipeline_state manipulation ------------------------------------------ + +isf::pipeline_state mergeState(isf::pipeline_state base, const isf::pipeline_state& over) +{ + if(over.depth_test.has_value()) base.depth_test = over.depth_test; + if(over.depth_write.has_value()) base.depth_write = over.depth_write; + if(over.depth_compare.has_value()) base.depth_compare = over.depth_compare; + if(over.depth_bias.has_value()) base.depth_bias = over.depth_bias; + if(over.slope_scaled_depth_bias.has_value())base.slope_scaled_depth_bias = over.slope_scaled_depth_bias; + if(over.cull_mode.has_value()) base.cull_mode = over.cull_mode; + if(over.front_face.has_value()) base.front_face = over.front_face; + 
if(over.polygon_mode.has_value()) base.polygon_mode = over.polygon_mode; + if(over.line_width.has_value()) base.line_width = over.line_width; + if(over.vertex_count.has_value()) base.vertex_count = over.vertex_count; + if(over.instance_count.has_value()) base.instance_count = over.instance_count; + if(over.topology.has_value()) base.topology = over.topology; + if(over.blend_all.has_value()) base.blend_all = over.blend_all; + if(!over.blend_per_attachment.empty()) base.blend_per_attachment = over.blend_per_attachment; + if(over.stencil_test.has_value()) base.stencil_test = over.stencil_test; + if(over.stencil_read_mask.has_value()) base.stencil_read_mask = over.stencil_read_mask; + if(over.stencil_write_mask.has_value()) base.stencil_write_mask = over.stencil_write_mask; + if(over.stencil_front.has_value()) base.stencil_front = over.stencil_front; + if(over.stencil_back.has_value()) base.stencil_back = over.stencil_back; + return base; +} + +bool stateAffectsPipeline(const isf::pipeline_state& s) noexcept +{ + return s.depth_test.has_value() + || s.depth_write.has_value() + || s.depth_compare.has_value() + || s.depth_bias.has_value() + || s.slope_scaled_depth_bias.has_value() + || s.cull_mode.has_value() + || s.front_face.has_value() + || s.polygon_mode.has_value() + || s.line_width.has_value() + || s.blend_all.has_value() + || !s.blend_per_attachment.empty() + || s.stencil_test.has_value() + || s.stencil_read_mask.has_value() + || s.stencil_write_mask.has_value() + || s.stencil_front.has_value() + || s.stencil_back.has_value() + || s.topology.has_value() +#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) + // shading_rate toggles the QRhiGraphicsPipeline::UsesShadingRate opt-in + // flag (set in Utils.cpp buildPipelineWithState), so it does affect the + // pipeline even though the per-draw rate itself is recorded on the + // command buffer at draw time. 
+ || s.shading_rate.has_value() +#endif + ; + // vertex_count / instance_count don't affect the pipeline itself + // (they change draw arguments, not pipeline state), so they're + // intentionally absent from this check. +} + +void applyPipelineState( + QRhiGraphicsPipeline& pip, + const isf::pipeline_state& state, + int colorAttachmentCount, + bool depthAttachmentAvailable, + bool wantsDepthByDefault) noexcept +{ + // ── Depth ────────────────────────────────────────────────────────── + // Only override depth state when explicitly set, OR when we need to force + // it off (no depth attachment, or upstream doesn't require depth). This + // preserves whatever the caller / mesh.preparePipeline already configured. + if(state.depth_test.has_value()) + { + pip.setDepthTest(depthAttachmentAvailable && *state.depth_test); + } + else if(!depthAttachmentAvailable || !wantsDepthByDefault) + { + pip.setDepthTest(false); + } + + if(state.depth_write.has_value()) + { + pip.setDepthWrite(depthAttachmentAvailable && *state.depth_write); + } + else if(!depthAttachmentAvailable || !wantsDepthByDefault) + { + pip.setDepthWrite(false); + } + + // Reverse-Z project rule: when depth is enabled and the shader didn't + // pick a compare op explicitly, default to Greater (near → 1.0, far → + // 0.0 in the float depth buffer). QRhi's built-in default is Less, which + // rejects every fragment under reverse-Z conventions. + if(state.depth_compare.has_value()) + pip.setDepthOp(toCompareOp(*state.depth_compare)); + else + pip.setDepthOp(QRhiGraphicsPipeline::Greater); + if(state.depth_bias.has_value()) + pip.setDepthBias((int)*state.depth_bias); + if(state.slope_scaled_depth_bias.has_value()) + pip.setSlopeScaledDepthBias(*state.slope_scaled_depth_bias); + + // ── Cull / front-face / polygon mode ──────────────────────────────── + // Only override when explicitly set; else preserve the caller's setup. 
+ if(state.cull_mode.has_value()) + pip.setCullMode(toCullMode(*state.cull_mode)); + + if(state.front_face.has_value()) + pip.setFrontFace(toFrontFace(*state.front_face)); + + if(state.polygon_mode.has_value()) + pip.setPolygonMode(toPolygonMode(*state.polygon_mode)); + + if(state.line_width.has_value()) + pip.setLineWidth(*state.line_width); + + // Topology override (paired with vertex_count for procedural draws): + // lets a shader that uses VERTEX_COUNT emit points / lines / strips + // without depending on the incoming geometry's topology. + if(state.topology.has_value()) + pip.setTopology(toTopology(*state.topology)); + + // ── Blending ──────────────────────────────────────────────────────── + // Only override target blends when the shader explicitly declares blend + // state. Otherwise the caller's seeded blend (e.g. legacy premul-alpha) + // is preserved bit-exact. + const int nAttachments = std::max(1, colorAttachmentCount); + if(!state.blend_per_attachment.empty()) + { + QVarLengthArray blends; + blends.reserve(nAttachments); + for(int i = 0; i < nAttachments; ++i) + { + std::size_t idx = std::min(i, state.blend_per_attachment.size() - 1); + blends.push_back(toTargetBlend(state.blend_per_attachment[idx])); + } + pip.setTargetBlends(blends.begin(), blends.end()); + } + else if(state.blend_all.has_value()) + { + QVarLengthArray blends; + blends.reserve(nAttachments); + auto t = toTargetBlend(*state.blend_all); + for(int i = 0; i < nAttachments; ++i) + blends.push_back(t); + pip.setTargetBlends(blends.begin(), blends.end()); + } + + // ── Stencil ───────────────────────────────────────────────────────── + // Toggle is gated on `stencil_test` only; sub-fields apply + // independently so a shader can override e.g. front op without + // re-stating `stencil_test`. 
+ if(state.stencil_test.has_value()) + pip.setStencilTest(*state.stencil_test); + if(state.stencil_front.has_value()) + pip.setStencilFront(toStencilOpState(*state.stencil_front)); + if(state.stencil_back.has_value()) + pip.setStencilBack(toStencilOpState(*state.stencil_back)); + if(state.stencil_read_mask.has_value()) + pip.setStencilReadMask(*state.stencil_read_mask); + if(state.stencil_write_mask.has_value()) + pip.setStencilWriteMask(*state.stencil_write_mask); + + // ── Variable-rate shading (per-draw rate) ─────────────────────────── + // NOTE: there is NO QRhiGraphicsPipeline::setShadingRate() and no + // QRhiGraphicsPipeline::ShadingRate enum in ANY Qt version (the previous + // code here did not compile on the >=6.12 builds it claimed to target). + // The pipeline only carries the opt-in flag + // QRhiGraphicsPipeline::UsesShadingRate, which Utils.cpp's + // buildPipelineWithState() already sets when caps.variableRateShading is + // true. The actual per-draw coarse-pixel rate is the command-buffer state + // QRhiCommandBuffer::setShadingRate(QSize), which must be recorded between + // setGraphicsPipeline() and draw() at the draw site (CustomMesh::draw / + // Mesh::draw). applyPipelineState() has no command buffer in scope, so it + // intentionally does nothing with state.shading_rate here. The requested + // {w,h} maps directly to the coarse-pixel QSize (clamped to {1,2,4}). +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.hpp new file mode 100644 index 0000000000..5984d32ca1 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/PipelineStateHelpers.hpp @@ -0,0 +1,85 @@ +#pragma once +#include + +#include + +#include + +#include + +namespace score::gfx +{ + +// --- String → Qt RHI enum mappers ---------------------------------------- +// +// All mappers are case-insensitive and accept common synonyms +// (e.g. 
"lequal" / "less_equal" both map to CompareOp::LessOrEqual). +// Unknown strings fall back to a sensible default (documented per function). + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::CompareOp toCompareOp(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::CullMode toCullMode(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::FrontFace toFrontFace(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::PolygonMode toPolygonMode(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::BlendFactor toBlendFactor(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::BlendOp toBlendOp(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::StencilOp toStencilOp(std::string_view s) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::ColorMask toColorMask(std::string_view s) noexcept; + +// --- Conversion helpers --------------------------------------------------- + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::TargetBlend toTargetBlend(const isf::blend_attachment& b) noexcept; + +SCORE_PLUGIN_GFX_EXPORT +QRhiGraphicsPipeline::StencilOpState toStencilOpState(const isf::stencil_op_state& s) noexcept; + +// --- pipeline_state manipulation ------------------------------------------ + +// Merge two pipeline_states: every field that is set in `over` wins, otherwise +// `base`'s field is kept. Used to combine the descriptor's global state with a +// per-pass override_state. +SCORE_PLUGIN_GFX_EXPORT +isf::pipeline_state mergeState(isf::pipeline_state base, const isf::pipeline_state& over); + +// Returns true if the state has any field set (i.e. would affect a pipeline). +SCORE_PLUGIN_GFX_EXPORT +bool stateAffectsPipeline(const isf::pipeline_state&) noexcept; + +// Apply the state to a graphics pipeline. +// - `colorAttachmentCount`: used to size per-attachment blend vectors. 
+// - `depthAttachmentAvailable`: true when the target RT has a depth attachment; +// depth-test/write are forced off otherwise. +// - `wantsDepthByDefault`: legacy fallback. When state.depth_test is nullopt +// AND wantsDepthByDefault is false, depth test/write are force-disabled +// (equivalent to today's `!renderer.anyNodeRequiresDepth()` path). +// +// Only fields explicitly set in `state` are overridden. Cull, front-face, +// polygon mode, blend, and stencil all preserve whatever the caller (or +// `mesh.preparePipeline()`) configured before this call. The caller is +// responsible for seeding sensible defaults (e.g. premul-alpha blend) before +// invoking this, so that shaders declaring partial pipeline_state don't +// silently lose unrelated defaults. +SCORE_PLUGIN_GFX_EXPORT +void applyPipelineState( + QRhiGraphicsPipeline& pip, + const isf::pipeline_state& state, + int colorAttachmentCount, + bool depthAttachmentAvailable, + bool wantsDepthByDefault) noexcept; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp index 80a89926b2..9151b8e6e5 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/PreviewNode.cpp @@ -36,9 +36,15 @@ std::shared_ptr importRenderState(QSize sz, QRhi* rhi) } state.version = Gfx::Settings::shaderVersionForAPI(state.api); state.rhi = rhi; - state.samples = 1; // FIXME + // The host widget owns this rhi, so we can't follow the global samples + // setting here — but we should at least query what the rhi actually + // supports rather than assuming 1. Final RT sample count is set by the + // host via setSampleCount on its own swap chain. 
+ state.samples = rhi->supportedSampleCounts().value(0, 1); state.renderSize = sz; state.outputSize = sz; + + state.caps.populate(*rhi); return st; } @@ -106,7 +112,24 @@ void PreviewNode::createOutput(score::gfx::OutputConfiguration conf) conf.onReady(); } -void PreviewNode::destroyOutput() { } +void PreviewNode::destroyOutput() +{ + // Persist-across-rebuild contract: registry survives RL teardown, + // so its QRhi resources must be released here (BEFORE we drop our + // RenderState reference) while the host-owned QRhi is still alive. + // The host (Qt widget) is responsible for outliving us, but we tear + // down our own resources first to keep the contract symmetric with + // ScreenNode / BackgroundNode / MultiWindowNode. + releaseRegistry(); + + // Host owns the underlying QRhi and the m_renderTarget / m_texture aliases + // — we don't free those. The shared_ptr is the only piece + // PreviewNode actually owns; reset it so a createOutput → destroyOutput → + // createOutput cycle drops the prior state instead of relying on + // make_shared assignment to release the previous holder. Matches the + // unified sink contract every other OutputNode subclass observes. 
+ m_renderState.reset(); +} std::shared_ptr PreviewNode::renderState() const { @@ -233,7 +256,7 @@ class PreviewRendererInvertY final : public score::gfx::OutputNodeRenderer score::gfx::RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& res) override { - cb.beginPass(m_renderTarget.renderTarget, Qt::black, {1.0f, 0}, res); + cb.beginPass(m_renderTarget.renderTarget, Qt::black, {0.0f, 0}, res); res = nullptr; { const auto sz = renderer.state.renderSize; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp index 768b003273..f689aa964d 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.cpp @@ -1,13 +1,20 @@ #include +#include #include #include #include #include +#include #include #include +#include + +#include +#include + //#define RENDERDOC_PROFILING 0 #if defined(RENDERDOC_PROFILING) #include "renderdoc_app.h" @@ -59,6 +66,20 @@ RenderList::RenderList(OutputNode& output, const std::shared_ptr& s RenderList::~RenderList() { + // Defensive: run release() here too. The normal path is Graph::~Graph + // calling release() on every RL before the destructor fires, but a + // late onResize during app shutdown can spawn a brand-new RL (via + // Graph::recreateOutputRenderList) after the ~Graph loop has already + // moved past the release step. That new RL reaches ~RenderList + // without anyone having freed its QRhi resources — by the time the + // shared_ptr drops, the output node's destroyOutput() is next in + // line, calling RenderState::destroy() → vkDestroyDevice on a device + // that still owns the new RL's empty textures, InvertYRenderer's + // render target, etc. (observed as VUID-vkDestroyDevice-device-05137 + // leaks of a handful of VkImages + views + one render pass + + // framebuffer). release() is idempotent, so calling it again when + // the Graph already did is a no-op. 
+ release(); for(auto node : this->nodes) { node->renderedNodes.erase(this); @@ -84,18 +105,140 @@ void RenderList::init() m_outputUBO = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(OutputUBO)); m_outputUBO->setName("RenderList::m_outputUBO"); - m_outputUBO->create(); - + SCORE_ASSERT(m_outputUBO->create()); + + // Typed placeholders so that a shader declaring sampler3D / samplerCube / + // sampler2DArray / sampler2D can be bound to a view of the matching type + // before any upstream edge has delivered a real texture. Without these, + // Vulkan's VUID-vkCmdDraw-viewType-07752 fires ("VkImageViewType is + // VK_IMAGE_VIEW_TYPE_2D but OpTypeImage has Dim=3D") every frame until + // an upstream texture arrives — and forever if no edge ever connects. + // + // create() must succeed here: a null handle reaches vkUpdateDescriptorSets + // as VK_NULL_HANDLE and the NVIDIA driver segfaults while dereferencing + // it in a later vkCmdPipelineBarrier. Assert the typed fallbacks exist. m_emptyTexture = rhi.newTexture(QRhiTexture::RGBA8, QSize{1, 1}, 1, QRhiTexture::Flag{}); m_emptyTexture->setName("RenderList::m_emptyTexture"); - m_emptyTexture->create(); - - m_lastSize = state.renderSize; - + SCORE_ASSERT(m_emptyTexture->create()); + + m_emptyTexture3D = rhi.newTexture( + QRhiTexture::RGBA8, 1, 1, 1, 1, + QRhiTexture::ThreeDimensional); + m_emptyTexture3D->setName("RenderList::m_emptyTexture3D"); + SCORE_ASSERT(m_emptyTexture3D->create()); + + m_emptyTextureCube = rhi.newTexture( + QRhiTexture::RGBA8, QSize{1, 1}, 1, QRhiTexture::CubeMap); + m_emptyTextureCube->setName("RenderList::m_emptyTextureCube"); + SCORE_ASSERT(m_emptyTextureCube->create()); + + // Must use newTextureArray — the 6-arg newTexture() overload is for 3D + // textures (depth > 1 is a volume slice count, not an array layer count), + // and QRhi rejects any texture with both ThreeDimensional and TextureArray + // flags. 
Passing TextureArray to the 3D overload happened to be tolerated + // by earlier Qt builds on some backends but hits an assertion under the + // current validation path. + m_emptyTextureArray = rhi.newTextureArray( + QRhiTexture::RGBA8, /*arraySize*/ 1, QSize(1, 1)); + m_emptyTextureArray->setName("RenderList::m_emptyTextureArray"); + SCORE_ASSERT(m_emptyTextureArray->create()); + + // Allocate the initial resource-update batch NOW (before the registry + // init below would otherwise allocate it) so we can queue zero-fills + // for the empty texture placeholders into the same batch. Vulkan does + // NOT zero-initialise new VkImage memory — without these uploads the + // placeholders carry device-memory garbage on every fresh RL. + // + // Why this matters: classic_pbr_openpbr samples cubemaps + // (irradiance_map, prefiltered_map, skybox) and a 2D LUT (brdf_lut). + // When NO upstream producer is wired for those inputs the consumer + // falls back to m_emptyTextureCube / m_emptyTexture. Sampling those + // returns the uninit page contents -> the BSDF math reads garbage + // -> wildly different IBL contribution per resize ("drift" symptom). + // classic_pbr_full doesn't sample any cubemap input, so it never + // hits the empty-cubemap fallback and is immune to this bug. + // + // 1x1 RGBA8 = 4 bytes per face. Cubemap = 6 faces. Total upload per + // RL init: ~16 bytes. Trivial. 
SCORE_ASSERT(!m_initialBatch); m_initialBatch = state.rhi->nextResourceUpdateBatch(); SCORE_ASSERT(m_initialBatch); + { + static const std::array blackPixel{0, 0, 0, 0}; + QRhiTextureSubresourceUploadDescription src(blackPixel.data(), 4); + src.setSourceSize(QSize{1, 1}); + // 2D + { + QRhiTextureUploadEntry e(0, 0, src); + m_initialBatch->uploadTexture(m_emptyTexture, {e}); + } + // 3D — one slice + { + QRhiTextureUploadEntry e(0, 0, src); + m_initialBatch->uploadTexture(m_emptyTexture3D, {e}); + } + // 2D Array — one layer + { + QRhiTextureUploadEntry e(0, 0, src); + m_initialBatch->uploadTexture(m_emptyTextureArray, {e}); + } + // Cube — six faces + { + QRhiTextureUploadDescription cubeDesc; + QVarLengthArray entries; + for(int face = 0; face < 6; ++face) + entries.append(QRhiTextureUploadEntry(face, 0, src)); + cubeDesc.setEntries(entries.cbegin(), entries.cend()); + m_initialBatch->uploadTexture(m_emptyTextureCube, cubeDesc); + } + } + + // Scene-graph arena store (camera / light / material / per_draw + // buffers). Source nodes grab slots from it at construction and + // write their own packed bytes at their own update(), so + // ScenePreprocessor never CPU-touches this data in the render path. + // + // Persist-across-rebuild contract: the registry is OWNED by the + // OutputNode (OutputNode::m_registry). On the first RL for this + // output it is freshly allocated + init()'d; on every subsequent + // RL rebuild (viewport resize / fallback rebuild path) we adopt + // the populated state as-is. Skipping the re-init() preserves + // ~100 MiB of texture-array layers, ~70 K-vertex mesh slabs, every + // arena buffer (no zero-fill), and all producer slot indices — + // none of that scene-content data depends on framebuffer size. + // See REPORT/OPT-resize-perf.md §3 #2 for the full cost analysis. + m_registry = &output.acquireRegistry(); + if(!m_registry->isInitialized()) + { + m_registry->init(rhi, *m_initialBatch); + // Seed reserved arena slots (e.g. 
Material slot 0 = default white + // dielectric). Runs after registry init so the seed lands AFTER the + // arena zero-fill (uploadStaticBuffer ordering is preserved within + // the same batch). Idempotent on repeat calls but we gate it here + // anyway so the explicit upload only happens when the arena was + // actually re-initialised this RL cycle. + m_registry->seedDefaults(*m_initialBatch); + } + else + { + // Reuse path. Arena buffers, texture arrays, mesh slabs and slot + // generations all carry over from the previous RL on this output. + // Producers' raw_*_slot members survive (the renderers themselves + // are recreated on RL rebuild — they re-allocate fresh slots — but + // the slot-stride / generation-table / free-list state is intact). + // ScenePreprocessor::init() compares against this same pointer to + // decide whether to wipe its m_loaderMaterialSlots / m_envSlot + // bookkeeping; matching pointer → no wipe → no re-allocation churn. + SCORE_ASSERT(m_registry->boundRhi() == &rhi); + } + + // Fallback vertex-buffer pool for "REQUIRED: false" VERTEX_INPUTS. + // Lazy-allocates on first use (remapPipelineVertexInputs side), so + // zero cost when no shader opts in. + m_vertexFallbackPool = std::make_unique(); + + m_lastSize = state.renderSize; } QRhiResourceUpdateBatch* RenderList::initialBatch() const noexcept @@ -103,31 +246,151 @@ QRhiResourceUpdateBatch* RenderList::initialBatch() const noexcept return m_initialBatch; } +QSize RenderList::resolveDownstreamSize( + const Node* node, + const ossia::small_flat_map& resolvedSpecs) + const noexcept +{ + QSize best{0, 0}; + + for(const auto* out_port : node->output) + { + for(const auto* edge : out_port->edges) + { + const Port* sink = edge->sink; + + // Case 1: sink is the output node — use its render size. 
+ if(sink->node == &output) + { + best = QSize( + std::max(best.width(), state.renderSize.width()), + std::max(best.height(), state.renderSize.height())); + continue; + } + + // Case 2: sink port was already resolved (downstream, processed earlier + // in reverse topological order). + if(auto it = resolvedSpecs.find(sink); it != resolvedSpecs.end()) + { + best = QSize( + std::max(best.width(), it->second.size.width()), + std::max(best.height(), it->second.size.height())); + continue; + } + + // Case 3: sink has a renderer that provides its own RT + // (e.g. Crousti nodes overriding renderTargetForInput). + if(auto rn_it = sink->node->renderedNodes.find(this); + rn_it != sink->node->renderedNodes.end()) + { + auto tex = rn_it->second->renderTargetForInput(*sink); + if(tex.texture) + { + auto sz = tex.texture->pixelSize(); + best = QSize( + std::max(best.width(), sz.width()), + std::max(best.height(), sz.height())); + continue; + } + } + } + } + + return best; // {0,0} if no downstream found — caller keeps renderSize fallback +} + void RenderList::createAllInputRenderTargets() { - int cur_port = 0; - for(auto* node : nodes) + // Phase 1: resolve specs in reverse topological order (sinks first). + // This ensures downstream RTs are resolved before upstream ones, + // so that nodes without explicit sizes inherit the downstream size + // instead of defaulting to the global output resolution. + ossia::small_flat_map resolvedSpecs; + + for(auto it = nodes.rbegin(); it != nodes.rend(); ++it) { - // Output node manages its own RT via its renderer (e.g. 
ScaledRenderer::m_inputTarget) + auto* node = *it; + // Output node manages its own RT via its renderer if(node == &output) continue; - cur_port = 0; + + int cur_port = 0; for(auto* in : node->input) { if(in->type == Types::Image && (in->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource) { auto spec = node->resolveRenderTargetSpecs(cur_port, *this); - bool wantsDepth = requiresDepth(*in); - bool wantsSamplableDepth = (in->flags & Flag::SamplableDepth) == Flag::SamplableDepth; - auto rt = score::gfx::createRenderTarget( - state, spec.format, spec.size, samples(), - wantsDepth || wantsSamplableDepth, wantsSamplableDepth); - m_inputRenderTargets[in] = std::move(rt); + + // If no explicit size, inherit from downstream. + if(!node->hasExplicitRenderTargetSize(cur_port)) + { + QSize downstream = resolveDownstreamSize(node, resolvedSpecs); + if(!downstream.isEmpty()) + spec.size = downstream; + // else: keep renderer.state.renderSize (ultimate fallback) + } + + resolvedSpecs[in] = spec; } cur_port++; } } + + // Phase 2: create render targets using resolved specs. 
+ for(auto& [port, spec] : resolvedSpecs) + { + bool wantsDepth = requiresDepth(*port); + bool wantsSamplableDepth + = (port->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + auto rt = score::gfx::createRenderTarget( + state, spec.format, spec.size, samples(), + wantsDepth || wantsSamplableDepth, wantsSamplableDepth); + m_inputRenderTargets[port] = std::move(rt); + } +} + +void RenderList::onEdgeRemoved( + Edge& edge, const ossia::hash_set* preserveSinks) +{ + // Notify source renderer + if(auto src_it = edge.source->node->renderedNodes.find(this); + src_it != edge.source->node->renderedNodes.end()) + { + src_it->second->removeOutputPass(*this, edge); + } + + // Notify sink renderer (needs a batch for potential resource updates) + if(auto sink_it = edge.sink->node->renderedNodes.find(this); + sink_it != edge.sink->node->renderedNodes.end()) + { + sink_it->second->removeInputEdge(*this, edge); + } + + // If the sink port has no more edges after this one is removed + // (called before actual edge destruction, so the edge is still in the list), + // release the render target — unless the caller has told us a new feed + // is coming in the same batch. Inserting a filter between A and B would + // otherwise destroy B's input RT here, only for reconcile to immediately + // re-allocate an RT with the same spec at the same slot. The caller is + // responsible for only marking sinks whose RT specs will remain valid; + // a mismatch is picked up later by the rt_changed surgical path in + // render(). 
+ if(edge.sink->edges.size() <= 1) + { + if(!preserveSinks || !preserveSinks->contains(edge.sink)) + removeInputRenderTarget(edge.sink); + } +} + +void RenderList::removeInputRenderTarget(const Port* port) +{ + auto it = m_inputRenderTargets.find(port); + if(it != m_inputRenderTargets.end()) + { + it->second.release(); + m_inputRenderTargets.erase(it); + } } TextureRenderTarget RenderList::renderTargetForInputPort(const Port& p) const noexcept @@ -155,7 +418,15 @@ void RenderList::release() { for(auto& b : bufs.second.buffers) { - delete b.handle; + // Only delete buffers this RenderList owns. Borrowed gpu_buffer + // handles (e.g., the scene preprocessor's MDI arena buffers, the + // GpuResourceRegistry's arena buffers wrapped as gpu_buffer in the + // emitted geometry) are destroyed by their original producer and + // must NOT be raw-deleted here — otherwise the later + // registry->destroy() hits a freed pointer in + // QRhiResource::deleteLater. + if(b.owned && b.handle) + delete b.handle; } } @@ -172,6 +443,36 @@ void RenderList::release() delete m_emptyTexture; m_emptyTexture = nullptr; + // The 3 typed empty-texture placeholders are also allocated in init() + // but were originally missing from the release path — they leaked on + // every maybeRebuild cycle (ASan flagged both createRenderList's and + // maybeRebuild's init() call sites). + delete m_emptyTexture3D; + m_emptyTexture3D = nullptr; + + delete m_emptyTextureCube; + m_emptyTextureCube = nullptr; + + delete m_emptyTextureArray; + m_emptyTextureArray = nullptr; + + // Persist-across-rebuild contract: do NOT destroy the registry here. + // It is owned by the OutputNode and survives RL rebuild — the next + // createRenderList for this output will re-adopt the same instance + // and skip the (expensive) init() path. The actual QRhi-resource + // teardown lives in OutputNode::releaseRegistry() which the concrete + // sink (ScreenNode / BackgroundNode / MultiWindowNode / ...) 
calls + // from destroyOutput() before the QRhi itself is freed. Just clear + // our non-owning pointer so a stale dereference after release() is + // a clean nullptr crash, not a use-after-free. + m_registry = nullptr; + + if(m_vertexFallbackPool) + { + m_vertexFallbackPool->release(); + m_vertexFallbackPool.reset(); + } + // If nothing happened if(m_initialBatch) { @@ -210,6 +511,36 @@ bool RenderList::maybeRebuild(bool force) const QSize outputSize = state.renderSize; if(outputSize != m_lastSize || !m_built || force) { + // Drain the in-flight CB before the mid-frame release()+init(). + // + // maybeRebuild is called from renderInternal (line ~845), which runs + // INSIDE Window::render's beginFrame/endFrame brackets. release() + // raw-deletes / deleteLater()s SRBs, samplers, UBOs, etc. that may + // be referenced by the resource-update batch already queued into + // cbD->commands earlier in renderInternal (commands.resourceUpdate + // around line 1036), or by ScenePreprocessor's runInitialPasses + // beginExternal/copyBuffer/endExternal block (which synchronously + // flushes cbD->commands into the VkCommandBuffer at + // qrhivulkan.cpp:6640-6643). + // + // Without this drain, recordPrimaryCommandBuffer at endFrame + // dereferences the released VkBuffer/VkSampler handles -> validation + // cascade (vkResetCommandPool with pending CBs, vkBeginCommandBuffer + // on active CB, eventual device loss in vkQueueSubmit / + // vkWaitForFences) -> CRASH in nvoglv64.dll (NVIDIA's unified Vulkan + // driver) at vkCmdBeginRenderPass. + // + // finish() mid-frame is a documented and supported QRhi operation + // (qrhivulkan.cpp:3121-3164): it submits the partial CB, + // vkQueueWaitIdle, then restarts a fresh CB on the same slot. After + // finish(), the CB queue is empty and we can safely tear down + + // re-init RenderList resources. + // + // Triggers only on first frame after a resize / m_built==false / + // forced rebuild. Steady-state cost: zero. 
+ if(state.rhi && state.rhi->isRecordingFrame()) + state.rhi->finish(); + m_built = false; release(); @@ -304,20 +635,40 @@ RenderList::Buffers RenderList::acquireMesh( auto& rhi = *state.rhi; // 1. Try to find mesh from the exact same geometry const auto& [p, f] = spec; + + auto dump_bufs = [](const char* tag, CustomMesh* m, const MeshBuffers& mb) { + if(!::score::gfx::buftrace_enabled()) + return; + QDebug d = qDebug().nospace(); + d << "[BUFTRACE] " << tag << " mesh=" << (void*)m + << " bufs.size=" << (qsizetype)mb.buffers.size() << " ["; + for(std::size_t i = 0; i < mb.buffers.size(); ++i) + { + if(i) + d << ","; + d << (void*)mb.buffers[i].handle; + } + d << "] indirect=" << (void*)mb.indirectDrawBuffer; + }; + if(auto it = m_customMeshCache.find(spec); it != m_customMeshCache.end()) { if(auto m = const_cast(safe_cast(it->second))) { auto meshbufs_it = this->m_vertexBuffers.find(m); SCORE_ASSERT(meshbufs_it != this->m_vertexBuffers.end()); - auto mb = meshbufs_it->second; + auto& mb = meshbufs_it->second; - // FIX the thraed-unsafety: basically, we need to - // have some level of double- or triple-buffering if(auto cur_idx = p->dirty_index; m->dirtyGeometryIndex != cur_idx) { + BUFTRACE() << "acquireMesh PATH 1a: dirty_index " + << m->dirtyGeometryIndex << "->" << cur_idx + << " mesh=" << (void*)m + << " spec=" << (void*)p.get(); + dump_bufs(" before reload", m, mb); m->reload(*p, f); m->update(rhi, mb, res); + dump_bufs(" after reload", m, mb); for(auto& mesh: p->meshes) { for(auto& buf : mesh.buffers) { buf.dirty = false; @@ -338,8 +689,11 @@ RenderList::Buffers RenderList::acquireMesh( if(dirty) { + BUFTRACE() << "acquireMesh PATH 1b: buf.dirty mesh=" << (void*)m; + dump_bufs(" before reload", m, mb); m->reload(*p, f); m->update(rhi, mb, res); + dump_bufs(" after reload", m, mb); for(auto& mesh: p->meshes) { for(auto& buf : mesh.buffers) { buf.dirty = false; @@ -364,8 +718,13 @@ RenderList::Buffers RenderList::acquireMesh( auto& mb = currentbufs; auto 
cur_idx = p->dirty_index; + BUFTRACE() << "acquireMesh PATH 2 (reuse): mesh=" << (void*)m + << " old_spec=" << (void*)it->first.meshes.get() + << " new_spec=" << (void*)p.get(); + dump_bufs(" before reload", m, mb); m->reload(*p, f); m->update(rhi, mb, res); + dump_bufs(" after reload", m, mb); for(auto& mesh: p->meshes) { for(auto& buf : mesh.buffers) { @@ -375,6 +734,11 @@ RenderList::Buffers RenderList::acquireMesh( m->dirtyGeometryIndex = cur_idx; + // Sync the vertex buffer cache so that path 1 on subsequent frames + // picks up the updated handles (especially gpu_buffer pointers that + // were replaced rather than resized in-place). + meshbufs_it->second = mb; + // Re-key: erase stale entry and insert under the new geometry_spec // to prevent cache growth from feedback loops creating new shared_ptrs each frame. m_customMeshCache.erase(it); @@ -386,31 +750,32 @@ RenderList::Buffers RenderList::acquireMesh( } // 3. Really not found, we allocate a new mesh for good + BUFTRACE() << "acquireMesh PATH 3 (fresh): spec=" << (void*)p.get(); auto m = new CustomMesh{*p, f}; auto meshbufs = initMeshBuffer(*m, res); #if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Check for well-known _indirect_draw auxiliary buffer convention + // Check for well-known _indirect_draw auxiliary buffer convention. + // + // The engine emits a uniform 5-word indirect command (stride 20): + // { index_or_vertex_count, instance_count, first_index_or_vertex, + // base_vertex, first_instance } -- see ossia::geometry::draw_command / + // ScenePreprocessorNode's IndirectCmd. This matches QRhiDrawIndexedIndirect- + // Command (5 u32) exactly, so the INDEXED path is GPU-safe at stride 20. + // + // The NON-indexed QRhiDrawIndirectCommand is only 4 u32 (vertexCount, + // instanceCount, firstVertex, firstInstance). 
Pointing drawIndirect() at a + // 5-word/stride-20 buffer makes the GPU read firstInstance from word 3 + // (our base_vertex dummy) instead of word 4 — diverging from the CPU + // fallback, which reads word 4. There is no way to reshape the producer's + // buffer here, so we deliberately DO NOT enable the GPU indirect path for + // the non-indexed case (force indexed-only MDI): the mesh falls back to its + // normal draw, avoiding wrong/garbage firstInstance. Indexed MDI below gets + // the full stride/count treatment. if(!meshbufs.useIndirectDraw && !p->meshes.empty()) { const auto& mesh = p->meshes[0]; - if(auto* aux = mesh.find_auxiliary("_indirect_draw")) - { - if(aux->buffer >= 0 && aux->buffer < (int)mesh.buffers.size()) - { - const auto& buf_data = mesh.buffers[aux->buffer].data; - if(auto* gpu = ossia::get_if(&buf_data)) - { - if(gpu->handle) - { - meshbufs.indirectDrawBuffer = static_cast(gpu->handle); - meshbufs.useIndirectDraw = true; - meshbufs.indirectDrawIndexed = false; - } - } - } - } - else if(auto* aux_idx = mesh.find_auxiliary("_indirect_draw_indexed")) + if(auto* aux_idx = mesh.find_auxiliary("_indirect_draw_indexed")) { if(aux_idx->buffer >= 0 && aux_idx->buffer < (int)mesh.buffers.size()) { @@ -419,13 +784,31 @@ RenderList::Buffers RenderList::acquireMesh( { if(gpu->handle) { + constexpr quint32 stride = 5 * sizeof(uint32_t); // 20, matches CustomMesh meshbufs.indirectDrawBuffer = static_cast(gpu->handle); meshbufs.useIndirectDraw = true; meshbufs.indirectDrawIndexed = true; + meshbufs.indirectDrawOffset = (quint32)std::max(0, aux_idx->byte_offset); + meshbufs.indirectDrawStride = stride; + // drawIndirect requires stride >= 16 and count >= 1; derive the + // command count from the aux region size (was never set before → + // count defaulted to 1, drawing only the first command). + const int64_t avail = (aux_idx->byte_size > 0) + ? 
aux_idx->byte_size + : (int64_t)gpu->byte_size - aux_idx->byte_offset; + meshbufs.indirectDrawCount + = (avail > 0) ? (quint32)(avail / stride) : 1u; + if(meshbufs.indirectDrawCount == 0) + meshbufs.indirectDrawCount = 1; } } } } + else if(mesh.find_auxiliary("_indirect_draw")) + { + // Non-indexed GPU MDI intentionally unsupported (see comment above). + // Leave useIndirectDraw=false so the mesh draws via its normal path. + } } #endif @@ -441,7 +824,51 @@ void RenderList::clearRenderers() m_built = false; } -bool RenderList::requiresDepth(Port& p) const noexcept +bool RenderList::resizeSwapchainSizedTargets(QSize newSize) +{ + // Bail to fallback if there's nothing to resize. The fallback + // (recreateOutputRenderList) handles initial output setup. + if(newSize.width() <= 0 || newSize.height() <= 0) + return false; + if(renderers.empty()) + return false; + + // Already at the right size — no-op success. Avoids a wasted + // round-trip through maybeRebuild when Qt fires multiple onResize + // callbacks for the same final size. + if(newSize == m_lastSize) + return true; + + // Update the shared RenderState's size. m_lastSize stays at the + // OLD value here — we WANT maybeRebuild's `outputSize != m_lastSize` + // check to fire on the next render frame so it triggers a full + // release+init cycle. With the persistent GpuResourceRegistry + // (commit 703c2937f) and the rt_changed downstream-size + // propagation (createAllInputRenderTargets), maybeRebuild is now + // cheap enough to be the correct way to handle resize. + // + // Why we don't try to update RTs here directly: the rt_changed + // surgical block called resolveRenderTargetSpecs PER-PORT without + // the downstream-propagation that createAllInputRenderTargets + // applies. Nodes with explicit per-port sizes cached from earlier + // graph setup keep their explicit size on resize, while + // createAllInputRenderTargets uses resolveDownstreamSize to + // properly propagate the new output size upstream. 
The user's + // openpbr scene has nodes with cached explicit sizes that wouldn't + // update via the surgical path → low-resolution rendering on resize. + // + // maybeRebuild() routes through release()+init()+createAllInputRenderTargets() + // which IS the correct propagation; with registry persistence the + // cost is bounded (no arena destroy/create, no texture re-upload, + // pipeline cache stays warm). + state.renderSize = newSize; + state.outputSize = newSize; + m_built = false; // forces maybeRebuild's release+init on next frame + + return true; +} + +bool RenderList::requiresDepth(const Port& p) const noexcept { for(auto& edge : p.edges) if(edge->source->node->requiresDepth) @@ -503,6 +930,82 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) if(renderers.size() <= 1 && !force) return; + // Frame counter + wall-clock timer for diagnostics. Emits the frame + // header with the time since the previous render() entry so the pasted + // log shows per-frame cost. Values include CPU record + any synchronous + // GPU waits inside setShaderResources / beginPass etc., i.e. roughly + // the wall-time equivalent of "how fast is this pipeline". + // Plan 09 S6: per-frame GPU-time + PSO-stall observability. Read the + // CB-wide GPU time for the most recently COMPLETED frame and attribute + // it to the "frame" label; the per-pass breakdown is a QRhi follow-up + // (current API only exposes CB-scoped timings). + // + // One-frame staleness is a QRhi contract: `lastCompletedGpuTime()` + // returns the PREVIOUS frame's elapsed GPU time, not the in-progress + // one. The panel reports it as such. +#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) + // Use the per-instance `frame` member (incremented at the end of render()) + // as the diagnostic frame number rather than a process-/thread-global + // counter, so the number is attributed to THIS RenderList. 
+ const int64_t frameNumber = this->frame; + if(state.caps.timestamps) + { + const double last_ms = commands.lastCompletedGpuTime(); + if(last_ms > 0.0) + m_gpuTimings.record("frame", last_ms); + } + // PSO stall telemetry: sample totalPipelineCreationTime, compute the + // delta since last frame. A spike > 10 ms means a new PSO compiled + // on the hot path — usually a cold cache or new preset variant. + if(state.rhi) + { + // NOTE: totalPipelineCreationTime is rhi-wide and these two throttle + // counters SHOULD be per-RenderList members so that multiple RenderLists + // sharing a render thread don't (a) consume each other's PSO-time delta + // or (b) race a shared thread_local cooldown. Converting them to members + // requires adding fields to RenderList.hpp, which is outside this change's + // editable scope — see report. The two genuine bugs that ARE fixable here + // (the frame-number misattribution and the cooldown decrement being gated + // on the stall branch) are fixed: frameNumber comes from this->frame, and + // the decrement now ticks every frame below. + static thread_local qint64 s_lastPsoCreationNs = 0; + static thread_local int s_flushCoolDown = 0; + const auto stats = state.rhi->statistics(); + const qint64 delta_ns = stats.totalPipelineCreationTime - s_lastPsoCreationNs; + s_lastPsoCreationNs = stats.totalPipelineCreationTime; + const double delta_ms = double(delta_ns) / 1'000'000.0; + + // Tick the cooldown EVERY frame (was previously decremented only inside + // the stall branch, so it counted stalls rather than frames and the + // ~5s throttle never actually elapsed in wall time). + if(s_flushCoolDown > 0) + --s_flushCoolDown; + + if(delta_ms > 10.0) + { + qWarning().noquote().nospace() + << "[GPU] PSO compile stall on frame " << frameNumber + << ": " << delta_ms << " ms — consider prewarming preset pipelines."; + + // Plan 09 S6: mid-session pipeline-cache flush. 
When a stall + // hits we've just compiled one or more fresh PSOs — good time + // to persist the cache so the same compilation doesn't have to + // happen again on next launch, even if score crashes. Throttled + // to at most once per ~5s (300 frames at 60 Hz) to avoid + // churning the cache file on prolonged compile-heavy scenes. + if(s_flushCoolDown <= 0 && state.savePipelineCache) + { + state.savePipelineCache(); + s_flushCoolDown = 300; + } + } + // Also record into the timings panel so it shows up next to frame + // time. Zero deltas are filtered out by GpuTimings::record. + m_gpuTimings.record("pso_compile", delta_ms); + } +#endif + m_gpuTimings.tickFrame(); + bool rt_changed = false; for(auto* renderer : renderers) { @@ -531,23 +1034,137 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) if(rt_changed && !rebuilt) { - for(auto node : renderers) + // Surgical render target update: only recreate the specific RTs and + // passes that actually changed, rather than destroying everything. + // + // Process output node first (its RT size/format determines upstream defaults), + // then intermediate nodes. + + // Pass 1: output node + if(auto out_it = output.renderedNodes.find(this); + out_it != output.renderedNodes.end()) { - node->release(*this); + auto* outRenderer = out_it->second; + if(outRenderer->renderTargetSpecsChanged) + { + // Output renderer owns its RT — re-init it. + outRenderer->releaseState(*this); + outRenderer->initState(*this, *updateBatch); + outRenderer->checkForChanges(); + outRenderer->materialChanged = true; + outRenderer->geometryChanged = true; + outRenderer->renderTargetSpecsChanged = false; + + // Recreate upstream passes that target the output's input ports. 
+ for(auto* in : output.input) + { + for(auto* edge : in->edges) + { + auto src_it = edge->source->node->renderedNodes.find(this); + if(src_it != edge->source->node->renderedNodes.end()) + { + src_it->second->removeOutputPass(*this, *edge); + src_it->second->addOutputPass(*this, *edge, *updateBatch); + } + } + } + } } - // Recreate centralized input render targets - for(auto& [port, rt] : m_inputRenderTargets) - rt.release(); - m_inputRenderTargets.clear(); - createAllInputRenderTargets(); - - for(auto node : renderers) + // Pass 2: intermediate nodes with changed RT specs + for(auto* renderer : renderers) { - node->init(*this, *updateBatch); - node->materialChanged = true; - node->geometryChanged = true; - node->renderTargetSpecsChanged = true; + if(!renderer->renderTargetSpecsChanged) + continue; + // Skip output node (handled above) + if(&renderer->node == &output) + continue; + + // Phase A: scan ports, recreate input RTs whose specs changed, + // and collect the changed-port set so phase C only re-adds + // upstream passes for those. 
+ QVarLengthArray changedPorts; + int cur_port = 0; + for(auto* in : renderer->node.input) + { + if(in->type == Types::Image + && (in->flags & Flag::GrabsFromSource) != Flag::GrabsFromSource) + { + auto newSpec = renderer->node.resolveRenderTargetSpecs(cur_port, *this); + auto oldIt = m_inputRenderTargets.find(in); + + bool specChanged = false; + if(oldIt != m_inputRenderTargets.end()) + { + auto* oldTex = oldIt->second.texture; + if(oldTex) + specChanged = (oldTex->format() != newSpec.format) + || (oldTex->pixelSize() != newSpec.size); + } + + // Always update sampler filter settings when specs changed + // (filter/address changes don't require RT recreation) + renderer->updateInputSamplerFilter(*in, newSpec); + + if(specChanged) + { + changedPorts.append(in); + + // Remove upstream passes that target this port + for(auto* edge : in->edges) + { + auto src_it = edge->source->node->renderedNodes.find(this); + if(src_it != edge->source->node->renderedNodes.end()) + src_it->second->removeOutputPass(*this, *edge); + } + + // Recreate the render target + oldIt->second.release(); + bool wantsDepth = requiresDepth(*in); + bool wantsSamplableDepth + = (in->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + oldIt->second = score::gfx::createRenderTarget( + state, newSpec.format, newSpec.size, samples(), + wantsDepth || wantsSamplableDepth, wantsSamplableDepth); + } + } + cur_port++; + } + + // Phase B: if ANY input RT actually changed shape, the renderer's + // INTERNAL size-dependent state (intermediate RTs, MRT, + // persistent AUX, depth/MSAA attachments sized to output, etc.) + // is stale and needs re-init. Without this, the resize-only + // fast path produced "internal render resolution not updated" -- + // input RT was recreated correctly but the renderer's own + // internal RTs stayed at the old size. initState wires up + // samplers against the current m_inputRenderTargets so we + // don't need a separate updateInputTexture pass. 
+ // + // Phase C: re-add upstream passes ONLY for the ports whose RT + // was recreated (others kept their existing passes intact in + // phase A). Done after Phase B so the upstream's addOutputPass + // sees this renderer's freshly-built per-pass state. + if(!changedPorts.empty()) + { + renderer->releaseState(*this); + renderer->initState(*this, *updateBatch); + renderer->checkForChanges(); + renderer->materialChanged = true; + renderer->geometryChanged = true; + + for(auto* in : changedPorts) + { + for(auto* edge : in->edges) + { + auto src_it = edge->source->node->renderedNodes.find(this); + if(src_it != edge->source->node->renderedNodes.end()) + src_it->second->addOutputPass(*this, *edge, *updateBatch); + } + } + } + + renderer->renderTargetSpecsChanged = false; } } // Check if the viewport has changed @@ -586,11 +1203,14 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) for(auto edge : input->edges) { auto src = edge->source; - SCORE_ASSERT(src); + if(!src) + continue; + + auto rn_it = src->node->renderedNodes.find(this); + if(rn_it == src->node->renderedNodes.end()) + continue; // Source node has no renderer in this RL (transient during incremental update) - SCORE_ASSERT( - src->node->renderedNodes.find(this) != src->node->renderedNodes.end()); - NodeRenderer* prev_renderer = src->node->renderedNodes.find(this)->second; + NodeRenderer* prev_renderer = rn_it->second; prevRenderers.push_back({edge, prev_renderer}); @@ -650,14 +1270,16 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) // Update the downstream node's sampler to point to the // upstream's current texture (it may have changed since init). 
auto rendered = node->renderedNodes.find(this); - SCORE_ASSERT(rendered != node->renderedNodes.end()); + if(rendered == node->renderedNodes.end()) + continue; NodeRenderer* sink_renderer = rendered->second; for(auto [edge, prev_renderer] : prevRenderers) { if(auto* srcTex = prev_renderer->textureForOutput(*edge->source)) { - sink_renderer->updateInputTexture(*input, srcTex); + auto rt = renderTargetForInputPort(*input); + sink_renderer->updateInputTexture(*input, srcTex, rt.depthTexture); } } @@ -674,7 +1296,16 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) // issues a clearBuffers command. { auto rendered = node->renderedNodes.find(this); - SCORE_ASSERT(rendered != node->renderedNodes.end()); + if(rendered == node->renderedNodes.end()) + { + if(updateBatch) + { + commands.resourceUpdate(updateBatch); + updateBatch = nullptr; + } + updateBatch = state.rhi->nextResourceUpdateBatch(); + continue; + } NodeRenderer* renderer = rendered->second; auto rt = renderer->renderTargetForInput(*input); @@ -683,8 +1314,7 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) if(rt) { QColor bg = (it + 1 == this->nodes.rend() ? 
Qt::black : Qt::transparent); - // Normal drawing node - commands.beginPass(rt.renderTarget, bg, {1.0f, 0}, updateBatch); + commands.beginPass(rt.renderTarget, bg, {0.0f, 0}, updateBatch); updateBatch = nullptr; // FIXME z-sort @@ -716,13 +1346,14 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) SCORE_ASSERT(updateBatch); } } - else if(input->type == Types::Buffer || input->type == Types::Geometry) + else if(input->type == Types::Buffer || input->type == Types::Geometry || input->type == Types::Scene) { prepare_render(input); { auto rendered = node->renderedNodes.find(this); - SCORE_ASSERT(rendered != node->renderedNodes.end()); + if(rendered == node->renderedNodes.end()) + continue; NodeRenderer* renderer = rendered->second; if(updateBatch) @@ -752,11 +1383,23 @@ void RenderList::render(QRhiCommandBuffer& commands, bool force) // Finally the output node may have some rendering to do too { - SCORE_ASSERT(!this->output.renderedNodes.empty()); - SCORE_ASSERT( - dynamic_cast(this->output.renderedNodes.begin()->second)); + if(this->output.renderedNodes.empty()) + { + // Pool-leak fix: updateBatch was allocated earlier in render() + // (in the render loop or via the per-edge prepare_render path) and + // must be returned before bailing out — otherwise the pool slot + // stays pinned until the QRhi is destroyed, and during rapid + // resize this condition can fire many times in succession.
+ if(updateBatch) { updateBatch->release(); updateBatch = nullptr; } + return; + } auto output_renderer - = static_cast(this->output.renderedNodes.begin()->second); + = dynamic_cast(this->output.renderedNodes.begin()->second); + if(!output_renderer) + { + if(updateBatch) { updateBatch->release(); updateBatch = nullptr; } + return; + } if(this->output.configuration().outputNeedsRenderPass) { @@ -800,9 +1443,40 @@ void RenderList::update(QRhiResourceUpdateBatch& res) m_outputUBOData.renderSize[0] = this->m_lastSize.width(); m_outputUBOData.renderSize[1] = this->m_lastSize.height(); + m_outputUBOData.sampleCount = m_samples; res.updateDynamicBuffer(m_outputUBO, 0, sizeof(OutputUBO), &m_outputUBOData); } } +void RenderState::Caps::populate(QRhi& rhi) +{ +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + drawIndirect = rhi.isFeatureSupported(QRhi::DrawIndirect); + drawIndirectMulti = rhi.isFeatureSupported(QRhi::DrawIndirectMulti); +#endif +#if QT_VERSION >= QT_VERSION_CHECK(6, 11, 0) + instanceIndexIncludesBaseInstance + = rhi.isFeatureSupported(QRhi::InstanceIndexIncludesBaseInstance); + depthClamp = rhi.isFeatureSupported(QRhi::DepthClamp); +#endif +#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0) +#endif +#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) + variableRateShading = rhi.isFeatureSupported(QRhi::VariableRateShading); +#endif +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + textureViewFormat = rhi.isFeatureSupported(QRhi::TextureViewFormat); + resolveDepthStencil = rhi.isFeatureSupported(QRhi::ResolveDepthStencil); +#endif +#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + multiview = rhi.isFeatureSupported(QRhi::MultiView); +#endif + + timestamps = rhi.isFeatureSupported(QRhi::Timestamps); + tessellation = rhi.isFeatureSupported(QRhi::Tessellation); + geometryShader = rhi.isFeatureSupported(QRhi::GeometryShader); + baseInstance = rhi.isFeatureSupported(QRhi::BaseInstance); + pipelineCacheDataLoadSave = rhi.isFeatureSupported(QRhi::PipelineCacheDataLoadSave); +} } diff --git 
a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp index 94a0aa6ae3..5e2b407a12 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderList.hpp @@ -1,11 +1,22 @@ #pragma once #include +#include #include +#include + +#include + +namespace Gfx +{ +class AssetTable; +} namespace score::gfx { +class GpuResourceRegistry; class OutputNode; +class VertexFallbackPool; /** * @brief List of nodes to be rendered to an output. * @@ -17,6 +28,7 @@ class OutputNode; */ class SCORE_PLUGIN_GFX_EXPORT RenderList { + friend struct Graph; private: std::shared_ptr m_state; @@ -36,6 +48,14 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList */ [[nodiscard]] QRhiResourceUpdateBatch* initialBatch() const noexcept; + /** + * @brief Store a resource update batch to be submitted on the next render frame. + * + * Used by incremental edge additions that happen after the first render frame + * (when the original m_initialBatch has already been consumed). + */ + void setInitialBatch(QRhiResourceUpdateBatch* batch) noexcept { m_initialBatch = batch; } + /** * @brief Create buffers for a mesh and mark them for upload. * @@ -66,6 +86,32 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList */ bool maybeRebuild(bool force = false); + /** + * @brief Fast-path for pure viewport resize. + * + * Updates state.renderSize / state.outputSize to @p newSize and clears + * m_built, deliberately leaving m_lastSize at the OLD size so that + * maybeRebuild() sees the size mismatch on the next render frame and + * runs its release + init cycle (which re-creates the input RTs). + * + * Skips the full `recreateOutputRenderList` teardown + rebuild + * (release+createRenderList) — saves the bulk of resize cost + * (pipeline compiles, ScenePreprocessor REBUILD, mesh slab uploads, + * texture array reallocation, etc.).
Persistent registry + + * persistent ScenePreprocessor caches mean none of that work is + * actually needed for a pure size change. + * + * Returns true on success, including the no-op case where @p newSize + * already equals m_lastSize. Returns false when: + * - newSize is invalid + * - renderers vector is empty (RL not yet initialised) + * On false the caller (Graph::onResize) falls back to recreateOutputRenderList. + * + * Cost: O(1) here, no GPU drain, no allocations; RT recreation is + * deferred to maybeRebuild() on the next render frame. + */ + bool resizeSwapchainSizedTargets(QSize newSize); + /** * @brief Obtain the texture corresponding to an output port. * @@ -120,10 +166,25 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList void clearRenderers(); /** - * @brief Texture to use when a texture is missing + * @brief Texture to use when a texture is missing (2D) */ QRhiTexture& emptyTexture() const noexcept { return *m_emptyTexture; } + /** + * @brief Texture to use when a 3D (sampler3D) texture is missing + */ + QRhiTexture& emptyTexture3D() const noexcept { return *m_emptyTexture3D; } + + /** + * @brief Texture to use when a cubemap (samplerCube) is missing + */ + QRhiTexture& emptyTextureCube() const noexcept { return *m_emptyTextureCube; } + + /** + * @brief Texture to use when a 2D array (sampler2DArray) is missing + */ + QRhiTexture& emptyTextureArray() const noexcept { return *m_emptyTextureArray; } + /** * @brief UBO corresponding to the output parameters: * @@ -132,6 +193,63 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList */ QRhiBuffer& outputUBO() const noexcept { return *m_outputUBO; } + /** + * @brief Per-output GPU arena store for scene-graph source nodes. + * + * Returns a reference to the registry that owns the Camera / Light / + * Material / PerDraw arena buffers. Source nodes (Camera, Light, + * PBRMesh, …) allocate a slot from this registry at construction and + * write their packed bytes into it at their own update().
+ * + * Persist-across-rebuild contract: the registry is owned by the + * OutputNode (OutputNode::m_registry) and survives RenderList + * rebuilds — the same registry pointer is observed by both the + * pre- and post-rebuild RenderList for a given OutputNode. Consumers + * that cache the registry pointer (e.g. ScenePreprocessor's + * m_registry) can compare against the new RL's registry on init(), + * skip cache wipes when unchanged. + * + * Valid between init() and release(). + */ + GpuResourceRegistry& registry() noexcept { return *m_registry; } + const GpuResourceRegistry& registry() const noexcept { return *m_registry; } + + /** + * @brief Per-RenderList pool of neutral fallback vertex buffers for + * "REQUIRED: false" VERTEX_INPUTS whose upstream geometry does + * not provide a matching attribute. + * + * Valid between init() and release(). See VertexFallbackPool.hpp. + */ + VertexFallbackPool& vertexFallbackPool() noexcept { return *m_vertexFallbackPool; } + + /** + * @brief Per-RenderList GPU-timing collector. + * + * Renderers wrap their begin/endPass regions in `ScopedGpuTimer` to + * attribute the CB-wide lastCompletedGpuTime to the named pass. The + * result is one frame stale — see GpuTiming.hpp for details. + * + * The S6 observability panel reads `gpuTimings().snapshot()` on its + * UI tick and displays per-pass rolling means. + */ + GpuTimings& gpuTimings() noexcept { return m_gpuTimings; } + const GpuTimings& gpuTimings() const noexcept { return m_gpuTimings; } + + /** + * @brief Session-wide asset decode cache. + * + * Set by Graph::createRenderList from GfxContext's AssetTable. + * May be null on test RenderLists or after teardown. Consumers + * must guard. + * + * Plan 09 S1: one decode per asset per session; preprocessor's + * texture-decode path checks this first, falls back to decode + + * stage otherwise. 
+ */ + Gfx::AssetTable* assetTable() const noexcept { return m_assetTable; } + void setAssetTable(Gfx::AssetTable* t) noexcept { m_assetTable = t; } + /** * @brief A quad mesh correct for this API */ @@ -147,7 +265,7 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList * * e.g. it's not needed if we're just doing some generative shaders. */ - bool requiresDepth(score::gfx::Port& p) const noexcept; + bool requiresDepth(const score::gfx::Port& p) const noexcept; bool anyNodeRequiresDepth() const noexcept { return m_requiresDepth; } int samples() const noexcept { return m_samples; } @@ -160,14 +278,82 @@ class SCORE_PLUGIN_GFX_EXPORT RenderList void createAllInputRenderTargets(); + /** + * @brief Mark this render list as fully built. + * + * Prevents maybeRebuild() from unnecessarily tearing down and + * recreating all resources on the first render frame after + * createRenderList() has already fully initialized everything. + */ + void markBuilt() noexcept { m_built = true; m_lastSize = state.renderSize; } + + /// Set the "any node requires depth" flag computed from the node graph. + /// Mirrors what maybeRebuild() recomputes; called from + /// Graph::createRenderList so the freshly-built RL doesn't need a + /// first-frame maybeRebuild to populate it. + void markRequiresDepth(bool value) noexcept { m_requiresDepth = value; } + + /// Notify that an edge was removed. Notifies renderers, releases RT if unused. + /// + /// @param preserveSinks Optional set of sink Ports that should keep their + /// input render target even if this edge was their only feed. Used by + /// batched edge updates (see GfxContext::incrementalEdgeUpdate) so that + /// inserting a filter between two nodes doesn't destroy and immediately + /// re-allocate the same RT when the old and new edges share a sink port. + void + onEdgeRemoved(Edge& edge, const ossia::hash_set* preserveSinks = nullptr); + + /// Remove the render target for a specific input port. 
+ void removeInputRenderTarget(const Port* port); + + /** + * @brief Resolve the downstream render target size for a node. + * + * Returns the maximum size across all downstream render targets that + * this node renders to. Used as fallback when a node's input port + * has no explicit render target size. + */ + QSize resolveDownstreamSize( + const Node* node, + const ossia::small_flat_map& resolvedSpecs) + const noexcept; + private: OutputUBO m_outputUBOData; QRhiResourceUpdateBatch* m_initialBatch{}; + // Scene-graph arena store (camera / light / material / per_draw buffers). + // Persist-across-rebuild contract: ownership is on the OutputNode + // (OutputNode::m_registry), so the registry — and all its arena + // buffers, mesh slabs, texture-array channels, ScenePreprocessor + // material/env slots — survives `Graph::recreateOutputRenderList` + // (viewport resize / fallback rebuild). RenderList::init() either + // calls GpuResourceRegistry::init() once (first RL on this output) + // or adopts the populated state as-is (every subsequent rebuild). + // RenderList::release() does NOT destroy it. OutputNode::releaseRegistry() + // tears it down via destroyOwned() when its QRhi goes away. + GpuResourceRegistry* m_registry{}; + + // Pool of tiny shared vertex buffers used to satisfy "REQUIRED: false" + // VERTEX_INPUTS whose upstream geometry is missing an attribute. + // Same lifetime as m_registry. + std::unique_ptr m_vertexFallbackPool; + + // GPU-timing collector. Lives as long as the RenderList — outlives + // individual renderers so per-pass measurements survive node churn. + GpuTimings m_gpuTimings; + + // Session-wide asset decode cache. Non-owning; GfxContext is the + // owner. May be null. + Gfx::AssetTable* m_assetTable{}; + // Material QRhiBuffer* m_outputUBO{}; QRhiTexture* m_emptyTexture{}; + QRhiTexture* m_emptyTexture3D{}; + QRhiTexture* m_emptyTextureCube{}; + QRhiTexture* m_emptyTextureArray{}; /** * @brief Cache of vertex buffers. 
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp index 33299f5a50..2ab3fef624 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderState.hpp @@ -55,21 +55,87 @@ struct RenderState GraphicsApi api{}; QShaderVersion version{}; -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - struct + struct Caps { + // Indirect draw — Qt 6.12+; populated only on compatible builds. bool drawIndirect{false}; bool drawIndirectMulti{false}; + + // Always queryable. + bool multiview{false}; + bool resolveDepthStencil{false}; + bool tessellation{false}; + bool geometryShader{false}; + + // Extended set (Plan 09 S0). Drives shader feature gating + + // observability. + // + // baseInstance: + // Lets indirect draws use `firstInstance` as the draw ID via + // `gl_BaseInstance` (ARB_shader_draw_parameters). MDI's per-draw + // lookup table reads this way. + // + // instanceIndexIncludesBaseInstance: + // Disambiguates whether `gl_InstanceIndex` already contains the + // `firstInstance` offset (Vulkan-like) or not. Shader prepass + // injects a `#define SCORE_INSTANCE_INDEX_INCLUDES_BASE_INSTANCE` + // based on this flag so presets work on both paths. + // + // variableRateShading: + // Per-tile shading-rate maps (VK_EXT_fragment_shading_rate, + // D3D12 VRS). Feeds the VRS-opt-in path on fullscreen presets. + // + // timestamps: + // Whether `QRhiCommandBuffer::lastCompletedGpuTime()` returns + // meaningful values. Prereq for the per-pass timing panel. + // + // pipelineCacheDataLoadSave: + // Backend supports pipeline binary cache round-trip. Used by + // tryLoadPipelineCache / tryStorePipelineCache; surfaced so + // upper layers can skip PSO prewarm when unsupported. + // + // textureViewFormat: + // R32UI ↔ R32F aliasing. Needed by the visibility buffer preset + // and surfaced early so consumers can feature-detect uniformly. 
+ // + // depthClamp: + // For reverse-Z shadow passes to avoid near-plane clipping; + // shadow_cascades / point_shadow presets opt in when available. + bool baseInstance{false}; + bool instanceIndexIncludesBaseInstance{false}; + bool variableRateShading{false}; + bool timestamps{false}; + bool pipelineCacheDataLoadSave{false}; + bool textureViewFormat{false}; + bool depthClamp{false}; + + void populate(QRhi& rhi); } caps; -#endif // Called after QRhi is destroyed to clean up an imported VkDevice std::function customDeviceCleanup; + // Called right before the QRhi is destroyed, while its pipeline cache is + // still accessible. Used to persist QRhi::pipelineCacheData() to disk. + std::function preRhiDestroy; + + // Mid-session pipeline-cache flush (Plan 09 S6). Same storage path + // as preRhiDestroy but callable during normal operation — invoked + // from RenderList::render after a PSO-compile burst so the cache + // survives crashes / force-quits without a clean shutdown. Null + // when the backend doesn't support PipelineCacheDataLoadSave. 
+ std::function savePipelineCache; + void destroy() { window.reset(); + if(preRhiDestroy) + { + preRhiDestroy(); + preRhiDestroy = nullptr; + } + delete rhi; rhi = nullptr; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp index 947a9d5e31..ad9691e6d9 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -12,6 +14,8 @@ #include #include +#include +#include #include #include @@ -24,36 +28,63 @@ namespace score::gfx static QRhiTexture::Format getTextureFormat(const QString& format) noexcept { - // Map CSF format strings to Qt RHI texture formats - if(format == "RGBA8") - return QRhiTexture::RGBA8; - else if(format == "BGRA8") - return QRhiTexture::BGRA8; - else if(format == "R8") - return QRhiTexture::R8; - + // Map CSF format strings to Qt RHI texture formats. + // + // Case-insensitive comparison: libisf emits the FORMAT layout qualifier + // lowercased into the GLSL (`layout(r32ui) uniform uimage3D ...`), but + // the CSF JSON parser stores `image->format` verbatim — so an author + // writing `"FORMAT": "r32ui"` (the lowercase form that matches the + // generated GLSL one-to-one) used to silently fall through to the + // RGBA8 default at texture creation, while the shader compiled with + // r32ui — producing a Vulkan validation error + // VUID-vkCmdDispatch-format-07753 ("UINT component type required, bound + // descriptor format is VK_FORMAT_R8G8B8A8_UNORM") and undefined values + // on every imageLoad / imageStore. Normalise to upper-case once and + // dispatch. 
+ const QString f = format.toUpper(); + + if(f == "RGBA8") return QRhiTexture::RGBA8; + if(f == "BGRA8") return QRhiTexture::BGRA8; + if(f == "R8") return QRhiTexture::R8; #if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0) - else if(format == "RG8") - return QRhiTexture::RG8; + if(f == "RG8") return QRhiTexture::RG8; #endif - else if(format == "R16") - return QRhiTexture::R16; - + if(f == "R16") return QRhiTexture::R16; #if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0) - else if(format == "RG16") - return QRhiTexture::RG16; + if(f == "RG16") return QRhiTexture::RG16; #endif - else if(format == "RGBA16F") return QRhiTexture::RGBA16F; - else if(format == "RGBA32F") return QRhiTexture::RGBA32F; - else if(format == "R16F") - return QRhiTexture::R16F; - else if(format == "R32F") - return QRhiTexture::R32F; - + if(f == "RGBA16F") return QRhiTexture::RGBA16F; + if(f == "RGBA32F") return QRhiTexture::RGBA32F; + if(f == "R16F") return QRhiTexture::R16F; + if(f == "R32F") return QRhiTexture::R32F; #if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0) - else if(format == "RGB10A2") - return QRhiTexture::RGB10A2; + if(f == "RGB10A2") return QRhiTexture::RGB10A2; +#endif + + // Integer formats — required for atomic image ops (imageAtomicOr / Add / + // Min / Max / Exchange / CompareExchange in GLSL). Atomics in Vulkan, + // D3D12 and Metal 3.1+ work on the R{8,32}{UI,SI} family; the wider + // {RG,RGBA}{32}{UI,SI} variants are sample-only on most desktop GPUs but + // still legal as storage images. Keep symmetry with QRhiTexture::Format + // — RG32UI / RGBA32UI are exposed so users who want to pack two/four + // counters per voxel into one atomic-OR target can opt in. + // + // Added to QRhiTexture::Format in Qt 6.10 — guard so older Qt builds + // (6.2 / 6.4 / 6.6 / 6.8) compile. On older Qt, the request silently + // falls through to RGBA8 (and a Vulkan validation error if the shader + // declared an integer layout qualifier on its image), but the builds + // don't break. 
+#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0) + if(f == "R8UI") return QRhiTexture::R8UI; + if(f == "R32UI") return QRhiTexture::R32UI; + if(f == "RG32UI") return QRhiTexture::RG32UI; + if(f == "RGBA32UI") return QRhiTexture::RGBA32UI; + if(f == "R8SI") return QRhiTexture::R8SI; + if(f == "R32SI") return QRhiTexture::R32SI; + if(f == "RG32SI") return QRhiTexture::RG32SI; + if(f == "RGBA32SI") return QRhiTexture::RGBA32SI; #endif + // Default to RGBA8 if format not recognized return QRhiTexture::RGBA8; } @@ -140,7 +171,7 @@ RenderedCSFNode::RenderedCSFNode(const ISFNode& node) noexcept RenderedCSFNode::~RenderedCSFNode() { } -void RenderedCSFNode::updateInputTexture(const Port& input, QRhiTexture* tex) +void RenderedCSFNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { int sampler_idx = 0; for(auto* p : node.input) @@ -148,21 +179,36 @@ void RenderedCSFNode::updateInputTexture(const Port& input, QRhiTexture* tex) if(p == &input) break; if(p->type == Types::Image) + { sampler_idx++; + if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) + sampler_idx++; + } } - if(sampler_idx < (int)m_inputSamplers.size()) + auto replaceSampler = [&](Sampler& sampl, QRhiTexture* t) { - auto& sampl = m_inputSamplers[sampler_idx]; - if(sampl.texture != tex) + if(sampl.texture != t) { - sampl.texture = tex; + sampl.texture = t; for(auto& [e, cp] : m_computePasses) if(cp.srb) - score::gfx::replaceTexture(*cp.srb, sampl.sampler, tex); + score::gfx::replaceTexture(*cp.srb, sampl.sampler, t); for(auto& [e, gp] : m_graphicsPasses) if(gp.pipeline.srb) - score::gfx::replaceTexture(*gp.pipeline.srb, sampl.sampler, tex); + score::gfx::replaceTexture(*gp.pipeline.srb, sampl.sampler, t); + } + }; + + if(sampler_idx < (int)m_inputSamplers.size()) + { + replaceSampler(m_inputSamplers[sampler_idx], tex); + + if(depthTex + && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth + && sampler_idx + 1 < (int)m_inputSamplers.size()) + { + 
replaceSampler(m_inputSamplers[sampler_idx + 1], depthTex); } } } @@ -204,57 +250,21 @@ struct is_output bool operator()(const auto& v) { return false; } }; +// Thin adapter over the canonical isf_input_port_count_vis (ISFVisitors.hpp) so +// the existing call sites that do `ossia::visit(p, input.data)` keep working. +// Use walk_descriptor_inputs() in new code; this shim preserves the +// "inlet_i / outlet_i mid-loop" pattern these consumers rely on. struct port_indices { int inlet_i = 0; int outlet_i = 0; - void operator()(const isf::storage_input& v) + template + void operator()(const T& v) noexcept { - if(v.access == "read_only") - inlet_i++; - else - { - inlet_i++; - outlet_i++; - } - } - void operator()(const isf::csf_image_input& v) - { - if(v.access == "read_only") - inlet_i++; - else - outlet_i++; - } - void operator()(const isf::geometry_input& v) - { - if(v.attributes.empty()) - { - // Pure pass-through: one inlet + one outlet - inlet_i++; - outlet_i++; - } - else - { - // Inlet if any attribute needs upstream data (read_only or read_write) - for(const auto& attr : v.attributes) - if(attr.access == "read_only" || attr.access == "read_write") { inlet_i++; break; } - // Outlet if any attribute is writable (write_only or read_write) - for(const auto& attr : v.attributes) - { - if(attr.access == "write_only" || attr.access == "read_write") - { - outlet_i++; // one geometry output port if any attribute is writable - break; - } - } - } - // $USER ports for vertex_count, instance_count, aux.size - if(v.vertex_count.find("$USER") != std::string::npos) inlet_i++; - if(v.instance_count.find("$USER") != std::string::npos) inlet_i++; - for(const auto& aux : v.auxiliary) - if(aux.size.find("$USER") != std::string::npos) inlet_i++; + auto c = isf_input_port_count_vis{}(v); + inlet_i += c.inlets; + outlet_i += c.outlets; } - void operator()(const auto& v) { inlet_i++; } }; QSize RenderedCSFNode::computeTextureSize( const isf::csf_image_input& pass) const noexcept @@ 
-300,13 +310,32 @@ int RenderedCSFNode::resolveCountExpression( if(expr.empty()) return 0; - // Try fixed integer first - try - { - return std::max(1, std::stoi(expr)); - } - catch(...) + // Try fixed integer first — but only when the whole string is a pure + // integer literal. std::stoi greedily parses the leading digits and + // silently stops at the first non-digit, so "6 * $x * $x" would + // otherwise be accepted as the integer 6 and the expression evaluator + // never runs. Require every character after optional leading whitespace + // to be a digit before taking the fast path. { + std::size_t i = 0; + while(i < expr.size() && std::isspace((unsigned char)expr[i])) + ++i; + const std::size_t first_digit = i; + while(i < expr.size() && std::isdigit((unsigned char)expr[i])) + ++i; + std::size_t last_digit = i; + while(i < expr.size() && std::isspace((unsigned char)expr[i])) + ++i; + if(first_digit < last_digit && i == expr.size()) + { + try + { + return std::max(1, std::stoi(expr)); + } + catch(...) + { + } + } } // Build expression evaluator @@ -387,38 +416,81 @@ void RenderedCSFNode::registerCommonExpressionVariables( { const auto& desc = n.descriptor(); - // Register texture dimensions ($WIDTH_, $HEIGHT_) + // Register full geometry of each input image/texture: + // $WIDTH_, $HEIGHT_, $DEPTH_, $LAYERS_ + // + // DEPTH/LAYERS are sourced from the live QRhiTexture when available + // (tex->depth() for 3D, tex->arraySize() for arrays). Both fall back to 1 + // for plain 2D textures so expressions like "$DEPTH_vol" remain defined + // regardless of whether the bound texture is actually volumetric — lets + // shaders write one size formula and have it parse cleanly in both cases. + // + // The first input image also exposes un-suffixed $WIDTH/$HEIGHT/$DEPTH/ + // $LAYERS for the common "filter that inherits its input's size" case. + auto register_texture_size = [&](const std::string& name, QRhiTexture* tex, + bool& first) { + QSize px = tex ? 
tex->pixelSize() : QSize{1280, 720}; + int depth = 1; + int layers = 1; + if(tex) + { + if((int)(tex->flags() & QRhiTexture::ThreeDimensional)) + depth = std::max(1, tex->depth()); + if((int)(tex->flags() & QRhiTexture::TextureArray)) + layers = std::max(1, tex->arraySize()); + } + if(px.width() <= 0) + px.setWidth(1280); + if(px.height() <= 0) + px.setHeight(720); + + e.add_constant(fmt::format("var_WIDTH_{}", name), data.emplace_back(px.width())); + e.add_constant(fmt::format("var_HEIGHT_{}", name), data.emplace_back(px.height())); + e.add_constant(fmt::format("var_DEPTH_{}", name), data.emplace_back(depth)); + e.add_constant(fmt::format("var_LAYERS_{}", name), data.emplace_back(layers)); + if(first) + { + e.add_constant("var_WIDTH", data.emplace_back(px.width())); + e.add_constant("var_HEIGHT", data.emplace_back(px.height())); + e.add_constant("var_DEPTH", data.emplace_back(depth)); + e.add_constant("var_LAYERS", data.emplace_back(layers)); + first = false; + } + }; + + bool first_image = true; int input_image_index = 0; for(const auto& img : desc.inputs) { if(ossia::get_if(&img.data)) { + QRhiTexture* t = nullptr; if(input_image_index < (int)m_inputSamplers.size()) - { - auto [s, t] = this->m_inputSamplers[input_image_index]; - QSize tex_sz = t ? 
t->pixelSize() : QSize{1280, 720}; - e.add_constant( - fmt::format("var_WIDTH_{}", img.name), data.emplace_back(tex_sz.width())); - e.add_constant( - fmt::format("var_HEIGHT_{}", img.name), data.emplace_back(tex_sz.height())); - } + t = this->m_inputSamplers[input_image_index].texture; + register_texture_size(img.name, t, first_image); input_image_index++; } else if(auto* img_input = ossia::get_if(&img.data)) { + // Resolve dimensions for ALL csf_image_input access modes: + // - read_only: bound as sampled texture in m_inputSamplers + // - write_only / read_write: bound as storage image in m_storageImages + QRhiTexture* t = nullptr; if(img_input->access == "read_only") { if(input_image_index < (int)m_inputSamplers.size()) - { - auto [s, t] = this->m_inputSamplers[input_image_index]; - QSize tex_sz = t ? t->pixelSize() : QSize{1280, 720}; - e.add_constant( - fmt::format("var_WIDTH_{}", img.name), data.emplace_back(tex_sz.width())); - e.add_constant( - fmt::format("var_HEIGHT_{}", img.name), data.emplace_back(tex_sz.height())); - } + t = this->m_inputSamplers[input_image_index].texture; input_image_index++; } + else + { + auto it = std::find_if( + m_storageImages.begin(), m_storageImages.end(), + [&](const StorageImage& si) { return si.name.toStdString() == img.name; }); + if(it != m_storageImages.end()) + t = it->texture; + } + register_texture_size(img.name, t, first_image); } } @@ -444,36 +516,151 @@ void RenderedCSFNode::registerCommonExpressionVariables( // Register named geometry vertex/instance counts // ($VERTEX_COUNT_, $INSTANCE_COUNT_, and first one as $VERTEX_COUNT, $INSTANCE_COUNT) + // + // Always register the symbol so the expression parses, even on the very + // first frame when no upstream geometry has flowed yet — fall back to the + // descriptor's static vertex_count/instance_count strings (parsed as int) + // and ultimately to 1. 
Without this fallback, $VERTEX_COUNT_ raises + // ERR232 - Undefined symbol on every dispatch evaluation that runs before + // updateGeometryBindings has populated geo_bind, breaking csf-copy-from / + // csf-geo-read-write and any CSF whose dispatch refers to a not-yet-bound + // geometry input. + auto parse_static_count = [](const std::string& s, int fallback) -> int { + if(s.empty()) return fallback; + try + { + int v = std::stoi(s); + return v > 0 ? v : fallback; + } + catch(...) + { + return fallback; + } + }; + int geo_idx = 0; bool first_geo = true; for(const auto& input : desc.inputs) { - if(ossia::get_if(&input.data)) + if(auto* geo = ossia::get_if(&input.data)) { + int vertex_count = 0; + int instance_count = 0; if(geo_idx < (int)m_geometryBindings.size()) { const auto& geo_bind = m_geometryBindings[geo_idx]; - if(geo_bind.vertex_count > 0) - { - e.add_constant( - fmt::format("var_VERTEX_COUNT_{}", input.name), - data.emplace_back(geo_bind.vertex_count)); - if(first_geo) - e.add_constant("var_VERTEX_COUNT", data.emplace_back(geo_bind.vertex_count)); - } - if(geo_bind.instance_count > 0) - { - e.add_constant( - fmt::format("var_INSTANCE_COUNT_{}", input.name), - data.emplace_back(geo_bind.instance_count)); - if(first_geo) - e.add_constant("var_INSTANCE_COUNT", data.emplace_back(geo_bind.instance_count)); - } + vertex_count = geo_bind.vertex_count; + instance_count = geo_bind.instance_count; + } + if(vertex_count <= 0) + vertex_count = parse_static_count(geo->vertex_count, 1); + if(instance_count <= 0) + instance_count = parse_static_count(geo->instance_count, 1); + + e.add_constant( + fmt::format("var_VERTEX_COUNT_{}", input.name), + data.emplace_back(vertex_count)); + e.add_constant( + fmt::format("var_INSTANCE_COUNT_{}", input.name), + data.emplace_back(instance_count)); + if(first_geo) + { + e.add_constant("var_VERTEX_COUNT", data.emplace_back(vertex_count)); + e.add_constant("var_INSTANCE_COUNT", data.emplace_back(instance_count)); first_geo = false; } 
geo_idx++; } } + + // Register $COUNT_ and $BYTESIZE_ for every addressable SSBO / + // UBO the node binds, input or output. Lets SIZE / TARGET / WIDTH / HEIGHT + // expressions size themselves to upstream buffer extents by name — + // removes the need for user-visible "max N" scalar inputs on filters + // whose output should always mirror their input size. + // + // Registration order matters when names collide (e.g. an upstream- + // provided nested aux shadowed by a top-level AUXILIARY of the same + // name in a replace-style shader): the nested (input-side) binding + // is registered first so the top-level (output-side) redundant + // re-registration is suppressed — semantically, when a user writes + // `$COUNT_scene_lights` they mean the upstream count, not the size + // of the output buffer they're about to overwrite. + // + // For UBOs, COUNT always resolves to 1 (a UBO is one struct instance); + // BYTESIZE resolves to the struct byte size. For SSBOs with a flexible + // array, stride is inferred from `calculateStorageBufferSize(layout, 1) + // - calculateStorageBufferSize(layout, 0)` and COUNT is the allocation's + // element count. For SSBOs without a flexible array, COUNT resolves to 1. + { + ossia::hash_set registered; + const auto& eff_desc = n.descriptor(); + + auto register_buffer + = [&](const std::string& name, int64_t byte_size, bool is_uniform, + std::span layout) { + if(name.empty() || registered.contains(name)) + return; + int64_t element_count = 1; + if(is_uniform) + { + // UBO: single struct. $COUNT = 1, $BYTESIZE = struct size. 
+ element_count = 1; + } + else + { + const int64_t fixed_part + = score::gfx::calculateStorageBufferSize(layout, 0, eff_desc); + const int64_t with_one + = score::gfx::calculateStorageBufferSize(layout, 1, eff_desc); + const int64_t stride = with_one - fixed_part; + if(stride > 0 && byte_size > fixed_part) + element_count = (byte_size - fixed_part) / stride; + else + element_count = 1; + if(element_count < 1) + element_count = 1; + } + e.add_constant( + fmt::format("var_COUNT_{}", name), + data.emplace_back((double)element_count)); + e.add_constant( + fmt::format("var_BYTESIZE_{}", name), + data.emplace_back((double)byte_size)); + registered.insert(name); + }; + + // Pass 1 — nested auxiliaries on every geometry input (the "upstream + // side" of filters; these are the buffers whose counts the user most + // often wants to size against). Registered first so collisions with + // top-level same-name overrides in Pass 2 fall through. + for(const auto& binding : m_geometryBindings) + { + for(const auto& aux : binding.auxiliary_ssbos) + { + register_buffer(aux.name, aux.size, aux.is_uniform, aux.layout); + } + } + + // Pass 2 — top-level storage buffers (INPUTS storage_input + + // top-level AUXILIARY writes). + for(const auto& sb : m_storageBuffers) + { + // Whether this top-level buffer is a UBO or SSBO depends on the + // descriptor input it came from; look up by name. + bool is_uniform = false; + for(const auto& inp : eff_desc.inputs) + { + if(inp.name == sb.name.toStdString()) + { + if(ossia::get_if(&inp.data)) + is_uniform = true; + break; + } + } + register_buffer(sb.name.toStdString(), sb.size, is_uniform, sb.layout); + } + } } int RenderedCSFNode::resolveDispatchExpression(const std::string& expr) const @@ -481,13 +668,28 @@ int RenderedCSFNode::resolveDispatchExpression(const std::string& expr) const if(expr.empty()) return 1; - // Try fixed integer first - try - { - return std::max(1, std::stoi(expr)); - } - catch(...) + // Pure integer literal fast-path. 
Same guard as resolveCountExpression: + // std::stoi would otherwise silently accept "6 * $x" as 6. { + std::size_t i = 0; + while(i < expr.size() && std::isspace((unsigned char)expr[i])) + ++i; + const std::size_t first_digit = i; + while(i < expr.size() && std::isdigit((unsigned char)expr[i])) + ++i; + std::size_t last_digit = i; + while(i < expr.size() && std::isspace((unsigned char)expr[i])) + ++i; + if(first_digit < last_digit && i == expr.size()) + { + try + { + return std::max(1, std::stoi(expr)); + } + catch(...) + { + } + } } // Build expression evaluator @@ -537,8 +739,6 @@ BufferView RenderedCSFNode::createStorageBuffer( QRhi& rhi = *renderer.state.rhi; QRhiBuffer* buffer = rhi.newBuffer( QRhiBuffer::Static, QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer, size); - qWarning() << "CSF ALLOC [createStorageBuffer]" << name << "size=" << size; - if(buffer) { buffer->setName(QStringLiteral("CSF_StorageBuffer_%1").arg(name).toLocal8Bit()); @@ -597,6 +797,8 @@ int RenderedCSFNode::getArraySizeFromUI(const QString& bufferName) const } // Default array size if not found + qWarning() << "RenderedCSFNode: storage size port not resolved (storageSizeInputIndex=" + << storageSizeInputIndex << "); falling back to 1024."; return 1024; } @@ -630,7 +832,7 @@ void RenderedCSFNode::updateStorageBuffers(RenderList& renderer, QRhiResourceUpd // Search all port geometries since storage buffers aren't tied to a specific port. 
const auto stdName = storageBuffer.name.toStdString(); bool found_aux = false; - for(const auto& [port_idx, geo_spec] : m_portGeometries) + for(const auto& [port_key, geo_spec] : m_portGeometries) { if(!geo_spec.meshes || geo_spec.meshes->meshes.empty()) continue; @@ -711,7 +913,6 @@ void RenderedCSFNode::updateStorageBuffers(RenderList& renderer, QRhiResourceUpd QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer, requiredSize); - qWarning() << "CSF ALLOC [updateStorage/indirect]" << storageBuffer.name << "size=" << requiredSize; if(storageBuffer.buffer) { storageBuffer.buffer->setName( @@ -753,27 +954,21 @@ void RenderedCSFNode::updateStorageBuffers(RenderList& renderer, QRhiResourceUpd // intermediate SRBs that reference stale/dangling buffer pointers. } -// Returns the byte size of a GLSL type for SoA SSBO element stride -static int glslTypeSizeBytes(const std::string& type) noexcept -{ - if(type == "float" || type == "int" || type == "uint") - return 4; - if(type == "vec2" || type == "ivec2" || type == "uvec2") - return 8; - if(type == "vec3" || type == "ivec3" || type == "uvec3") - return 12; - if(type == "vec4" || type == "ivec4" || type == "uvec4") - return 16; - if(type == "mat4") - return 64; - return 16; // fallback -} - -// Returns the byte size of an ossia::geometry attribute format -static int geometryFormatSizeBytes(int format) noexcept +// GLSL type → byte size lives in IsfBindingsBuilder.hpp +// (score::gfx::glslTypeSizeBytes for the bare type, std430ArrayStride for +// the per-element stride inside an std430 SSBO array — these differ for +// vec3, see header doc for the rationale). All call sites below resolve +// via ADL inside `namespace score::gfx`. + +// Returns the byte size of one upstream-side element of an +// ossia::geometry attribute. 
For the user_struct format the producer +// carries the size out-of-line on `element_byte_size` (sizeof of the +// user-defined struct in std430); otherwise dispatches on the format +// enum. +static int geometryFormatSizeBytes(const ossia::geometry::attribute& a) noexcept { using F = ossia::geometry::attribute; - switch(format) + switch(a.format) { case F::float4: return 16; case F::float3: return 12; @@ -794,6 +989,7 @@ static int geometryFormatSizeBytes(int format) noexcept case F::half3: return 6; case F::half2: return 4; case F::half1: return 2; + case F::user_struct: return (int)a.element_byte_size; default: return 4; } } @@ -815,13 +1011,12 @@ void RenderedCSFNode::updateGeometryBindings( auto& binding = m_geometryBindings[pre_idx]; if(binding.input_port_index >= 0 && !binding.has_vertex_count_spec) { - auto it = m_portGeometries.find(binding.input_port_index); - if(it != m_portGeometries.end() - && it->second.meshes && !it->second.meshes->meshes.empty()) + if(auto* geo = findGeometryByPort(binding.input_port_index); + geo && geo->meshes && !geo->meshes->meshes.empty()) { - binding.vertex_count = it->second.meshes->meshes[0].vertices; - if(it->second.meshes->meshes[0].instances > 0) - binding.instance_count = it->second.meshes->meshes[0].instances; + binding.vertex_count = geo->meshes->meshes[0].vertices; + if(geo->meshes->meshes[0].instances > 0) + binding.instance_count = geo->meshes->meshes[0].instances; } } pre_idx++; @@ -846,12 +1041,11 @@ void RenderedCSFNode::updateGeometryBindings( const ossia::geometry* upstream_mesh = nullptr; if(binding.input_port_index >= 0) { - auto it = m_portGeometries.find(binding.input_port_index); - if(it != m_portGeometries.end() - && it->second.meshes && !it->second.meshes->meshes.empty()) + if(auto* geo = findGeometryByPort(binding.input_port_index); + geo && geo->meshes && !geo->meshes->meshes.empty()) { binding_has_upstream = true; - upstream_mesh = &it->second.meshes->meshes[0]; + upstream_mesh = 
&geo->meshes->meshes[0]; } } @@ -898,7 +1092,6 @@ void RenderedCSFNode::updateGeometryBindings( auto* buf = renderer.state.rhi->newBuffer( QRhiBuffer::Static, QRhiBuffer::StorageBuffer, requiredSize); - qWarning() << "CSF ALLOC [auxResize]" << aux.name.c_str() << "size=" << requiredSize; buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str()); buf->create(); aux.buffer = buf; @@ -907,6 +1100,15 @@ void RenderedCSFNode::updateGeometryBindings( QByteArray zero(requiredSize, 0); res.uploadStaticBuffer(aux.buffer, 0, requiredSize, zero.constData()); aux.size = requiredSize; + + // Keep read_buffer in sync for feedback receivers + if(aux.read_buffer) + { + aux.read_buffer->destroy(); + aux.read_buffer->setSize(requiredSize); + aux.read_buffer->create(); + res.uploadStaticBuffer(aux.read_buffer, 0, requiredSize, zero.constData()); + } } } @@ -962,15 +1164,14 @@ void RenderedCSFNode::updateGeometryBindings( auto& ssbo = binding.attribute_ssbos[attr_idx]; if(req.access == "read_write" && !ssbo.read_buffer) { - const int elem_size = glslTypeSizeBytes(req.type); + const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor); const int count = ssbo.per_instance ? 
binding.instance_count : binding.vertex_count; - const int64_t buf_size = (int64_t)elem_size * count; + const int64_t buf_size = elem_stride * count; if(buf_size > 0) { auto* buf = renderer.state.rhi->newBuffer( QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, buf_size); - qWarning() << "CSF ALLOC [feedbackPingPong]" << req.name.c_str() << "size=" << buf_size; buf->setName(QByteArray("CSF_GeomPP_") + req.name.c_str()); buf->create(); QByteArray zero(buf_size, 0); @@ -990,7 +1191,6 @@ void RenderedCSFNode::updateGeometryBindings( auto* buf = renderer.state.rhi->newBuffer( QRhiBuffer::Static, QRhiBuffer::StorageBuffer, aux.size); - qWarning() << "CSF ALLOC [feedbackPingPongAux]" << aux.name.c_str() << "size=" << aux.size; buf->setName(QByteArray("CSF_GeomPPAux_") + aux.name.c_str()); buf->create(); QByteArray zero(aux.size, 0); @@ -1040,25 +1240,28 @@ void RenderedCSFNode::updateGeometryBindings( const auto& req = geo_input->attributes[attr_idx]; auto& ssbo = binding.attribute_ssbos[attr_idx]; - // Match by semantic - const ossia::attribute_semantic sem = ossia::name_to_semantic(req.semantic); - const ossia::geometry::attribute* geo_attr = nullptr; - if(sem != ossia::attribute_semantic::custom) - geo_attr = mesh.find(sem); - else - geo_attr = mesh.find(req.name); + // Match against upstream geometry — same 3-stage cascade as raw + // raster (findGeometryAttribute in Utils.cpp). The display_name + // stage handles `{ NAME: "position", SEMANTIC: "custom" }` falling + // back to the real position attribute when no shadowing custom one + // exists. + const ossia::geometry::attribute* geo_attr + = score::gfx::findGeometryAttribute(mesh, req.name, req.semantic); if(!geo_attr) { - // Create or keep a zero-filled fallback buffer - const int elem_size = glslTypeSizeBytes(req.type); + // Create or keep a zero-filled fallback buffer. 
std430ArrayStride + // ensures vec3 attributes get 16-byte stride to match what the + // shader's `T array[]` SSBO actually reads in std430. + const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor); const int fallback_count = ssbo.per_instance ? std::max(1, mesh.instances) : std::max(1, mesh.vertices); - const int64_t needed = (int64_t)elem_size * fallback_count; + const int64_t needed = elem_stride * fallback_count; if(!ssbo.buffer || ssbo.size < needed) { if(req.required && req.access == "read_only") - qWarning() << "CSF geometry: required read_only attribute" << req.name.c_str() << "not found" - << "(semantic=" << (int)sem << ")"; + qWarning() << "CSF geometry: required read_only attribute" + << req.name.c_str() << "not found" + << "(semantic=" << req.semantic.c_str() << ")"; else qDebug() << " attr" << req.name.c_str() << "not in upstream — creating fallback buffer"; @@ -1069,7 +1272,6 @@ void RenderedCSFNode::updateGeometryBindings( auto* buf = renderer.state.rhi->newBuffer( QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed); - qWarning() << "CSF ALLOC [geomFallback]" << req.name.c_str() << "size=" << needed; buf->setName(QByteArray("CSF_GeomFallback_") + req.name.c_str()); buf->create(); QByteArray zero(needed, 0); @@ -1077,6 +1279,15 @@ void RenderedCSFNode::updateGeometryBindings( ssbo.buffer = buf; ssbo.size = needed; ssbo.owned = true; + + // Keep read_buffer in sync for feedback receivers + if(ssbo.read_buffer) + { + ssbo.read_buffer->destroy(); + ssbo.read_buffer->setSize(needed); + ssbo.read_buffer->create(); + res.uploadStaticBuffer(ssbo.read_buffer, 0, needed, zero.constData()); + } } continue; } @@ -1099,14 +1310,21 @@ void RenderedCSFNode::updateGeometryBindings( ? 
mesh.bindings[binding_idx] : mesh.bindings[0]; - const int attr_size = geometryFormatSizeBytes(geo_attr->format); + const int attr_size = geometryFormatSizeBytes(*geo_attr); + const int64_t csf_elem_stride = std430ArrayStride(req.type, n.m_descriptor); const int stride = geo_bind.byte_stride; - const bool is_soa = (stride == 0 || stride == attr_size); + // SoA upstream is bindable directly when the binding stride matches + // either the bare attribute size (tightly-packed mesh vertex buffer) + // or the std430 element stride (CSF SSBO output, vec3-padded). Both + // shapes are valid sources for an std430 SSBO consumer. + const bool is_soa = (stride == 0 || stride == attr_size + || stride == (int)csf_elem_stride); if(auto* gpu = ossia::get_if(&geo_buf.data)) { - const int elem_size = glslTypeSizeBytes(req.type); - if(is_soa && gpu->handle && attr_size == elem_size) + const int elem_size = glslTypeSizeBytes(req.type, n.m_descriptor); + if(is_soa && gpu->handle + && (attr_size == elem_size || stride == (int)csf_elem_stride)) { // SoA GPU buffer with matching element size: bind directly (zero-copy) auto* rhi_buf = static_cast(gpu->handle); @@ -1117,9 +1335,8 @@ void RenderedCSFNode::updateGeometryBindings( // feedback loop when the downstream node hasn't produced data yet). if(binding.has_vertex_count_spec && ssbo.owned && ssbo.buffer) { - const int elem_size = glslTypeSizeBytes(req.type); const int attr_count = ssbo.per_instance ? 
binding.instance_count : binding.vertex_count; - const int64_t needed = (int64_t)elem_size * attr_count; + const int64_t needed = csf_elem_stride * attr_count; if(needed > 0 && gpu->byte_size < needed) { continue; @@ -1163,9 +1380,10 @@ void RenderedCSFNode::updateGeometryBindings( continue; const auto* src = static_cast(cpu->raw_data.get()); - const int64_t elem_size = glslTypeSizeBytes(req.type); + const int64_t elem_size = glslTypeSizeBytes(req.type, n.m_descriptor); + const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor); const int data_count = ssbo.per_instance ? mesh.instances : mesh.vertices; - const int64_t needed = elem_size * data_count; + const int64_t needed = elem_stride * data_count; // Skip re-upload if we already own a correctly-sized buffer // and the upstream data hasn't changed (same CPU pointer as last upload). @@ -1183,20 +1401,35 @@ void RenderedCSFNode::updateGeometryBindings( auto* buf = renderer.state.rhi->newBuffer( QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed); - qWarning() << "CSF ALLOC [geomUpload]" << req.name.c_str() << "size=" << needed; buf->setName(QByteArray("CSF_Geom_") + req.name.c_str()); buf->create(); ssbo.buffer = buf; ssbo.size = needed; ssbo.owned = true; + + // For feedback receivers, also resize read_buffer to keep both + // ping-pong buffers the same size. Otherwise after the swap, + // ssbo.buffer would be the old undersized read_buffer while + // ssbo.size reflects the new size, causing buffer overruns. + if(ssbo.read_buffer) + { + ssbo.read_buffer->destroy(); + ssbo.read_buffer->setSize(needed); + ssbo.read_buffer->create(); + } } // Total byte offset into the buffer: input entry offset + attribute offset within stride const int64_t base_offset = input_byte_offset + geo_attr->byte_offset; - if(is_soa && attr_size == (int)elem_size) + // Direct upload only when source and destination strides match + // exactly. 
For vec3 attributes, that means upstream must already + // be std430-strided (16 bytes per element) — a tightly-packed + // upstream vec3 (stride 12) routes through scatter so the + // destination's 4-byte trailing padding stays zeroed. + if(is_soa && (int64_t)stride == elem_stride) { - // SoA CPU buffer with matching element size: upload directly + // SoA CPU buffer with matching stride: upload directly const int64_t upload_size = std::min(needed, cpu->byte_size - base_offset); if(upload_size > 0) res.uploadStaticBuffer(ssbo.buffer, 0, upload_size, src + base_offset); @@ -1222,7 +1455,13 @@ void RenderedCSFNode::updateGeometryBindings( const int64_t upload_size = std::min(staging_needed, cpu->byte_size); res.uploadStaticBuffer(ssbo.scatterStaging, 0, upload_size, src); - // Prepare the scatter dispatch (will execute in runInitialPasses) + // The scatter compute lays out destination elements at + // dst_components * sizeof(float) per slot — for vec3 in std430 + // that's 3 floats of data + 1 float of padding implicit in the + // 16-byte stride. dst_components is 3 for vec3, so the compute + // writes 12 bytes per element and the buffer's std430 padding + // bytes stay at their zero-initialised value. That matches + // what a well-behaved compute shader would produce. ssbo.scatterParams = GPUBufferScatter::Params{ .staging = ssbo.scatterStaging, .output = ssbo.buffer, @@ -1241,14 +1480,16 @@ void RenderedCSFNode::updateGeometryBindings( else { // CPU fallback: scatter per-element with format conversion - // (used when compute shaders are not available) + // (used when compute shaders are not available). Destination + // slots are elem_stride bytes apart; the first elem_size bytes + // hold the data, any trailing std430 padding stays zero. 
QByteArray scattered(needed, 0); if(elem_size > attr_size && elem_size >= (int)sizeof(float)) { const float one = 1.0f; for(int i = 0; i < data_count; i++) - std::memcpy(scattered.data() + (int64_t)i * elem_size + elem_size - sizeof(float), + std::memcpy(scattered.data() + (int64_t)i * elem_stride + elem_size - sizeof(float), &one, sizeof(float)); } @@ -1257,7 +1498,7 @@ void RenderedCSFNode::updateGeometryBindings( { const int64_t src_off = (int64_t)i * stride + base_offset; if(src_off + copy_size <= cpu->byte_size) - std::memcpy(scattered.data() + (int64_t)i * elem_size, src + src_off, copy_size); + std::memcpy(scattered.data() + (int64_t)i * elem_stride, src + src_off, copy_size); } res.uploadStaticBuffer(ssbo.buffer, 0, needed, scattered.constData()); } @@ -1319,10 +1560,12 @@ void RenderedCSFNode::updateGeometryBindings( { renderer.releaseBuffer(aux.buffer); } + // Usage flag matches the aux kind so the created buffer can + // be bound as the intended descriptor type. + const auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer + : QRhiBuffer::StorageBuffer; auto* buf = renderer.state.rhi->newBuffer( - QRhiBuffer::Static, - QRhiBuffer::StorageBuffer, requiredSize); - qWarning() << "CSF ALLOC [geoAuxNoMatch]" << aux.name.c_str() << "size=" << requiredSize; + QRhiBuffer::Static, usage, requiredSize); buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str()); buf->create(); QByteArray zero(requiredSize, 0); @@ -1334,6 +1577,29 @@ void RenderedCSFNode::updateGeometryBindings( } } + // Auxiliary textures: match by name against the mesh's + // auxiliary_textures list. Fall back to the shape-matched + // placeholder when no match — same safety model as the raster + // path (never leave a stale upstream handle that may have been + // freed). SRB rebuild on handle change is driven by the existing + // initComputeSRBAndPasses / recreateSRB cycle; we only update + // the cached texture pointer here. 
+ for(auto& at : binding.auxiliary_textures) + { + // Owned textures (auto-allocated writable storage images) are + // never overwritten by upstream resolution — we own the data, + // there's no upstream contributor. + if(at.owned) + continue; + const auto* aux = mesh.find_auxiliary_texture(at.name); + auto* tex = aux + ? static_cast(aux->native_handle) + : nullptr; + if(!tex) + tex = at.placeholder; + at.texture = tex; + } + // When has_vertex_count_spec AND the upstream is a feedback loop (our own // SSBOs came back as gpu handles, identity check kept them owned), we must // still resize if $USER changed. Without this, the SSBOs stay at whatever @@ -1350,9 +1616,9 @@ void RenderedCSFNode::updateGeometryBindings( { continue; } - const int elem_size = glslTypeSizeBytes(geo_input->attributes[attr_idx].type); + const int64_t elem_stride = std430ArrayStride(geo_input->attributes[attr_idx].type, n.m_descriptor); const int attr_count = ssbo.per_instance ? binding.instance_count : binding.vertex_count; - const int64_t needed = (int64_t)elem_size * attr_count; + const int64_t needed = elem_stride * attr_count; if(needed > 0 && ssbo.size != needed) { ssbo.buffer->destroy(); @@ -1410,8 +1676,8 @@ void RenderedCSFNode::updateGeometryBindings( const int count = ssbo.per_instance ? 
binding.instance_count : binding.vertex_count; if(count <= 0) continue; - const int elem_size = glslTypeSizeBytes(req.type); - const int64_t needed = (int64_t)elem_size * count; + const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor); + const int64_t needed = elem_stride * count; if(!ssbo.buffer || ssbo.size != needed) { @@ -1426,7 +1692,6 @@ void RenderedCSFNode::updateGeometryBindings( auto* buf = renderer.state.rhi->newBuffer( QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed); - qWarning() << "CSF ALLOC [geomSpecResize]" << req.name.c_str() << "size=" << needed; buf->setName(QByteArray("CSF_GeomSpec_") + req.name.c_str()); buf->create(); ssbo.buffer = buf; @@ -1496,11 +1761,10 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat const ossia::geometry* binding_upstream = nullptr; if(binding.input_port_index >= 0) { - auto it = m_portGeometries.find(binding.input_port_index); - if(it != m_portGeometries.end() - && it->second.meshes && !it->second.meshes->meshes.empty()) + if(auto* geo = findGeometryByPort(binding.input_port_index); + geo && geo->meshes && !geo->meshes->meshes.empty()) { - binding_upstream = &it->second.meshes->meshes[0]; + binding_upstream = &geo->meshes->meshes[0]; } } @@ -1556,10 +1820,18 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat if(binding_upstream) { out_geo.bounds = binding_upstream->bounds; - // Inherit topology from upstream for filter-type nodes - out_geo.topology = (decltype(out_geo.topology))binding_upstream->topology; - out_geo.cull_mode = (decltype(out_geo.cull_mode))binding_upstream->cull_mode; + // Inherit topology / cull / face / blend / depth-write / filter + // metadata from upstream for filter-type nodes. Anything the CSF + // doesn't explicitly produce on its own should pass through — + // otherwise inserting a CSF between ScenePreprocessor and a + // rasterizer silently drops state the rasterizer relies on. 
+ out_geo.topology = (decltype(out_geo.topology))binding_upstream->topology; + out_geo.cull_mode = (decltype(out_geo.cull_mode))binding_upstream->cull_mode; out_geo.front_face = (decltype(out_geo.front_face))binding_upstream->front_face; + out_geo.blend = binding_upstream->blend; + out_geo.depth_write = binding_upstream->depth_write; + out_geo.filter_tag = binding_upstream->filter_tag; + out_geo.filter_material_index = binding_upstream->filter_material_index; } for(int attr_idx = 0; attr_idx < (int)geo_input->attributes.size(); attr_idx++) @@ -1574,7 +1846,12 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat continue; const int buf_index = (int)out_geo.buffers.size(); - const int elem_size = glslTypeSizeBytes(req.type); + // The buffer underneath is sized at std430 stride (16 bytes per + // vec3 element); declaring the binding stride to match is what + // lets a downstream raw-raster vertex shader read these + // attributes without the silent vec3-padding drift that left + // every fourth splat misaligned. + const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor); ossia::geometry::buffer buf{ .data = ossia::geometry::gpu_buffer{ssbo.buffer, ssbo.size}, @@ -1582,7 +1859,7 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat out_geo.buffers.push_back(std::move(buf)); ossia::geometry::binding bind; - bind.byte_stride = elem_size; + bind.byte_stride = (uint32_t)elem_stride; bind.classification = ssbo.per_instance ? ossia::geometry::binding::per_instance : ossia::geometry::binding::per_vertex; @@ -1720,6 +1997,62 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat .byte_offset = in_aux.byte_offset, .byte_size = in_aux.byte_size}); } } + + // First: publish THIS CSF's own writable storage images so they + // ride the geometry cable downstream and ExtractTexture / flat + // AUXILIARY rasterizer reads can resolve them by name. 
Mirrors + // the m_storageBuffers → out_geo.buffers forward done above. + for(const auto& si : m_storageImages) + { + if(si.access == "read_only" || !si.texture) + continue; + out_geo.auxiliary_textures.push_back( + ossia::geometry::auxiliary_texture{ + .name = si.name.toStdString(), + .native_handle = si.texture, + .sampler_handle = nullptr}); + } + + // Same forward for nested-aux storage images this binding + // auto-allocated (at.owned == true). Lets a CSF declare its + // writable storage image under the geometry-input AUXILIARY + // block and have it published to downstream consumers + // identically to the top-level csf_image_input case. + for(const auto& at : binding.auxiliary_textures) + { + if(!at.owned || !at.texture) + continue; + bool already_present = false; + for(const auto& existing : out_geo.auxiliary_textures) + if(existing.name == at.name) { already_present = true; break; } + if(already_present) + continue; + out_geo.auxiliary_textures.push_back( + ossia::geometry::auxiliary_texture{ + .name = at.name, + .native_handle = at.texture, + .sampler_handle = nullptr}); + } + + // Forward upstream auxiliary TEXTURES (skybox, irradiance_map, + // baseColorArray*, normalArray*, shadow_map_array, …). Without + // this, classic_pbr_full / classic_pbr_openpbr / any rasterizer + // that samples material texture arrays via sample_slot_* finds + // the bindings empty (or fallback-placeholder), every textureRef + // resolves to placeholder-black, and every textured fragment + // renders fully black. Same name-collision skip rule as the + // buffer forward — if THIS CSF declared an aux texture of the + // same name (RESOURCES.auxiliary_textures or similar), keep its + // binding and skip the upstream re-add. 
+ for(const auto& in_atx : binding_upstream->auxiliary_textures) + { + bool already_present = false; + for(const auto& existing : out_geo.auxiliary_textures) + if(existing.name == in_atx.name) { already_present = true; break; } + if(already_present) + continue; + out_geo.auxiliary_textures.push_back(in_atx); + } } // Explicit COPY_FROM: forward auxiliary buffers from other geometries @@ -1741,11 +2074,13 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat = aux_req.forward->auxiliary.empty() ? aux_req.name : aux_req.forward->auxiliary; // Search all input port geometries for the source - for(const auto& [port_idx, geo_spec] : m_portGeometries) + for(const auto& [port_key, geo_spec] : m_portGeometries) { if(!geo_spec.meshes || geo_spec.meshes->meshes.empty()) continue; + const int port_idx = port_key.first; + // Match by geometry resource name → find the binding with that name int src_binding_idx = 0; bool found_geo = false; @@ -1806,11 +2141,13 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat const std::string& src_geo_name = attr_req.forward->geometry; const std::string& src_attr_name = attr_req.forward->attribute; - for(const auto& [port_idx, geo_spec] : m_portGeometries) + for(const auto& [port_key, geo_spec] : m_portGeometries) { if(!geo_spec.meshes || geo_spec.meshes->meshes.empty()) continue; + const int port_idx = port_key.first; + // Find the matching source geometry binding int src_binding_idx = 0; bool found_geo = false; @@ -1945,16 +2282,51 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat out_geo.indices = binding_upstream->indices; } -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - if(binding.uses_indirect_draw && binding.indirectDrawBuffer) + if(binding.uses_indirect_draw && binding.indirectBuffer) { out_geo.indirect_count = ossia::geometry::gpu_buffer{ - binding.indirectDrawBuffer, - binding.indirect_draw_indexed - ? 
(int64_t)sizeof(QRhiIndexedIndirectDrawCommand) - : (int64_t)sizeof(QRhiIndirectDrawCommand)}; + binding.indirectBuffer, + binding.indirectBufferSize}; + } + else if(binding_upstream + && binding_upstream->indirect_count.handle) + { + // Forward upstream's indirect-draw buffer when this CSF doesn't + // produce its own. ScenePreprocessor sets indirect_count to the + // MDI indirect_draw_cmds buffer (ScenePreprocessorNode.cpp:2329); + // an MDI rasterizer downstream reads from out_geo.indirect_count + // for vkCmdDrawIndexedIndirect dispatch. Without this forward, + // every passthrough CSF inserted between Preprocessor and an MDI + // rasterizer hands the rasterizer a null indirect buffer → + // garbage indexCount / firstIndex / baseVertex → triangles + // render at wild positions / wrong index ranges. + out_geo.indirect_count = binding_upstream->indirect_count; + } + + // Forward CPU-side draw commands too. ScenePreprocessor populates + // these (`cpu_draw_commands`, ScenePreprocessorNode.cpp:2334) for + // the Qt < 6.12 / non-GPU-indirect fallback path. Without this + // forward, CustomMesh::update sees an empty vector and skips the + // assign() at line 370 — leaving `output_meshbuf.cpuDrawCommands` + // with stale data from a previous frame OR uninitialised + // small-vector contents, which the CPU draw fallback then issues + // as drawIndexed(garbage, garbage, ...). Symptom: Vulkan + // VUID-vkCmdDrawIndexed-robustBufferAccess2-08798 with huge + // firstIndex/indexCount values that look like pointer low bits. + if(binding_upstream && !binding_upstream->cpu_draw_commands.empty()) + { + out_geo.cpu_draw_commands.assign( + binding_upstream->cpu_draw_commands.begin(), + binding_upstream->cpu_draw_commands.end()); } -#endif + + // Stamp format_id from the descriptor's RESOURCES[geoOut] so a + // CSF that produces a primitive-cloud-shaped output declares its + // format identity in JSON and downstream FlattenedSceneFilter + // mode-12 can route it. 
Same hash + truncation as the + // ScenePreprocessor splat-bucket stamp. + if(!geo_input->format_id.empty()) + out_geo.filter_tag = (uint32_t)ossia::hash_string(geo_input->format_id); meshes->meshes.push_back(std::move(out_geo)); meshes->dirty_index = 1; // Initial structural build @@ -2125,16 +2497,117 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat } } -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - if(binding.uses_indirect_draw && binding.indirectDrawBuffer) + if(binding.uses_indirect_draw && binding.indirectBuffer) { out_geo.indirect_count = ossia::geometry::gpu_buffer{ - binding.indirectDrawBuffer, - binding.indirect_draw_indexed - ? (int64_t)sizeof(QRhiIndexedIndirectDrawCommand) - : (int64_t)sizeof(QRhiIndirectDrawCommand)}; + binding.indirectBuffer, + binding.indirectBufferSize}; + } + else if(binding_upstream + && binding_upstream->indirect_count.handle) + { + // Mirror the full-rebuild path: forward upstream's indirect- + // draw buffer when this CSF doesn't produce its own. Required + // for any passthrough CSF inserted in front of an MDI + // rasterizer (ScenePreprocessor → CSF → classic_pbr_mdi / + // openpbr / debug_lights). Without this, the fast path keeps + // the previously-published indirect_count handle, which is + // empty for compute passes that never set it themselves. + if(out_geo.indirect_count.handle != binding_upstream->indirect_count.handle + || out_geo.indirect_count.byte_size != binding_upstream->indirect_count.byte_size) + { + out_geo.indirect_count = binding_upstream->indirect_count; + any_handle_changed = true; + } + } + + // Re-forward upstream's CPU draw commands every frame. The vector + // contents are immutable in the typical scene flow but the + // binding's outputGeometry mesh holds a copy that can drift if + // upstream rebuilds (e.g. a scene reload). Cheap re-assign each + // frame; ScenePreprocessor's command list is at most ~1k entries. 
+ if(binding_upstream && !binding_upstream->cpu_draw_commands.empty()) + { + out_geo.cpu_draw_commands.assign( + binding_upstream->cpu_draw_commands.begin(), + binding_upstream->cpu_draw_commands.end()); + } + + // Re-forward upstream metadata that the rasterizer reads but the + // CSF doesn't override: pipeline-state hints (blend, depth_write) + // and filter metadata (filter_tag, filter_material_index). + // Identity assignments — the upstream values either stayed the + // same since the structural pass or shifted (scene reload), and + // we want the latter to propagate. + if(binding_upstream) + { + out_geo.blend = binding_upstream->blend; + out_geo.depth_write = binding_upstream->depth_write; + out_geo.filter_tag = binding_upstream->filter_tag; + out_geo.filter_material_index = binding_upstream->filter_material_index; + + // Re-forward upstream auxiliary TEXTURES (skybox, baseColorArray, + // shadow_map_array, …). Same forward as the structural-rebuild + // path; needed every frame in case upstream rebakes (CubemapLoader + // refresh, IBL bake, etc.). Skip names already declared by this + // CSF or already pushed in this frame. + out_geo.auxiliary_textures.clear(); + + // Publish THIS CSF's own writable storage images (write_only and + // read_write csf_image_input declarations) into the geometry + // cable's auxiliary_textures so downstream consumers (ExtractTexture + // node, rasterizers reading them as flat AUXILIARY) can resolve + // them by name. Without this push, the texture exists in this + // CSF's m_storageImages but is invisible to the world — the + // mirror of how m_storageBuffers is forwarded into out_geo.buffers + // a few lines above. 
+ for(const auto& si : m_storageImages) + { + if(si.access == "read_only" || !si.texture) + continue; + out_geo.auxiliary_textures.push_back( + ossia::geometry::auxiliary_texture{ + .name = si.name.toStdString(), + .native_handle = si.texture, + .sampler_handle = nullptr}); + } + + // Same forward for write_only / read_write storage images + // declared as nested aux on the geometry input (auto-allocated + // in the binding setup with at.owned = true). Required for + // voxelize_scene_aabb.csf's `voxel_grid` to ship downstream + // when declared as a nested aux on the scene geometry input + // rather than as a top-level csf_image_input. + for(const auto& at : binding.auxiliary_textures) + { + if(!at.owned || !at.texture) + continue; + bool already_present = false; + for(const auto& existing : out_geo.auxiliary_textures) + if(existing.name == at.name) { already_present = true; break; } + if(already_present) + continue; + out_geo.auxiliary_textures.push_back( + ossia::geometry::auxiliary_texture{ + .name = at.name, + .native_handle = at.texture, + .sampler_handle = nullptr}); + } + + // Then forward upstream auxiliary textures, skipping any name + // this CSF already published above so producer-side overrides + // win over upstream defaults (consistent with the buffer-forward + // shadowing rule). + for(const auto& in_atx : binding_upstream->auxiliary_textures) + { + bool already_present = false; + for(const auto& existing : out_geo.auxiliary_textures) + if(existing.name == in_atx.name) { already_present = true; break; } + if(already_present) + continue; + out_geo.auxiliary_textures.push_back(in_atx); + } } -#endif // Only bump dirty_index if any handle actually changed, // so downstream acquireMesh picks up the new buffers. 
@@ -2188,7 +2661,7 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat continue; int port_idx = it - sink->node->input.begin(); - rendered->second->process(port_idx, binding.outputGeometry); + rendered->second->process(port_idx, binding.outputGeometry, out_edge->source); } } @@ -2197,79 +2670,284 @@ void RenderedCSFNode::pushOutputGeometry(RenderList& renderer, QRhiResourceUpdat } } -void RenderedCSFNode::initComputePass( + +void RenderedCSFNode::createGraphicsPass( const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) { - QRhi& rhi = *renderer.state.rhi; - - if(!m_computePipeline) - { - createComputePipeline(renderer); - } - - if(!m_computePipeline) - return; - - // Ensure storage buffers are created before setting up bindings - updateStorageBuffers(renderer, res); - - // Eagerly populate geometry bindings so we can detect buffer aliasing across - // attribute/auxiliary SSBOs (caused by feedback edges sharing the same - // physical buffer with conflicting access modes) BEFORE we emit any binding. - updateGeometryBindings(renderer, res); + // Create a graphics pass to render our compute output texture to the render target + static const constexpr auto vertex_shader = R"_(#version 450 +layout(location = 0) in vec2 position; +layout(location = 1) in vec2 texcoord; - // Pre-pass: collect physical buffers used with conflicting access modes - // (read on one binding, write on another) so we can promote them to - // bufferLoadStore. The Qt RHI / Vulkan validation layer rejects bindings - // that reference the same buffer with different access flags within a pass. 
- std::unordered_set aliased_buffers; - { - std::unordered_map access_flags; // 1=read, 2=write, 3=both - int gb_idx = 0; - for(const auto& inp : n.m_descriptor.inputs) - { - auto* g = ossia::get_if(&inp.data); - if(!g) - continue; - if(gb_idx >= (int)m_geometryBindings.size()) - break; - const auto& gb = m_geometryBindings[gb_idx++]; +layout(location = 0) out vec2 v_texcoord; - for(int ai = 0; ai < (int)g->attributes.size() && ai < (int)gb.attribute_ssbos.size(); ai++) - { - const auto& req = g->attributes[ai]; - const auto& ssbo = gb.attribute_ssbos[ai]; - if(req.access == "none" || !ssbo.buffer) - continue; - int f = (req.access == "read_only") ? 1 : (req.access == "write_only") ? 2 : 3; - access_flags[ssbo.buffer] |= f; - if(req.access == "read_write" && ssbo.read_buffer && ssbo.read_buffer != ssbo.buffer) - access_flags[ssbo.read_buffer] |= 1; - } - for(const auto& aux : gb.auxiliary_ssbos) - { - if(!aux.buffer) - continue; - int f = (aux.access == "read_only") ? 1 : (aux.access == "write_only") ? 
2 : 3; - access_flags[aux.buffer] |= f; - if(aux.read_buffer && aux.read_buffer != aux.buffer) - access_flags[aux.read_buffer] |= 1; - } - } - for(const auto& [buf, flags] : access_flags) - if(flags == 3) - aliased_buffers.insert(buf); - } +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; - // Create shader resource bindings - QList bindings; +out gl_PerVertex { vec4 gl_Position; }; - // Binding 0: Renderer UBO (part of ProcessUBO in defaultUniforms) +void main() +{ + v_texcoord = texcoord; + gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.); +#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL) + gl_Position.y = - gl_Position.y; +#endif +} +)_"; + + static const constexpr auto fragment_shader_rgba = R"_(#version 450 +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; + +layout(binding = 3) uniform sampler2D outputTexture; + +layout(location = 0) in vec2 v_texcoord; +layout(location = 0) out vec4 fragColor; + +void main() { fragColor = texture(outputTexture, v_texcoord); } +)_"; + static const constexpr auto fragment_shader_r = R"_(#version 450 +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; + +layout(binding = 3) uniform sampler2D outputTexture; + +layout(location = 0) in vec2 v_texcoord; +layout(location = 0) out vec4 fragColor; + +void main() { fragColor = vec4(texture(outputTexture, v_texcoord).rrr, 1.0); } +)_"; + + // Get the mesh for rendering a fullscreen quad + const auto& mesh = renderer.defaultTriangle(); + + // Find the texture for the specific output port this edge is connected to + QRhiTexture* textureToRender = textureForOutput(*edge.source); + // If we still don't have a texture, we can't create the graphics pass + if(!textureToRender) + { + qWarning() << "No output texture available for graphics pass"; + return; + } + + auto 
fmt = textureToRender->format(); + const char* fragment_shader{}; + switch(fmt) + { + case QRhiTexture::Format::R8: + case QRhiTexture::Format::RED_OR_ALPHA8: +#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) + case QRhiTexture::Format::R8UI: + case QRhiTexture::Format::R32UI: +#endif + case QRhiTexture::Format::R16: + case QRhiTexture::Format::R16F: + case QRhiTexture::Format::R32F: + case QRhiTexture::Format::D16: +#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0) + case QRhiTexture::Format::D24: + case QRhiTexture::Format::D24S8: +#endif + case QRhiTexture::Format::D32F: +#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) + case QRhiTexture::Format::D32FS8: +#endif + fragment_shader = fragment_shader_r; + break; + default: + fragment_shader = fragment_shader_rgba; + break; + } + + // Compile shaders + auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, vertex_shader, fragment_shader); + + // Create a sampler for our output texture + QRhiSampler* outputSampler = renderer.state.rhi->newSampler( + QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, + QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); + outputSampler->setName("RenderedCSFNode::OutputSampler"); + outputSampler->create(); + + // Initialize mesh buffers + MeshBuffers meshBuffers = renderer.initMeshBuffer(mesh, res); + + // Build the pipeline to render our compute result + auto pip = score::gfx::buildPipeline( + renderer, mesh, vertexS, fragmentS, rt, nullptr, nullptr, + std::array{Sampler{outputSampler, textureToRender}}); + + if(pip.pipeline) + { + m_graphicsPasses.emplace_back(&edge, GraphicsPass{pip, outputSampler, meshBuffers}); + } + else + { + delete outputSampler; + } +} + +QString RenderedCSFNode::updateShaderWithImageFormats(QString current) +{ + int sampler_index = 0; + for(const auto& input : n.m_descriptor.inputs) + { + if(auto tex_input = ossia::get_if(&input.data)) + { + sampler_index++; + } + if(auto image = ossia::get_if(&input.data)) + { + if(image->access == "read_only") + { 
+ SCORE_ASSERT(sampler_index < m_inputSamplers.size()); + auto tex_n = m_inputSamplers[sampler_index].texture; + if(!tex_n) + return current; + + const auto fmt = tex_n->format(); + const auto layout_fmt = rhiTextureFormatToShaderLayoutFormatString(fmt); + + const auto before = QStringLiteral(", rgba8) readonly uniform image2D %1;").arg(input.name.c_str()); + const auto after = QStringLiteral(", %1) readonly uniform image2D %2;").arg(layout_fmt).arg(input.name.c_str()); + + current.replace(before, after); + sampler_index++; + } + } + } + return current; + +} + +void RenderedCSFNode::createComputePipeline(RenderList& renderer) +{ + QRhi& rhi = *renderer.state.rhi; + + if(!rhi.isFeatureSupported(QRhi::Compute)) + { + qWarning() << "Compute shaders not supported on this backend"; + return; + } + + try + { + // Prepare the shader template with image format substitution. + // LOCAL_SIZE placeholders will be substituted per-pass below. + m_computeShaderSource = updateShaderWithImageFormats(n.m_computeS); + + // Compile one pipeline per unique LOCAL_SIZE, reuse when passes share the same size. 
+ m_perPassPipelines.clear(); + std::map, QRhiComputePipeline*> pipelineCache; + + for(std::size_t passIdx = 0; passIdx < n.m_descriptor.csf_passes.size(); passIdx++) + { + const auto& passDesc = n.m_descriptor.csf_passes[passIdx]; + const auto key = passDesc.local_size; + + auto it = pipelineCache.find(key); + if(it != pipelineCache.end()) + { + // Reuse existing pipeline + m_perPassPipelines.push_back(it->second); + } + else + { + // Compile new pipeline for this local_size + QString src = m_computeShaderSource; + src.replace("ISF_LOCAL_SIZE_X", QString::number(key[0])); + src.replace("ISF_LOCAL_SIZE_Y", QString::number(key[1])); + src.replace("ISF_LOCAL_SIZE_Z", QString::number(key[2])); + + QShader compiled = score::gfx::makeCompute(renderer.state, src); + + auto* pipeline = rhi.newComputePipeline(); + pipeline->setShaderStage(QRhiShaderStage(QRhiShaderStage::Compute, compiled)); + + pipelineCache[key] = pipeline; + m_perPassPipelines.push_back(pipeline); + } + } + + // Store unique pipelines for cleanup + m_ownedPipelines.clear(); + for(auto& [k, v] : pipelineCache) + m_ownedPipelines.push_back(v); + + // For backward compat + m_computePipeline = m_perPassPipelines.empty() ? nullptr : m_perPassPipelines[0]; + if(!m_perPassPipelines.empty()) + m_computeShader = m_perPassPipelines[0]->shaderStage().shader(); + } + catch(const std::exception& e) + { + qWarning() << "Failed to create compute shader:" << e.what(); + m_computePipeline = nullptr; + } +} + +void RenderedCSFNode::buildComputeSrbBindings( + RenderList& renderer, QRhiResourceUpdateBatch& res, + QList& bindings) +{ + QRhi& rhi = *renderer.state.rhi; + + // Pre-pass: collect physical buffers used with conflicting access modes + // (read on one binding, write on another) so we can promote them to + // bufferLoadStore. The Qt RHI / Vulkan validation layer rejects bindings + // that reference the same buffer with different access flags within a pass. 
+ std::unordered_set aliased_buffers; + { + std::unordered_map access_flags; // 1=read, 2=write, 3=both + int gb_idx = 0; + for(const auto& inp : n.m_descriptor.inputs) + { + auto* g = ossia::get_if(&inp.data); + if(!g) + continue; + if(gb_idx >= (int)m_geometryBindings.size()) + break; + const auto& gb = m_geometryBindings[gb_idx++]; + + for(int ai = 0; ai < (int)g->attributes.size() && ai < (int)gb.attribute_ssbos.size(); ai++) + { + const auto& req = g->attributes[ai]; + const auto& ssbo = gb.attribute_ssbos[ai]; + if(req.access == "none" || !ssbo.buffer) + continue; + int f = (req.access == "read_only") ? 1 : (req.access == "write_only") ? 2 : 3; + access_flags[ssbo.buffer] |= f; + if(req.access == "read_write" && ssbo.read_buffer && ssbo.read_buffer != ssbo.buffer) + access_flags[ssbo.read_buffer] |= 1; + } + for(const auto& aux : gb.auxiliary_ssbos) + { + if(!aux.buffer) + continue; + int f = (aux.access == "read_only") ? 1 : (aux.access == "write_only") ? 2 : 3; + access_flags[aux.buffer] |= f; + if(aux.read_buffer && aux.read_buffer != aux.buffer) + access_flags[aux.read_buffer] |= 1; + } + } + for(const auto& [buf, flags] : access_flags) + if(flags == 3) + aliased_buffers.insert(buf); + } + + // Binding 0: Renderer UBO (part of ProcessUBO in defaultUniforms) bindings.append(QRhiShaderResourceBinding::uniformBuffer( 0, QRhiShaderResourceBinding::ComputeStage, &renderer.outputUBO())); // Binding 1: Process UBO (time, passIndex, etc.) - // Per-pass: actual pointer will be set later + // Per-pass: actual pointer is patched by each caller after this returns. 
bindings.append( QRhiShaderResourceBinding::uniformBuffer( 1, QRhiShaderResourceBinding::ComputeStage, nullptr)); @@ -2292,14 +2970,14 @@ void RenderedCSFNode::initComputePass( for(const auto& input : n.m_descriptor.inputs) { // Storage buffers - if(ossia::get_if(&input.data)) + if(auto* storage_in = ossia::get_if(&input.data)) { // Find the corresponding storage buffer auto it = std::find_if(m_storageBuffers.begin(), m_storageBuffers.end(), - [&input](const StorageBuffer& sb) { - return sb.name == QString::fromStdString(input.name); + [&input](const StorageBuffer& sb) { + return sb.name == QString::fromStdString(input.name); }); - + if(it != m_storageBuffers.end() && it->buffer) { if(it->access == "read_only") @@ -2322,42 +3000,73 @@ void RenderedCSFNode::initComputePass( else if(it->access == "write_only") { bindings.append(QRhiShaderResourceBinding::bufferStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, + bindingIndex++, QRhiShaderResourceBinding::ComputeStage, it->buffer)); output_port_index++; } else // read_write { bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, + bindingIndex++, QRhiShaderResourceBinding::ComputeStage, it->buffer)); output_port_index++; } } - else if(it != m_storageBuffers.end()) + else { - if(!it->buffer) { - qDebug() << "CSF: cannot bind null buffer"; - } + // Missing storage buffer: warn (used to be silent on the recreate + // path / qDebug on the init path — unify to qWarning) and bump + // bindingIndex so the rest of the layout stays in sync with the + // shader's expected slots. 
+ if(it == m_storageBuffers.end()) + qWarning() << "CSF: storage buffer not found for input" + << QString::fromStdString(input.name); + else + qWarning() << "CSF: cannot bind null buffer for input" + << QString::fromStdString(input.name); bindingIndex++; } - else + + // Write-access buffers whose layout ends in a flexible-array member get a + // synthesized "size" INPUT port on the model (setupCSF / isf_input_port_- + // vis). The read_only branch advanced input_port_index for its own inlet, + // but the write branches above only touched output_port_index — so this + // sizing inlet was never skipped and every later storage input resolved + // the wrong port (its upstream buffer silently never bound). The geometry + // branch already does the equivalent for its $USER ports. Advance here + // under the SAME flex-array condition used everywhere else. + if(storage_in->access.contains("write") && !storage_in->layout.empty() + && storage_in->layout.back().type.find("[]") != std::string::npos) { - qDebug() << "CSF: storage buffer not found"; - bindingIndex++; + input_port_index++; } } // Regular textures (sampled) else if(ossia::get_if(&input.data)) { // Regular sampled textures from m_inputSamplers - SCORE_ASSERT(input_image_index < m_inputSamplers.size()); - auto [sampler, tex] = m_inputSamplers[input_image_index]; - SCORE_ASSERT(sampler); - SCORE_ASSERT(tex); - bindings.append( - QRhiShaderResourceBinding::sampledTexture( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, sampler)); + if(input_image_index < m_inputSamplers.size()) + { + auto [sampler, tex, fb_] = m_inputSamplers[input_image_index]; + if(sampler && tex) + { + bindings.append( + QRhiShaderResourceBinding::sampledTexture( + bindingIndex, QRhiShaderResourceBinding::ComputeStage, tex, sampler)); + } + else + { + qWarning() << "CSF: sampler/texture missing for texture_input" + << QString::fromStdString(input.name); + } + } + else + { + qWarning() << "CSF: input_samplers under-allocated for 
texture_input" + << QString::fromStdString(input.name); + } + // Always bump bindingIndex to keep the shader-layout slot count stable. + bindingIndex++; input_port_index++; input_image_index++; } @@ -2366,23 +3075,35 @@ void RenderedCSFNode::initComputePass( { // Find the corresponding storage image auto it = std::find_if(m_storageImages.begin(), m_storageImages.end(), - [&input](const StorageImage& si) { - return si.name == QString::fromStdString(input.name); + [&input](const StorageImage& si) { + return si.name == QString::fromStdString(input.name); }); - + if(it != m_storageImages.end()) { if(it->access == "read_only") { - SCORE_ASSERT(input_image_index < m_inputSamplers.size()); - auto [sampler, tex] = m_inputSamplers[input_image_index]; - SCORE_ASSERT(sampler); - SCORE_ASSERT(tex); - - bindings.append( - QRhiShaderResourceBinding::imageLoad( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, 0)); - + if(input_image_index < m_inputSamplers.size()) + { + auto [sampler, tex, fb_] = m_inputSamplers[input_image_index]; + if(tex) + { + bindings.append( + QRhiShaderResourceBinding::imageLoad( + bindingIndex, QRhiShaderResourceBinding::ComputeStage, tex, 0)); + } + else + { + qWarning() << "CSF: missing read_only image texture for" + << QString::fromStdString(input.name); + } + } + else + { + qWarning() << "CSF: input_samplers under-allocated for csf_image_input" + << QString::fromStdString(input.name); + } + bindingIndex++; input_port_index++; input_image_index++; } @@ -2396,48 +3117,69 @@ void RenderedCSFNode::initComputePass( if(imageSize.width() < 1 || imageSize.height() < 1) imageSize = renderer.state.renderSize; - if(!it->texture) - { - QRhiTexture* texture{}; - if(image->is3D()) + // Lazy-allocate the storage-image texture (and its persistent + // _prev twin) on first emission. After init this branch is a + // no-op (it->texture is already set), so the recreate path + // re-emits against the existing handle. 
+ auto make_tex = [&](const char* suffix) -> QRhiTexture* { + QRhiTexture* t{}; + if(image->isCube()) + { + const int edge + = std::max(imageSize.width(), imageSize.height()); + QRhiTexture::Flags flags + = QRhiTexture::CubeMap | QRhiTexture::UsedWithLoadStore; + t = rhi.newTexture(format, QSize(edge, edge), 1, flags); + } + else if(image->is3D()) { - // 3D texture int depth = !image->depth_expression.empty() ? resolveDispatchExpression(image->depth_expression) - : imageSize.height(); // Default: cubic if only DIMENSIONS:3 - + : imageSize.height(); QRhiTexture::Flags flags = QRhiTexture::ThreeDimensional | QRhiTexture::UsedWithLoadStore; - texture = rhi.newTexture(format, imageSize.width(), imageSize.height(), depth, 1, flags); - qWarning() << "CSF ALLOC [storageImage3D]" << input.name.c_str() << "size=" << imageSize.width() << "x" << imageSize.height() << "x" << depth; + t = rhi.newTexture( + format, imageSize.width(), imageSize.height(), depth, 1, flags); + } + else if(image->is_array) + { + int layers = !image->layers_expression.empty() + ? 
resolveDispatchExpression(image->layers_expression) + : 1; + if(layers < 1) layers = 1; + QRhiTexture::Flags flags = QRhiTexture::UsedWithLoadStore; + t = rhi.newTextureArray(format, layers, imageSize, 1, flags); } else { - // 2D texture QRhiTexture::Flags flags = QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore | QRhiTexture::MipMapped | QRhiTexture::UsedWithGenerateMips; - texture = rhi.newTexture(format, imageSize, 1, flags); - qWarning() << "CSF ALLOC [storageImage2D]" << input.name.c_str() << "size=" << imageSize; + t = rhi.newTexture(format, imageSize, 1, flags); } - texture->setName(("RenderedCSFNode::storageImage::" + input.name).c_str()); - - if(texture && texture->create()) + t->setName( + ("RenderedCSFNode::storageImage::" + input.name + suffix).c_str()); + if(!t->create()) { - // If this is the first write-only or read-write image, use it as the output - if(!m_outputTexture) - { - m_outputTexture = texture; - m_outputFormat = format; - } - it->texture = texture; + delete t; + return nullptr; } - else + return t; + }; + + if(!it->texture) + { + it->texture = make_tex(""); + if(it->texture && !m_outputTexture) { - delete texture; + m_outputTexture = it->texture; + m_outputFormat = format; } } + if(it->persistent && !it->read_texture) + it->read_texture = make_tex("_prev"); + it->binding = bindingIndex; if(it->access == "write_only" && it->texture) { bindings.append( @@ -2454,12 +3196,47 @@ void RenderedCSFNode::initComputePass( } else { + if(!it->texture) + qWarning() << "CSF: missing storage-image texture for" + << QString::fromStdString(input.name); bindingIndex++; // keep indices synchronized with shader layout } + + // Persistent pair: `_prev` readonly at the adjacent slot. + // First frame aliases back to `texture` (no prior frame to read). + if(it->persistent) + { + QRhiTexture* prev_tex + = it->pending_initial_copy ? 
it->texture : it->read_texture; + if(!prev_tex) + prev_tex = it->texture; + it->prev_binding = bindingIndex; + if(prev_tex) + { + bindings.append( + QRhiShaderResourceBinding::imageLoad( + bindingIndex++, QRhiShaderResourceBinding::ComputeStage, + prev_tex, 0)); + } + else + { + qWarning() << "CSF: missing persistent _prev texture for" + << QString::fromStdString(input.name); + bindingIndex++; + } + } output_port_index++; output_image_index++; } } + else + { + qWarning() << "CSF: storage image not found for" + << QString::fromStdString(input.name); + bindingIndex++; + if(image->persistent) + bindingIndex++; + } } // Geometry inputs: bind per-attribute SSBOs else if(auto* geo_input = ossia::get_if(&input.data)) @@ -2505,15 +3282,16 @@ void RenderedCSFNode::initComputePass( if(!ssbo.buffer) { - // Create a minimal fallback buffer so we don't crash - const int elem_size = glslTypeSizeBytes(req.type); + // Create a minimal fallback buffer so we don't skip a binding + // index. Same fallback shape for both init and re-emit paths + // (the buffer name encodes the call site for debug clarity). + const int64_t elem_stride = std430ArrayStride(req.type, n.m_descriptor); ssbo.buffer = rhi.newBuffer( QRhiBuffer::Static, - QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, elem_size); - qWarning() << "CSF ALLOC [geomInit]" << req.name.c_str() << "size=" << elem_size; - ssbo.buffer->setName(QByteArray("CSF_GeomInit_") + req.name.c_str()); + QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, elem_stride); + ssbo.buffer->setName(QByteArray("CSF_GeomFB_") + req.name.c_str()); ssbo.buffer->create(); - ssbo.size = elem_size; + ssbo.size = elem_stride; ssbo.owned = true; } @@ -2525,7 +3303,7 @@ void RenderedCSFNode::initComputePass( { // On the first feedback frame (pending_initial_copy), use the same // buffer for both _in and _out so the shader can init + simulate - // in the same frame. After the frame we copy buffer→read_buffer. + // in the same frame. 
After the frame we copy buffer->read_buffer. QRhiBuffer* read_buf = (ssbo.read_buffer && !binding.pending_initial_copy) ? ssbo.read_buffer : ssbo.buffer; if(read_buf == ssbo.buffer) @@ -2551,56 +3329,158 @@ void RenderedCSFNode::initComputePass( { if(!aux.buffer) { - // Create a minimal fallback buffer so we don't skip a binding index + // Create a minimal fallback buffer so we don't skip a binding + // index. Usage flag must match the aux kind — binding a + // StorageBuffer-only buffer as a UBO (or vice versa) is + // rejected by the Vulkan validation layer. + const auto fallback_usage = aux.is_uniform + ? QRhiBuffer::UniformBuffer + : QRhiBuffer::StorageBuffer; + const quint32 fallback_size = aux.is_uniform ? 256u : 16u; aux.buffer = rhi.newBuffer( - QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 16); - qWarning() << "CSF ALLOC [auxInit]" << aux.name.c_str() << "size=16"; - aux.buffer->setName(QByteArray("CSF_AuxInit_") + aux.name.c_str()); + QRhiBuffer::Static, fallback_usage, fallback_size); + aux.buffer->setName(QByteArray("CSF_AuxFB_") + aux.name.c_str()); aux.buffer->create(); - aux.size = 16; + aux.size = fallback_size; aux.owned = true; } - appendBufBinding(aux.buffer, aux.access); + if(aux.is_uniform) + { + // std140 UBO kind: bind as uniform, not load/store. Access + // field is ignored (UBOs are read-only in GLSL). + bindings.append( + QRhiShaderResourceBinding::uniformBuffer( + bindingIndex++, QRhiShaderResourceBinding::ComputeStage, + aux.buffer)); + } + else + { + appendBufBinding(aux.buffer, aux.access); + } } -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Bind indirect draw buffer as read-write SSBO - if(binding.uses_indirect_draw && binding.indirectDrawBuffer) + // Auxiliary textures for this geometry input — placed right + // after aux SSBOs, matching the GLSL emission order in + // parse_csf. Sampled entries → sampledTexture binding; storage + // entries → imageLoad / imageStore / imageLoadStore per access. 
+ for(auto& at : binding.auxiliary_textures) + { + if(!at.texture) + at.texture = at.placeholder; + + QRhiShaderResourceBinding b; + if(at.is_storage) + { + if(at.access == "read_only") + b = QRhiShaderResourceBinding::imageLoad( + bindingIndex, QRhiShaderResourceBinding::ComputeStage, + at.texture, 0); + else if(at.access == "write_only") + b = QRhiShaderResourceBinding::imageStore( + bindingIndex, QRhiShaderResourceBinding::ComputeStage, + at.texture, 0); + else + b = QRhiShaderResourceBinding::imageLoadStore( + bindingIndex, QRhiShaderResourceBinding::ComputeStage, + at.texture, 0); + } + else + { + b = QRhiShaderResourceBinding::sampledTexture( + bindingIndex, QRhiShaderResourceBinding::ComputeStage, + at.texture, at.sampler); + } + bindings.append(b); + at.binding = bindingIndex; + bindingIndex++; + } + + if(binding.uses_indirect_draw && binding.indirectBuffer) { bindings.append(QRhiShaderResourceBinding::bufferLoadStore( bindingIndex++, QRhiShaderResourceBinding::ComputeStage, - binding.indirectDrawBuffer)); + binding.indirectBuffer)); } -#endif geo_binding_index++; } - // Inlet port if any attribute reads from upstream - for(const auto& attr : geo_input->attributes) - if(attr.access == "read_only" || attr.access == "read_write") { input_port_index++; break; } + // Inlet port for upstream geometry. Two cases create one: + // - Empty ATTRIBUTES => pure pass-through: ISFNode unconditionally + // pushes an input port (the visitor at ISFNode.cpp's + // `if(in.attributes.empty())` branch). + // - Non-empty ATTRIBUTES with at least one read_only / read_write + // attribute => an upstream-feeding inlet. + // Either way the geometry input owns ONE entry in node.input, + // which subsequent storage_input / texture_input / etc. address by + // position. 
Without this increment the very next read_only + // storage_input picks up node.input[0] (the geometry port) by + // mistake — its edges point to upstream geometry, bufferForInput + // returns empty, and the storage_input falls back to its own + // zero-initialised dummy buffer. Symptom: storage data from the + // upstream cable never reaches the compute shader. + bool geo_creates_inlet = geo_input->attributes.empty(); + if(!geo_creates_inlet) + { + for(const auto& attr : geo_input->attributes) + { + if(attr.access == "read_only" || attr.access == "read_write") + { + geo_creates_inlet = true; + break; + } + } + } + if(geo_creates_inlet) + input_port_index++; // Skip $USER ports for this geometry input if(geo_input->vertex_count.find("$USER") != std::string::npos) input_port_index++; if(geo_input->instance_count.find("$USER") != std::string::npos) input_port_index++; for(const auto& aux : geo_input->auxiliary) if(aux.size.find("$USER") != std::string::npos) input_port_index++; + if(geo_input->indirect && geo_input->indirect->count.find("$USER") != std::string::npos) input_port_index++; } else { input_port_index++; } } +} + +void RenderedCSFNode::initComputeSRBAndPasses( + RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + QRhi& rhi = *renderer.state.rhi; + + if(!m_computePipeline) + { + createComputePipeline(renderer); + } + + if(!m_computePipeline) + return; + + // Ensure storage buffers are created before setting up bindings + updateStorageBuffers(renderer, res); + + // Eagerly populate geometry bindings so we can detect buffer aliasing across + // attribute/auxiliary SSBOs (caused by feedback edges sharing the same + // physical buffer with conflicting access modes) BEFORE we emit any binding. + updateGeometryBindings(renderer, res); + + // Single source of truth for the bindings list (also used by + // recreateShaderResourceBindings — see buildComputeSrbBindings). 
+ QList bindings; + buildComputeSrbBindings(renderer, res, bindings); // Set the SRB on the pipeline and create it { - QRhiShaderResourceBindings* passSRB{}; // Create one ComputePass entry for each CSF pass, each with their own pipeline, ProcessUBO and SRB for(std::size_t passIdx = 0; passIdx < n.m_descriptor.csf_passes.size(); passIdx++) { // Create a separate ProcessUBO for this pass QRhiBuffer* passProcessUBO = rhi.newBuffer( QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO)); - qWarning() << "CSF ALLOC [passProcessUBO] pass=" << passIdx << "size=" << sizeof(ProcessUBO); passProcessUBO->setName(QStringLiteral("RenderedCSFNode::pass%1::processUBO") .arg(passIdx) .toLocal8Bit()); @@ -2612,8 +3492,7 @@ void RenderedCSFNode::initComputePass( } // Create separate SRB for this pass with the specific ProcessUBO - passSRB = rhi.newShaderResourceBindings(); - qWarning() << "CSF ALLOC [passSRB] pass=" << passIdx; + QRhiShaderResourceBindings* passSRB = rhi.newShaderResourceBindings(); passSRB->setName(QString("passSRB.%1").arg(passIdx).toUtf8()); // Replace the ProcessUBO binding (binding 1) with this pass's ProcessUBO @@ -2629,7 +3508,6 @@ void RenderedCSFNode::initComputePass( qWarning() << "Failed to create SRB for CSF pass" << passIdx; delete passSRB; delete passProcessUBO; - passSRB = nullptr; continue; } @@ -2648,510 +3526,617 @@ void RenderedCSFNode::initComputePass( } m_computePasses.emplace_back( - &edge, ComputePass{passPipeline, passSRB, passProcessUBO}); - } - - if(rt.renderTarget) - { - // Create the graphics pass for rendering this output to the render target - createGraphicsPass(rt, renderer, edge, res); + nullptr, ComputePass{passPipeline, passSRB, passProcessUBO}); } } } -void RenderedCSFNode::createGraphicsPass( - const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +void RenderedCSFNode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { - // Create a graphics pass to render our 
compute output texture to the render target - static const constexpr auto vertex_shader = R"_(#version 450 -layout(location = 0) in vec2 position; -layout(location = 1) in vec2 texcoord; - -layout(location = 0) out vec2 v_texcoord; + QRhi& rhi = *renderer.state.rhi; -layout(std140, binding = 0) uniform renderer_t { - mat4 clipSpaceCorrMatrix; - vec2 renderSize; -} renderer; + // Reset the "first frame" gate so that generateMips() in update() waits + // for the upstream pass to actually write the input textures before being + // called -- see the matching comment in update(). + m_inputsHaveBeenWritten = false; -out gl_PerVertex { vec4 gl_Position; }; + // Check for compute support + if(!rhi.isFeatureSupported(QRhi::Compute)) + { + qWarning() << "Compute shaders not supported on this backend"; + return; + } -void main() -{ - v_texcoord = texcoord; - gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.); -#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL) - gl_Position.y = - gl_Position.y; -#endif -} -)_"; + // ProcessUBO will be created per-pass in initComputeSRBAndPasses - static const constexpr auto fragment_shader_rgba = R"_(#version 450 -layout(std140, binding = 0) uniform renderer_t { - mat4 clipSpaceCorrMatrix; - vec2 renderSize; -} renderer; + // Initialize GPU buffer scatter for format conversion + m_gpuScatterAvailable = m_gpuScatter.init(renderer.state); -layout(binding = 3) uniform sampler2D outputTexture; + // Create the material UBO + m_materialSize = n.m_materialSize; + if(m_materialSize > 0) + { + m_materialUBO = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); + m_materialUBO->setName("RenderedCSFNode::init::m_materialUBO"); + if(!m_materialUBO->create()) + { + qWarning() << "Failed to create uniform buffer"; + delete m_materialUBO; + m_materialUBO = nullptr; + } + else if(n.m_material_data) + { + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, 
n.m_material_data.get()); + } + } -layout(location = 0) in vec2 v_texcoord; -layout(location = 0) out vec4 fragColor; + // Initialize input samplers + SCORE_ASSERT(m_computePasses.empty()); + SCORE_ASSERT(m_inputSamplers.empty()); -void main() { fragColor = texture(outputTexture, v_texcoord); } -)_"; - static const constexpr auto fragment_shader_r = R"_(#version 450 -layout(std140, binding = 0) uniform renderer_t { - mat4 clipSpaceCorrMatrix; - vec2 renderSize; -} renderer; + // Create samplers for input textures + m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor()); -layout(binding = 3) uniform sampler2D outputTexture; + // Parse descriptor to create storage buffers and determine output texture requirements. + // We also track the input port index to build the geometry-binding-to-port mapping. + // The input port index mirrors the order in which ISFNode's visitor calls + // self.input.push_back() for each descriptor input. + int sb_index = 0; + int outlet_index = 0; + int input_port_index = 0; // tracks which input port we're at + auto& outlets = n.output; + for(const auto& input : n.m_descriptor.inputs) + { + // Handle storage buffers + if(auto* storage = ossia::get_if(&input.data)) + { + // Create storage buffer entry - actual buffer will be created/sized in updateStorageBuffers + StorageBuffer sb; + sb.buffer = nullptr; // Will be created in updateStorageBuffers + sb.size = 0; + sb.lastKnownSize = 0; // Force initial creation + sb.name = QString::fromStdString(input.name); + sb.buffer_usage = storage->buffer_usage; + sb.access = QString::fromStdString(storage->access); + sb.layout = storage->layout; // Store layout for size calculation + m_storageBuffers.push_back(sb); -layout(location = 0) in vec2 v_texcoord; -layout(location = 0) out vec4 fragColor; + if(sb.access.contains("write")) { + m_outStorageBuffers.push_back({outlets[outlet_index], sb_index}); + outlet_index++; + } + // read_only storage creates an input port + 
if(storage->access == "read_only") + input_port_index++; + sb_index++; + } + // Handle CSF images + else if(auto* image = ossia::get_if(&input.data)) + { + QRhiTexture::Format format = getTextureFormat(QString::fromStdString(image->format)); + StorageImage si; + si.name = QString::fromStdString(input.name); + si.access = QString::fromStdString(image->access); + si.format = format; + si.is3D = image->is3D(); + si.isCube = image->isCube(); + si.persistent = image->persistent; + si.pending_initial_copy = image->persistent; + // generateMips is only meaningful on plain 2D images — QRhi doesn't + // define a mip chain for 3D, cubemaps would need per-face generation + // that QRhi::generateMips doesn't promise across backends, and 2D + // arrays similarly have per-layer semantics that aren't guaranteed. + // Silently disable the flag outside of plain 2D so downstream samplers + // don't hit a no-op they might have expected to work. + si.generate_mips = image->generate_mips && !image->is3D() + && !image->isCube() && !image->is_array; + m_storageImages.push_back(si); -void main() { fragColor = vec4(texture(outputTexture, v_texcoord).rrr, 1.0); } -)_"; + if(m_storageImages.back().access.contains("write")) { + int img_index = (int)m_storageImages.size() - 1; + m_outStorageImages.push_back({outlets[outlet_index], img_index}); + outlet_index++; + } + // read_only CSF image creates an input port + if(image->access == "read_only") + input_port_index++; + } + // Handle geometry inputs + else if(auto* geo = ossia::get_if(&input.data)) + { + // Determine if this geometry_input creates an input port + // (mirrors ISFNode visitor logic: input port if any attribute is read_only or read_write) + bool needs_input = geo->attributes.empty(); // empty = pass-through, always has input + if(!needs_input) + { + for(const auto& attr : geo->attributes) + if(attr.access == "read_only" || attr.access == "read_write") + { needs_input = true; break; } + } - // Get the mesh for rendering a 
fullscreen quad - const auto& mesh = renderer.defaultTriangle(); + GeometryBinding binding; + binding.input_name = input.name; + binding.input_port_index = needs_input ? input_port_index : -1; + binding.has_output = geo->attributes.empty(); // Empty attributes = pure pass-through with output + binding.has_vertex_count_spec = !geo->vertex_count.empty(); + binding.has_instance_count_spec = !geo->instance_count.empty(); - // Find the texture for the specific output port this edge is connected to - QRhiTexture* textureToRender = textureForOutput(*edge.source); - // If we still don't have a texture, we can't create the graphics pass - if(!textureToRender) - { - qWarning() << "No output texture available for graphics pass"; - return; - } + for(const auto& attr : geo->attributes) + { + GeometryBinding::AttributeSSBO ssbo; + ssbo.name = attr.name; + ssbo.access = attr.access; + ssbo.per_instance = (attr.rate == "instance"); + binding.attribute_ssbos.push_back(std::move(ssbo)); - auto fmt = textureToRender->format(); - const char* fragment_shader{}; - switch(fmt) - { - case QRhiTexture::Format::R8: - case QRhiTexture::Format::RED_OR_ALPHA8: -#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) - case QRhiTexture::Format::R8UI: - case QRhiTexture::Format::R32UI: -#endif - case QRhiTexture::Format::R16: - case QRhiTexture::Format::R16F: - case QRhiTexture::Format::R32F: - case QRhiTexture::Format::D16: -#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0) - case QRhiTexture::Format::D24: - case QRhiTexture::Format::D24S8: -#endif - case QRhiTexture::Format::D32F: -#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) - case QRhiTexture::Format::D32FS8: -#endif - fragment_shader = fragment_shader_r; - break; - default: - fragment_shader = fragment_shader_rgba; - break; - } + if(attr.access != "read_only" && attr.access != "none") + binding.has_output = true; + } - // Compile shaders - auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, vertex_shader, fragment_shader); + // If vertex_count 
is specified, resolve and pre-allocate attribute SSBOs + if(binding.has_vertex_count_spec) + { + int count = resolveCountExpression(geo->vertex_count, *geo, "vertex_count"); + if(count > 0) + binding.vertex_count = count; + } - // Create a sampler for our output texture - QRhiSampler* outputSampler = renderer.state.rhi->newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); - outputSampler->setName("RenderedCSFNode::OutputSampler"); - outputSampler->create(); - - // Initialize mesh buffers - MeshBuffers meshBuffers = renderer.initMeshBuffer(mesh, res); - - // Build the pipeline to render our compute result - auto pip = score::gfx::buildPipeline( - renderer, mesh, vertexS, fragmentS, rt, nullptr, nullptr, - std::array{Sampler{outputSampler, textureToRender}}); - - if(pip.pipeline) - { - m_graphicsPasses.emplace_back(&edge, GraphicsPass{pip, outputSampler, meshBuffers}); - } - else - { - delete outputSampler; - } -} + // Resolve instance_count if specified + if(binding.has_instance_count_spec) + { + int ic = resolveCountExpression(geo->instance_count, *geo, "instance_count"); + if(ic > 0) + binding.instance_count = ic; + } -QString RenderedCSFNode::updateShaderWithImageFormats(QString current) -{ - int sampler_index = 0; - for(const auto& input : n.m_descriptor.inputs) - { - if(auto tex_input = ossia::get_if(&input.data)) - { - sampler_index++; - } - if(auto image = ossia::get_if(&input.data)) - { - if(image->access == "read_only") + // Pre-allocate attribute SSBOs using the correct count based on rate { - SCORE_ASSERT(sampler_index < m_inputSamplers.size()); - auto tex_n = m_inputSamplers[sampler_index].texture; - if(!tex_n) - return current; + for(int attr_idx = 0; attr_idx < (int)geo->attributes.size(); attr_idx++) + { + if(attr_idx >= (int)binding.attribute_ssbos.size()) + break; + auto& ssbo = binding.attribute_ssbos[attr_idx]; + if(ssbo.access == "none") + continue; + const int count = 
ssbo.per_instance ? binding.instance_count : binding.vertex_count; + if(count <= 0) + continue; + const int64_t elem_stride = std430ArrayStride(geo->attributes[attr_idx].type, n.m_descriptor); + const int64_t needed = elem_stride * count; + auto* buf = rhi.newBuffer( + QRhiBuffer::Static, + QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed); + buf->setName(QByteArray("CSF_GeomSpec_") + ssbo.name.c_str()); + buf->create(); + QByteArray zero(needed, 0); + res.uploadStaticBuffer(buf, 0, needed, zero.constData()); + ssbo.buffer = buf; + ssbo.size = needed; + ssbo.owned = true; + } + } - const auto fmt = tex_n->format(); - const auto layout_fmt = rhiTextureFormatToShaderLayoutFormatString(fmt); + for(const auto& aux : geo->auxiliary) + { + // COPY_FROM auxiliaries are forwarded in pushOutputGeometry, no SSBO needed + if(aux.forward) + continue; - const auto before = QStringLiteral(", rgba8) readonly uniform image2D %1;").arg(input.name.c_str()); - const auto after = QStringLiteral(", %1) readonly uniform image2D %2;").arg(layout_fmt).arg(input.name.c_str()); + GeometryBinding::AuxiliarySSBO ssbo; + ssbo.name = aux.name; + ssbo.access = aux.access; + ssbo.is_uniform = aux.is_uniform; + ssbo.layout = aux.layout; + ssbo.size_expr = aux.size; - current.replace(before, after); - sampler_index++; + // Create the buffer immediately so it's available for the first dispatch. + // Usage flag matches the aux kind — UBO path uses UniformBuffer, + // SSBO path uses StorageBuffer. Using the wrong usage flag is a + // Vulkan validation error at bind time. + int arrayCount = 0; + if(!aux.size.empty()) + arrayCount = resolveCountExpression(aux.size, *geo, aux.name); + + const int64_t requiredSize = score::gfx::calculateStorageBufferSize( + aux.layout, arrayCount, this->n.descriptor()); + if(requiredSize > 0) + { + const auto usage = aux.is_uniform ? 
QRhiBuffer::UniformBuffer + : QRhiBuffer::StorageBuffer; + auto* buf = rhi.newBuffer(QRhiBuffer::Static, usage, requiredSize); + buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str()); + buf->create(); + QByteArray zero(requiredSize, 0); + res.uploadStaticBuffer(buf, 0, requiredSize, zero.constData()); + ssbo.buffer = buf; + ssbo.size = requiredSize; + ssbo.owned = true; + } + + binding.auxiliary_ssbos.push_back(std::move(ssbo)); + + // UBOs are inherently read-only from GLSL, so they never flag + // has_output. For SSBOs, any non-read_only access opts in. + if(!aux.is_uniform && aux.access != "read_only") + binding.has_output = true; } - } - } - return current; -} + // Auxiliary textures: one entry per geometry_input AUXILIARY + // texture declaration. Sampler allocated now (or skipped for + // storage-image entries); placeholder texture picked from the + // RenderList empties so the SRB is always valid even before an + // upstream resolution happens. Per-frame resolution against + // ossia::geometry::auxiliary_textures happens in + // updateGeometryBindings. + // + // For write_only / read_write storage-image entries this binding + // ALSO allocates the actual texture itself (analog of the + // m_storageImages allocation that top-level csf_image_input + // entries get). Without this auto-alloc the binding stays glued + // to the RGBA8-typed sample-only emptyTexture3D placeholder and + // any imageStore / imageAtomicOr against an integer-formatted + // shader (uimage3D r32ui) trips Vulkan validation 00339 (no + // STORAGE_BIT) + 07753 (UINT vs UNORM) + 02691 (no atomic + // format feature). 
+ for(const auto& atx : geo->auxiliary_textures) + { + RenderedCSFNode::GeometryBinding::AuxiliaryTexture at; + at.name = atx.name; + at.is_storage = atx.is_storage; + at.access = atx.access; -void RenderedCSFNode::createComputePipeline(RenderList& renderer) -{ - QRhi& rhi = *renderer.state.rhi; - - if(!rhi.isFeatureSupported(QRhi::Compute)) - { - qWarning() << "Compute shaders not supported on this backend"; - return; - } - - try - { - // Prepare the shader template with image format substitution. - // LOCAL_SIZE placeholders will be substituted per-pass below. - m_computeShaderSource = updateShaderWithImageFormats(n.m_computeS); + if(!atx.is_storage) + { + at.sampler = score::gfx::makeSampler(rhi, atx.sampler); + at.sampler->setName( + QByteArray("CSF_AuxTex_sampler::") + atx.name.c_str()); + } - // Compile one pipeline per unique LOCAL_SIZE, reuse when passes share the same size. - m_perPassPipelines.clear(); - std::map, QRhiComputePipeline*> pipelineCache; + if(atx.is_cubemap) + at.placeholder = &renderer.emptyTextureCube(); + else if(atx.dimensions == 3) + at.placeholder = &renderer.emptyTexture3D(); + else if(atx.is_array) + at.placeholder = &renderer.emptyTextureArray(); + else + at.placeholder = &renderer.emptyTexture(); + at.texture = at.placeholder; - for(std::size_t passIdx = 0; passIdx < n.m_descriptor.csf_passes.size(); passIdx++) - { - const auto& passDesc = n.m_descriptor.csf_passes[passIdx]; - const auto key = passDesc.local_size; + // Auto-allocate writable storage image. Resolves the size + // expressions (WIDTH/HEIGHT/DEPTH/LAYERS) the same way + // computeTextureSize does for top-level csf_image_input entries. + if(atx.is_storage && atx.access != "read_only") + { + QRhiTexture::Format format = getTextureFormat( + QString::fromStdString(atx.format)); + + int w = !atx.width_expression.empty() + ? std::max(1, resolveDispatchExpression(atx.width_expression)) + : renderer.state.renderSize.width(); + int h = !atx.height_expression.empty() + ? 
std::max(1, resolveDispatchExpression(atx.height_expression)) + : renderer.state.renderSize.height(); + + QRhiTexture* alloc = nullptr; + if(atx.is_cubemap) + { + const int edge = std::max(w, h); + alloc = rhi.newTexture( + format, QSize(edge, edge), 1, + QRhiTexture::CubeMap | QRhiTexture::UsedWithLoadStore); + } + else if(atx.dimensions == 3) + { + int d = !atx.depth_expression.empty() + ? std::max(1, resolveDispatchExpression(atx.depth_expression)) + : h; // square cube fallback + alloc = rhi.newTexture( + format, w, h, d, 1, + QRhiTexture::ThreeDimensional | QRhiTexture::UsedWithLoadStore); + } + else if(atx.is_array) + { + int layers = !atx.layers_expression.empty() + ? std::max(1, resolveDispatchExpression(atx.layers_expression)) + : 1; + alloc = rhi.newTextureArray( + format, layers, QSize(w, h), 1, + QRhiTexture::UsedWithLoadStore); + } + else + { + alloc = rhi.newTexture( + format, QSize(w, h), 1, + QRhiTexture::UsedWithLoadStore); + } - auto it = pipelineCache.find(key); - if(it != pipelineCache.end()) - { - // Reuse existing pipeline - m_perPassPipelines.push_back(it->second); + if(alloc) + { + alloc->setName( + ("CSF::auxStorageImage::" + atx.name).c_str()); + if(alloc->create()) + { + at.texture = alloc; + at.owned = true; + } + else + { + delete alloc; + } + } + } + + binding.auxiliary_textures.push_back(std::move(at)); } - else + + if(geo->indirect) { - // Compile new pipeline for this local_size - QString src = m_computeShaderSource; - src.replace("ISF_LOCAL_SIZE_X", QString::number(key[0])); - src.replace("ISF_LOCAL_SIZE_Y", QString::number(key[1])); - src.replace("ISF_LOCAL_SIZE_Z", QString::number(key[2])); + binding.uses_indirect_draw = true; + binding.indirectCountExpr = geo->indirect->count; - QShader compiled = score::gfx::makeCompute(renderer.state, src); + int count = resolveCountExpression(geo->indirect->count, *geo, "__indirect_count__"); + if(count <= 0) count = 1; + binding.indirectCountResult = count; - auto* pipeline = 
rhi.newComputePipeline(); - pipeline->setShaderStage(QRhiShaderStage(QRhiShaderStage::Compute, compiled)); + const int64_t indirectSize = (int64_t)count * 5 * sizeof(uint32_t); - pipelineCache[key] = pipeline; - m_perPassPipelines.push_back(pipeline); + QRhiBuffer::UsageFlags usageFlags = QRhiBuffer::StorageBuffer; + usageFlags = usageFlags | QRhiBuffer::StorageBuffer; +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + usageFlags = usageFlags | QRhiBuffer::IndirectBuffer; +#endif + + auto* buf = rhi.newBuffer(QRhiBuffer::Static, usageFlags, indirectSize); + buf->setName(QByteArray("CSF_Indirect_") + input.name.c_str()); + buf->create(); + + QByteArray zero(indirectSize, 0); + res.uploadStaticBuffer(buf, 0, indirectSize, zero.constData()); + + binding.indirectBuffer = buf; + binding.indirectBufferSize = indirectSize; } - } - // Store unique pipelines for cleanup - m_ownedPipelines.clear(); - for(auto& [k, v] : pipelineCache) - m_ownedPipelines.push_back(v); + const bool geo_has_output = binding.has_output; + m_geometryBindings.push_back(std::move(binding)); - // For backward compat - m_computePipeline = m_perPassPipelines.empty() ? 
nullptr : m_perPassPipelines[0]; - if(!m_perPassPipelines.empty()) - m_computeShader = m_perPassPipelines[0]->shaderStage().shader(); - } - catch(const std::exception& e) - { - qWarning() << "Failed to create compute shader:" << e.what(); - m_computePipeline = nullptr; + if(needs_input) + input_port_index++; + if(geo_has_output) + outlet_index++; + + // $USER ports also create input ports (IntSpinBox), track them + if(geo->vertex_count.find("$USER") != std::string::npos) + input_port_index++; + if(geo->instance_count.find("$USER") != std::string::npos) + input_port_index++; + for(const auto& aux : geo->auxiliary) + if(aux.size.find("$USER") != std::string::npos) + input_port_index++; + if(geo->indirect && geo->indirect->count.find("$USER") != std::string::npos) + input_port_index++; + } + else + { + // All other input types (float, long, bool, event, color, point2D, point3D, + // image, audio, audioFFT, audioHist, cubemap, texture) create one input port each. + input_port_index++; + } } + + m_outputTexture = nullptr; + + // Create the compute passes (edge-independent: SRB, pipelines, processUBOs) + initComputeSRBAndPasses(renderer, res); + + m_initialized = true; } -void RenderedCSFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +void RenderedCSFNode::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) { - QRhi& rhi = *renderer.state.rhi; + if(!m_initialized) + return; - // Reset the "first frame" gate so that generateMips() in update() waits - // for the upstream pass to actually write the input textures before being - // called — see the matching comment in update(). 
- m_inputsHaveBeenWritten = false; + const auto& rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) + { + createGraphicsPass(rt, renderer, edge, res); + } +} - // Check for compute support - if(!rhi.isFeatureSupported(QRhi::Compute)) +void RenderedCSFNode::removeOutputPass(RenderList& renderer, Edge& edge) +{ + auto it = ossia::find_if( + m_graphicsPasses, [&](const auto& p) { return p.first == &edge; }); + if(it != m_graphicsPasses.end()) { - qWarning() << "Compute shaders not supported on this backend"; - return; + it->second.pipeline.release(); + delete it->second.outputSampler; + m_graphicsPasses.erase(it); } - - // ProcessUBO will be created per-pass in initComputePass +} - // Initialize GPU buffer scatter for format conversion - m_gpuScatterAvailable = m_gpuScatter.init(renderer.state); +bool RenderedCSFNode::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if( + m_graphicsPasses, [&](const auto& p) { return p.first == &edge; }) + != m_graphicsPasses.end(); +} - // Create the material UBO - m_materialSize = n.m_materialSize; - if(m_materialSize > 0) +void RenderedCSFNode::releaseState(RenderList& r) +{ + if(!m_initialized) + return; + + // Clean up remaining graphics passes + for(auto& [edge, pass] : m_graphicsPasses) { - m_materialUBO = rhi.newBuffer( - QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); - qWarning() << "CSF ALLOC [materialUBO] size=" << m_materialSize; - m_materialUBO->setName("RenderedCSFNode::init::m_materialUBO"); - if(!m_materialUBO->create()) + pass.pipeline.release(); + delete pass.outputSampler; + } + m_graphicsPasses.clear(); + + // Clean up compute passes + for(auto& [edge, pass] : m_computePasses) + { + delete pass.srb; + if(pass.processUBO) { - qWarning() << "Failed to create uniform buffer"; - delete m_materialUBO; - m_materialUBO = nullptr; + pass.processUBO->deleteLater(); } } + m_computePasses.clear(); - // Initialize input samplers - SCORE_ASSERT(m_computePasses.empty()); - 
SCORE_ASSERT(m_inputSamplers.empty()); + // Clean up pipelines (m_ownedPipelines has unique entries, m_perPassPipelines may have duplicates) + for(auto* pip : m_ownedPipelines) + delete pip; + m_ownedPipelines.clear(); + m_perPassPipelines.clear(); + m_computePipeline = nullptr; - // Create samplers for input textures - m_inputSamplers = initInputSamplers(this->n, renderer, n.input); + // Clean up storage buffers + for(auto& storageBuffer : m_storageBuffers) + { + if(storageBuffer.owned) + r.releaseBuffer(storageBuffer.buffer); + } + m_storageBuffers.clear(); - // Parse descriptor to create storage buffers and determine output texture requirements. - // We also track the input port index to build the geometry-binding-to-port mapping. - // The input port index mirrors the order in which ISFNode's visitor calls - // self.input.push_back() for each descriptor input. - int sb_index = 0; - int outlet_index = 0; - int input_port_index = 0; // tracks which input port we're at - auto& outlets = n.output; - for(const auto& input : n.m_descriptor.inputs) + // Clean up GPU scatter + m_gpuScatter.release(); + m_gpuScatterAvailable = false; + + // Clean up geometry bindings + for(auto& binding : m_geometryBindings) { - // Handle storage buffers - if(auto* storage = ossia::get_if(&input.data)) + for(auto& ssbo : binding.attribute_ssbos) { - // Create storage buffer entry - actual buffer will be created/sized in updateStorageBuffers - StorageBuffer sb; - sb.buffer = nullptr; // Will be created in updateStorageBuffers - sb.size = 0; - sb.lastKnownSize = 0; // Force initial creation - sb.name = QString::fromStdString(input.name); - sb.buffer_usage = storage->buffer_usage; - sb.access = QString::fromStdString(storage->access); - sb.layout = storage->layout; // Store layout for size calculation - m_storageBuffers.push_back(sb); - - if(sb.access.contains("write")) { - m_outStorageBuffers.push_back({outlets[outlet_index], sb_index}); - outlet_index++; + if(ssbo.read_buffer) + { + 
r.releaseBuffer(ssbo.read_buffer); + ssbo.read_buffer = nullptr; } - // read_only storage creates an input port - if(storage->access == "read_only") - input_port_index++; - sb_index++; - } - // Handle CSF images - else if(auto* image = ossia::get_if(&input.data)) - { - QRhiTexture::Format format = getTextureFormat(QString::fromStdString(image->format)); - m_storageImages.push_back( - StorageImage{ - nullptr, QString::fromStdString(input.name), - QString::fromStdString(image->access), format}); - - if(m_storageImages.back().access.contains("write")) { - int img_index = (int)m_storageImages.size() - 1; - m_outStorageImages.push_back({outlets[outlet_index], img_index}); - outlet_index++; + if(ssbo.owned && ssbo.buffer) + { + r.releaseBuffer(ssbo.buffer); } - // read_only CSF image creates an input port - if(image->access == "read_only") - input_port_index++; + ssbo.buffer = nullptr; + delete ssbo.scatterStaging; + ssbo.scatterStaging = nullptr; + delete ssbo.scatterOp.srb; + ssbo.scatterOp.srb = nullptr; + delete ssbo.scatterOp.paramsUBO; + ssbo.scatterOp.paramsUBO = nullptr; } - // Handle geometry inputs - else if(auto* geo = ossia::get_if(&input.data)) + for(auto& aux : binding.auxiliary_ssbos) { - // Determine if this geometry_input creates an input port - // (mirrors ISFNode visitor logic: input port if any attribute is read_only or read_write) - bool needs_input = geo->attributes.empty(); // empty = pass-through, always has input - if(!needs_input) + if(aux.owned && aux.buffer) { - for(const auto& attr : geo->attributes) - if(attr.access == "read_only" || attr.access == "read_write") - { needs_input = true; break; } + r.releaseBuffer(aux.buffer); } + aux.buffer = nullptr; + } + for(auto& at : binding.auxiliary_textures) + { + if(at.sampler) + at.sampler->deleteLater(); + at.sampler = nullptr; + // For owned textures (auto-allocated writable storage images), + // we created the QRhiTexture and must release it here. 
Sampled + // entries point to either a RenderList-owned placeholder or an + // upstream-geometry-owned handle — those we don't free. + if(at.owned && at.texture) + at.texture->deleteLater(); + at.texture = nullptr; + at.owned = false; + } + binding.auxiliary_textures.clear(); + for(auto* buf : binding.copyFromBuffers) + r.releaseBuffer(buf); + binding.copyFromBuffers.clear(); + if(binding.indirectBuffer) + { + r.releaseBuffer(binding.indirectBuffer); + binding.indirectBuffer = nullptr; + } + } + m_geometryBindings.clear(); - GeometryBinding binding; - binding.input_port_index = needs_input ? input_port_index : -1; - binding.has_output = geo->attributes.empty(); // Empty attributes = pure pass-through with output - binding.has_vertex_count_spec = !geo->vertex_count.empty(); - binding.has_instance_count_spec = !geo->instance_count.empty(); + // Clean up storage images (including persistent ping-pong pair) + for(auto& storageImage : m_storageImages) + { + if(storageImage.texture) + storageImage.texture->deleteLater(); + if(storageImage.read_texture) + storageImage.read_texture->deleteLater(); + } + m_storageImages.clear(); - for(const auto& attr : geo->attributes) - { - GeometryBinding::AttributeSSBO ssbo; - ssbo.name = attr.name; - ssbo.access = attr.access; - ssbo.per_instance = (attr.rate == "instance"); - binding.attribute_ssbos.push_back(std::move(ssbo)); + m_outStorageImages.clear(); + m_outStorageBuffers.clear(); + m_outputTexture = nullptr; - if(attr.access != "read_only" && attr.access != "none") - binding.has_output = true; - } + // Clean up buffers and textures + delete m_materialUBO; + m_materialUBO = nullptr; - // If vertex_count is specified, resolve and pre-allocate attribute SSBOs - if(binding.has_vertex_count_spec) - { - int count = resolveCountExpression(geo->vertex_count, *geo, "vertex_count"); - if(count > 0) - binding.vertex_count = count; - } + // Clean up samplers + for(auto sampler : m_inputSamplers) + { + delete sampler.sampler; + // texture 
is deleted elsewhere + } + m_inputSamplers.clear(); - // Resolve instance_count if specified - if(binding.has_instance_count_spec) - { - int ic = resolveCountExpression(geo->instance_count, *geo, "instance_count"); - if(ic > 0) - binding.instance_count = ic; - } + m_initialized = false; +} - // Pre-allocate attribute SSBOs using the correct count based on rate +void RenderedCSFNode::addInputEdge( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(edge.sink->type == Types::Image) + { + // Find upstream texture + if(auto it = edge.source->node->renderedNodes.find(&renderer); + it != edge.source->node->renderedNodes.end()) + { + if(auto* tex = it->second->textureForOutput(*edge.source)) { - for(int attr_idx = 0; attr_idx < (int)geo->attributes.size(); attr_idx++) - { - if(attr_idx >= (int)binding.attribute_ssbos.size()) - break; - auto& ssbo = binding.attribute_ssbos[attr_idx]; - if(ssbo.access == "none") - continue; - const int count = ssbo.per_instance ? binding.instance_count : binding.vertex_count; - if(count <= 0) - continue; - const int elem_size = glslTypeSizeBytes(geo->attributes[attr_idx].type); - const int64_t needed = (int64_t)elem_size * count; - auto* buf = rhi.newBuffer( - QRhiBuffer::Static, - QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, needed); - qWarning() << "CSF ALLOC [geomSpecInit]" << ssbo.name.c_str() << "size=" << needed; - buf->setName(QByteArray("CSF_GeomSpec_") + ssbo.name.c_str()); - buf->create(); - QByteArray zero(needed, 0); - res.uploadStaticBuffer(buf, 0, needed, zero.constData()); - ssbo.buffer = buf; - ssbo.size = needed; - ssbo.owned = true; - } + auto rt = renderer.renderTargetForInputPort(*edge.sink); + updateInputTexture(*edge.sink, tex, rt.depthTexture); } + } + } + // Geometry input edges will be picked up by updateGeometryBindings in update() +} - for(const auto& aux : geo->auxiliary) - { - // COPY_FROM auxiliaries are forwarded in pushOutputGeometry, no SSBO needed - if(aux.forward) - continue; 
- - GeometryBinding::AuxiliarySSBO ssbo; - ssbo.name = aux.name; - ssbo.access = aux.access; - ssbo.layout = aux.layout; - ssbo.size_expr = aux.size; - - // Create the buffer immediately so it's available for the first dispatch - int arrayCount = 0; - if(!aux.size.empty()) - arrayCount = resolveCountExpression(aux.size, *geo, aux.name); - - const int64_t requiredSize = score::gfx::calculateStorageBufferSize( - aux.layout, arrayCount, this->n.descriptor()); - if(requiredSize > 0) - { - auto* buf = rhi.newBuffer( - QRhiBuffer::Static, - QRhiBuffer::StorageBuffer, requiredSize); - qWarning() << "CSF ALLOC [geoAuxInit]" << aux.name.c_str() << "size=" << requiredSize; - buf->setName(QByteArray("CSF_GeoAux_") + aux.name.c_str()); - buf->create(); - QByteArray zero(requiredSize, 0); - res.uploadStaticBuffer(buf, 0, requiredSize, zero.constData()); - ssbo.buffer = buf; - ssbo.size = requiredSize; - ssbo.owned = true; - } - - binding.auxiliary_ssbos.push_back(std::move(ssbo)); - - if(aux.access != "read_only") - binding.has_output = true; - } - -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Allocate indirect draw buffer if requested - if(geo->indirect_draw && renderer.state.caps.drawIndirect) - { - binding.uses_indirect_draw = true; - binding.indirect_draw_indexed = (geo->indirect_draw_type == "draw_indexed"); - - const int64_t indirectSize = binding.indirect_draw_indexed - ? 
(int64_t)sizeof(QRhiIndexedIndirectDrawCommand) - : (int64_t)sizeof(QRhiIndirectDrawCommand); - - auto* buf = rhi.newBuffer( - QRhiBuffer::Static, - QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer, - indirectSize); - qWarning() << "CSF ALLOC [indirectDraw]" << input.name.c_str() << "size=" << indirectSize; - buf->setName(QByteArray("CSF_IndirectDraw_") + input.name.c_str()); - buf->create(); - - // Initialize with zeros (vertexCount=0, instanceCount=0) - QByteArray zero(indirectSize, 0); - res.uploadStaticBuffer(buf, 0, indirectSize, zero.constData()); - - binding.indirectDrawBuffer = buf; - } -#endif - - const bool geo_has_output = binding.has_output; - m_geometryBindings.push_back(std::move(binding)); - - if(needs_input) - input_port_index++; - if(geo_has_output) - outlet_index++; - - // $USER ports also create input ports (IntSpinBox), track them - if(geo->vertex_count.find("$USER") != std::string::npos) - input_port_index++; - if(geo->instance_count.find("$USER") != std::string::npos) - input_port_index++; - for(const auto& aux : geo->auxiliary) - if(aux.size.find("$USER") != std::string::npos) - input_port_index++; - } - else - { - // All other input types (float, long, bool, event, color, point2D, point3D, - // image, audio, audioFFT, audioHist, cubemap, texture) create one input port each. - input_port_index++; - } +void RenderedCSFNode::removeInputEdge(RenderList& renderer, Edge& edge) +{ + if(edge.sink->type == Types::Image) + { + // See SimpleRenderedISFNode::removeInputEdge — same dangling-depth- + // sampler issue applies here when DEPTH: true inputs get disconnected. + const bool hasDepthCompanion + = (edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + QRhiTexture* depthFallback + = hasDepthCompanion ? 
&renderer.emptyTexture() : nullptr; + updateInputTexture(*edge.sink, &renderer.emptyTexture(), depthFallback); } + // Geometry input edges will be picked up by updateGeometryBindings in update() +} - m_outputTexture = nullptr; +void RenderedCSFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); - // Create the compute passes for each output edge (across all output ports) + // Create graphics passes for each output edge for(auto* output_port : n.output) { for(Edge* edge : output_port->edges) { - const auto& rt = renderer.renderTargetForOutput(*edge); - initComputePass(rt, renderer, *edge, res); + addOutputPass(renderer, *edge, res); } } } @@ -3203,9 +4188,13 @@ void RenderedCSFNode::update( if(m_materialUBO && n.m_material_data) { res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get()); + // CSF uploads the material UBO every frame (no materialChanged gate), + // so resetting event ports here is enough — the zero value will + // propagate to the GPU on the next frame's update(). + (void)n.resetEventPortsAfterFrame(); } - for(auto& [sampler, texture] : this->m_inputSamplers) + for(auto& [sampler, texture, fb_] : this->m_inputSamplers) { // Skip generateMips on textures that have not yet been written to. // Their Vulkan layout is still VK_IMAGE_LAYOUT_PREINITIALIZED, and Qt RHI's @@ -3230,332 +4219,62 @@ void RenderedCSFNode::update( // TODO: Check if texture size inputs have changed and recreate texture if needed } -void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiResourceUpdateBatch& res) +// Hash the bindings list to detect frame-to-frame drift. 
Two binding +// lists hash to the same value iff every entry's descriptor identity +// matches — recreateShaderResourceBindings then skips the +// destroy+setBindings+create dance when the per-pass binding list +// hasn't actually changed since the previous frame (steady state for +// a static scene; every frame would otherwise thrash the SRB pool slot). +// Use Qt's own qHash(QRhiShaderResourceBinding) so the equivalence +// matches QRhi's internal canonical representation — no need to pack +// the private Data union by hand and risk drift on a Qt minor update. +// Per-binding hashes are seeded by the binding's index so two +// otherwise-equal bindings at different slots hash differently; +// combined via ossia::hash_bytes over the per-binding hash vector. +namespace { - QRhi& rhi = *renderer.state.rhi; - - // Pre-pass: collect physical buffers used with conflicting access modes - // (read on one binding, write on another) so we can promote them to - // bufferLoadStore. The Qt RHI / Vulkan validation layer rejects bindings - // that reference the same buffer with different access flags within a pass. - // (geometry bindings are assumed up-to-date here — recreateShaderResourceBindings - // is called after the geometry update path) - std::unordered_set aliased_buffers; - { - std::unordered_map access_flags; // 1=read, 2=write, 3=both - int gb_idx = 0; - for(const auto& inp : n.m_descriptor.inputs) - { - auto* g = ossia::get_if(&inp.data); - if(!g) - continue; - if(gb_idx >= (int)m_geometryBindings.size()) - break; - const auto& gb = m_geometryBindings[gb_idx++]; - - for(int ai = 0; ai < (int)g->attributes.size() && ai < (int)gb.attribute_ssbos.size(); ai++) - { - const auto& req = g->attributes[ai]; - const auto& ssbo = gb.attribute_ssbos[ai]; - if(req.access == "none" || !ssbo.buffer) - continue; - int f = (req.access == "read_only") ? 1 : (req.access == "write_only") ? 
2 : 3; - access_flags[ssbo.buffer] |= f; - if(req.access == "read_write" && ssbo.read_buffer && ssbo.read_buffer != ssbo.buffer) - access_flags[ssbo.read_buffer] |= 1; - } - for(const auto& aux : gb.auxiliary_ssbos) - { - if(!aux.buffer) - continue; - int f = (aux.access == "read_only") ? 1 : (aux.access == "write_only") ? 2 : 3; - access_flags[aux.buffer] |= f; - if(aux.read_buffer && aux.read_buffer != aux.buffer) - access_flags[aux.read_buffer] |= 1; - } - } - for(const auto& [buf, flags] : access_flags) - if(flags == 3) - aliased_buffers.insert(buf); - } - - // Build the bindings list (same as in initComputePass) - QList bindings; - - // Binding 0: Renderer UBO - bindings.append(QRhiShaderResourceBinding::uniformBuffer( - 0, QRhiShaderResourceBinding::ComputeStage, &renderer.outputUBO())); - - // Binding 1: Process UBO (will be set per-pass) - bindings.append( - QRhiShaderResourceBinding::uniformBuffer( - 1, QRhiShaderResourceBinding::ComputeStage, nullptr)); - - // Binding 2: Material UBO (custom inputs) - int bindingIndex = 2; - if(m_materialUBO) - { - bindings.append(QRhiShaderResourceBinding::uniformBuffer( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, m_materialUBO)); - } - - int input_port_index = 0; - int input_image_index = 0; - int output_port_index = 0; - int output_image_index = 0; - int geo_binding_index = 0; - - // Process all resources in the order they appear in the descriptor - for(const auto& input : n.m_descriptor.inputs) - { - // Storage buffers - if(ossia::get_if(&input.data)) - { - // Find the corresponding storage buffer - auto it = std::find_if(m_storageBuffers.begin(), m_storageBuffers.end(), - [&input](const StorageBuffer& sb) { - return sb.name == QString::fromStdString(input.name); - }); - - if(it != m_storageBuffers.end() && it->buffer) - { - if(it->access == "read_only") - { - QRhiBuffer* buf = it->buffer; // Default dummy buffer - auto port = this->node.input[input_port_index]; - if(!port->edges.empty()) - { - auto 
input_buf = renderer.bufferForInput(*port->edges.front()); - if(input_buf) - { - buf = input_buf.handle; - } - } - bindings.append( - QRhiShaderResourceBinding::bufferLoad( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf)); - input_port_index++; - } - else if(it->access == "write_only") - { - bindings.append(QRhiShaderResourceBinding::bufferStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, - it->buffer)); - output_port_index++; - } - else // read_write - { - bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, - it->buffer)); - output_port_index++; - } - } - else - { - bindingIndex++; // keep indices synchronized with shader layout - } - } - // Regular textures (sampled) - else if(ossia::get_if(&input.data)) - { - // Regular sampled textures from m_inputSamplers - if(input_image_index < m_inputSamplers.size()) - { - auto [sampler, tex] = m_inputSamplers[input_image_index]; - if(sampler && tex) - { - bindings.append( - QRhiShaderResourceBinding::sampledTexture( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, sampler)); - } - } - input_port_index++; - input_image_index++; - } - // CSF storage images - else if(auto image = ossia::get_if(&input.data)) - { - // Find the corresponding storage image - auto it = std::find_if(m_storageImages.begin(), m_storageImages.end(), - [&input](const StorageImage& si) { - return si.name == QString::fromStdString(input.name); - }); - - if(it != m_storageImages.end()) - { - if(it->access == "read_only") - { - if(input_image_index < m_inputSamplers.size()) - { - auto [sampler, tex] = m_inputSamplers[input_image_index]; - if(sampler && tex) - { - bindings.append( - QRhiShaderResourceBinding::imageLoad( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, tex, 0)); - } - } - input_port_index++; - input_image_index++; - } - else if(it->texture) - { - if(it->access == "write_only") - { - bindings.append( - 
QRhiShaderResourceBinding::imageStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, it->texture, - 0)); - } - else if(it->access == "read_write") - { - bindings.append( - QRhiShaderResourceBinding::imageLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, it->texture, - 0)); - } - output_port_index++; - output_image_index++; - } - else - { - bindingIndex++; // keep indices synchronized with shader layout - output_port_index++; - output_image_index++; - } - } - } - // Geometry inputs: rebind per-attribute SSBOs - else if(auto* geo_input = ossia::get_if(&input.data)) - { - if(geo_binding_index < (int)m_geometryBindings.size()) - { - auto& binding = m_geometryBindings[geo_binding_index]; - - // Helper: emit a binding for buf with the given access mode, promoting - // to bufferLoadStore when the buffer is aliased across multiple bindings - // with conflicting accesses (avoids Vulkan validation warnings). - auto appendBufBinding = [&](QRhiBuffer* buf, const std::string& access) - { - const bool aliased = aliased_buffers.count(buf) > 0; - if(access == "read_write" || aliased) - { - bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf)); - } - else if(access == "read_only") - { - bindings.append(QRhiShaderResourceBinding::bufferLoad( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf)); - } - else // write_only - { - bindings.append(QRhiShaderResourceBinding::bufferStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, buf)); - } - }; - - for(int attr_idx = 0; attr_idx < (int)geo_input->attributes.size(); attr_idx++) - { - if(attr_idx >= (int)binding.attribute_ssbos.size()) - break; - - const auto& req = geo_input->attributes[attr_idx]; - auto& ssbo = binding.attribute_ssbos[attr_idx]; - - // "none" access: forwarded via COPY_FROM, no binding needed - if(req.access == "none") - continue; - - if(!ssbo.buffer) - { - // Create a minimal fallback buffer 
so we don't skip a binding index - const int elem_size = glslTypeSizeBytes(req.type); - ssbo.buffer = rhi.newBuffer( - QRhiBuffer::Static, - QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer, elem_size); - qWarning() << "CSF ALLOC [geomFBFallback]" << req.name.c_str() << "size=" << elem_size; - ssbo.buffer->setName(QByteArray("CSF_GeomFB_") + req.name.c_str()); - ssbo.buffer->create(); - ssbo.size = elem_size; - ssbo.owned = true; - } - - if(req.access == "read_only" || req.access == "write_only") - { - appendBufBinding(ssbo.buffer, req.access); - } - else // read_write -> 2 bindings: _in (readonly) + _out (read-write) - { - QRhiBuffer* read_buf = (ssbo.read_buffer && !binding.pending_initial_copy) - ? ssbo.read_buffer : ssbo.buffer; - if(read_buf == ssbo.buffer) - { - // Same physical buffer for both _in and _out (non-feedback in-place). - bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, ssbo.buffer)); - bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, ssbo.buffer)); - } - else - { - // Distinct buffers (feedback receiver): _in readonly, _out read-write - appendBufBinding(read_buf, "read_only"); - bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, ssbo.buffer)); - } - } - } - - // Auxiliary SSBOs for this geometry input - for(auto& aux : binding.auxiliary_ssbos) - { - if(!aux.buffer) - { - // Create a minimal fallback buffer so we don't skip a binding index - aux.buffer = rhi.newBuffer( - QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 16); - qWarning() << "CSF ALLOC [auxFBFallback]" << aux.name.c_str() << "size=16"; - aux.buffer->setName(QByteArray("CSF_AuxFB_") + aux.name.c_str()); - aux.buffer->create(); - aux.size = 16; - aux.owned = true; - } - - appendBufBinding(aux.buffer, aux.access); - } +uint64_t hashBindings(const QList& bindings) noexcept +{ + 
std::vector per; + per.reserve(bindings.size()); + size_t i = 0; + for(const auto& b : bindings) + per.push_back(qHash(b, /*seed=*/i++)); + return ossia::hash_bytes(per.data(), per.size() * sizeof(size_t)); +} +} // namespace -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Rebind indirect draw buffer - if(binding.uses_indirect_draw && binding.indirectDrawBuffer) - { - bindings.append(QRhiShaderResourceBinding::bufferLoadStore( - bindingIndex++, QRhiShaderResourceBinding::ComputeStage, - binding.indirectDrawBuffer)); - } -#endif +void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + QRhi& rhi = *renderer.state.rhi; - geo_binding_index++; - } - // Inlet port if any attribute reads from upstream - for(const auto& attr : geo_input->attributes) - if(attr.access == "read_only" || attr.access == "read_write") { input_port_index++; break; } - // Skip $USER ports for this geometry input - if(geo_input->vertex_count.find("$USER") != std::string::npos) input_port_index++; - if(geo_input->instance_count.find("$USER") != std::string::npos) input_port_index++; - for(const auto& aux : geo_input->auxiliary) - if(aux.size.find("$USER") != std::string::npos) input_port_index++; - } - else + // Single source of truth for the bindings list (also used by + // initComputeSRBAndPasses — see buildComputeSrbBindings). Geometry bindings + // are assumed up-to-date here: the caller (update()) runs + // updateGeometryBindings before calling this function. + QList bindings; + buildComputeSrbBindings(renderer, res, bindings); + + // Recreate SRBs for each compute pass — but only when the per-pass + // binding list actually changed. Hash the bindings (post per-pass + // ProcessUBO patch) and compare to the cached hash from the previous + // frame: identical → skip the destroy+setBindings+create cycle, which + // would otherwise thrash the QRhi SRB pool slot every frame on a + // static scene. 
+ for(auto& [edge, pass] : m_computePasses) + { + // Set the ProcessUBO binding for this pass — must happen BEFORE + // hashing so a change in pass.processUBO triggers a rebuild. + if(pass.processUBO) { - input_port_index++; + bindings[1] = QRhiShaderResourceBinding::uniformBuffer( + 1, QRhiShaderResourceBinding::ComputeStage, pass.processUBO); } - } - // Recreate SRBs for each compute pass - for(auto& [edge, pass] : m_computePasses) - { + const uint64_t newHash = hashBindings(bindings); + if(pass.srb && pass.srbBindingsHash == newHash && newHash != 0) + continue; // bindings unchanged from last frame + if(pass.srb) { // Delete old SRB @@ -3565,25 +4284,20 @@ void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiR { // Create new SRB pass.srb = rhi.newShaderResourceBindings(); - qWarning() << "CSF ALLOC [recreateSRB] new SRB for pass"; } - // Set the ProcessUBO binding for this pass - if(pass.processUBO) - { - bindings[1] = QRhiShaderResourceBinding::uniformBuffer( - 1, QRhiShaderResourceBinding::ComputeStage, pass.processUBO); - } - pass.srb->setBindings(bindings.cbegin(), bindings.cend()); if(!pass.srb->create()) { qWarning() << "Failed to recreate SRB for compute pass"; delete pass.srb; pass.srb = nullptr; + pass.srbBindingsHash = 0; + continue; } + pass.srbBindingsHash = newHash; } - + // Update the pipeline with one of the SRBs (they're all compatible) if(!m_computePasses.empty() && m_computePasses[0].second.srb) { @@ -3593,111 +4307,7 @@ void RenderedCSFNode::recreateShaderResourceBindings(RenderList& renderer, QRhiR void RenderedCSFNode::release(RenderList& r) { - // Clean up compute passes - for(auto& [edge, pass] : m_computePasses) - { - delete pass.srb; - if(pass.processUBO) - { - pass.processUBO->deleteLater(); - } - } - m_computePasses.clear(); - - // Clean up graphics passes - for(auto& [edge, pass] : m_graphicsPasses) - { - pass.pipeline.release(); - delete pass.outputSampler; - } - m_graphicsPasses.clear(); - - // Clean up 
pipelines (m_ownedPipelines has unique entries, m_perPassPipelines may have duplicates) - for(auto* pip : m_ownedPipelines) - delete pip; - m_ownedPipelines.clear(); - m_perPassPipelines.clear(); - m_computePipeline = nullptr; - - // Clean up storage buffers - for(auto& storageBuffer : m_storageBuffers) - { - if(storageBuffer.owned) - r.releaseBuffer(storageBuffer.buffer); - } - m_storageBuffers.clear(); - - // Clean up GPU scatter - m_gpuScatter.release(); - m_gpuScatterAvailable = false; - - // Clean up geometry bindings - for(auto& binding : m_geometryBindings) - { - for(auto& ssbo : binding.attribute_ssbos) - { - if(ssbo.read_buffer) - { - r.releaseBuffer(ssbo.read_buffer); - ssbo.read_buffer = nullptr; - } - if(ssbo.owned && ssbo.buffer) - { - r.releaseBuffer(ssbo.buffer); - } - ssbo.buffer = nullptr; - delete ssbo.scatterStaging; - ssbo.scatterStaging = nullptr; - delete ssbo.scatterOp.srb; - ssbo.scatterOp.srb = nullptr; - delete ssbo.scatterOp.paramsUBO; - ssbo.scatterOp.paramsUBO = nullptr; - } - for(auto& aux : binding.auxiliary_ssbos) - { - if(aux.owned && aux.buffer) - { - r.releaseBuffer(aux.buffer); - } - aux.buffer = nullptr; - } - for(auto* buf : binding.copyFromBuffers) - r.releaseBuffer(buf); - binding.copyFromBuffers.clear(); -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - if(binding.indirectDrawBuffer) - { - r.releaseBuffer(binding.indirectDrawBuffer); - binding.indirectDrawBuffer = nullptr; - } -#endif - } - m_geometryBindings.clear(); - - // Clean up storage images - for(auto& storageImage : m_storageImages) - { - if(storageImage.texture) - { - storageImage.texture->deleteLater(); - } - } - m_storageImages.clear(); - m_outStorageImages.clear(); - m_outStorageBuffers.clear(); - m_outputTexture = nullptr; - - // Clean up buffers and textures - delete m_materialUBO; - m_materialUBO = nullptr; - - // Clean up samplers - for(auto sampler : m_inputSamplers) - { - delete sampler.sampler; - // texture isdeleted elsewhere - } - m_inputSamplers.clear(); 
+ releaseState(r); } void RenderedCSFNode::runRenderPass( @@ -3730,6 +4340,14 @@ void RenderedCSFNode::runInitialPasses( RenderList& renderer, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, Edge& edge) { + // Plan 09 S6: debug marker for capture-tool readability. + commands.debugMarkBegin(QByteArrayLiteral("CSF")); + struct MarkEnd + { + QRhiCommandBuffer* c; + ~MarkEnd() { c->debugMarkEnd(); } + } _me{&commands}; + // Dispatch pending GPU scatter operations (format conversion) before user passes. // These convert raw CPU data (e.g. float3) uploaded to staging SSBOs into the // format expected by the CSF shader (e.g. vec4), entirely on the GPU. @@ -3789,24 +4407,11 @@ void RenderedCSFNode::runInitialPasses( const auto& pass = m_computePasses[passIndex].second; - // Begin compute pass with ExternalContent flag so we can insert - // native memory barriers between dispatches via beginExternal/endExternal. - commands.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent); - res = nullptr; - - // Set compute pipeline - commands.setComputePipeline(pass.pipeline); - - // Set shader resources - commands.setShaderResources(pass.srb); - - // Calculate dispatch size based on pass configuration - // Use pass-specific local sizes int localX = passDesc.local_size[0]; int localY = passDesc.local_size[1]; int localZ = passDesc.local_size[2]; - + int dispatchX{}, dispatchY{}, dispatchZ{}; // Resolve per-axis stride expressions @@ -3814,22 +4419,62 @@ void RenderedCSFNode::runInitialPasses( const int strideY = resolveDispatchExpression(passDesc.stride[1]); const int strideZ = resolveDispatchExpression(passDesc.stride[2]); + // Resolve the texture that drives 2D_IMAGE / 3D_IMAGE dispatch sizing. + // Priority: pass's explicit TARGET (matches by name against both storage + // images and input samplers) → m_outputTexture fallback. 
+ auto resolveDispatchTexture + = [&]() -> QRhiTexture* { + const auto& target = passDesc.target_resource; + if(!target.empty()) + { + const QString qtarget = QString::fromStdString(target); + for(const auto& si : m_storageImages) + if(si.name == qtarget && si.texture) + return si.texture; + + // INPUTS entry: walk descriptor.inputs looking for a named image/texture + // input and map it to the corresponding sampled texture. + const auto& desc = n.descriptor(); + int input_image_index = 0; + for(const auto& inp : desc.inputs) + { + const bool is_texture = ossia::get_if(&inp.data); + const auto* ci = ossia::get_if(&inp.data); + const bool is_img_sampled = ci && ci->access == "read_only"; + if(is_texture || is_img_sampled) + { + if(inp.name == target + && input_image_index < (int)m_inputSamplers.size() + && m_inputSamplers[input_image_index].texture) + return m_inputSamplers[input_image_index].texture; + input_image_index++; + } + else if(ossia::get_if(&inp.data)) + { + // ISF image_input is also bound as a sampler + input_image_index++; + } + } + } + return m_outputTexture; + }; + // Calculate dispatch size based on execution model if(passDesc.execution_type == "2D_IMAGE") { - // For 2D image execution, dispatch based on image size, workgroup size and stride - QSize textureSize = m_outputTexture ? m_outputTexture->pixelSize() : QSize(1280, 720); + QRhiTexture* tex = resolveDispatchTexture(); + QSize textureSize = tex ? 
tex->pixelSize() : QSize(1280, 720); dispatchX = (textureSize.width() + localX * strideX - 1) / (localX * strideX); dispatchY = (textureSize.height() + localY * strideY - 1) / (localY * strideY); dispatchZ = 1; } else if(passDesc.execution_type == "3D_IMAGE") { - // For 3D image execution, dispatch based on volume dimensions and strides - if(m_outputTexture) + QRhiTexture* tex = resolveDispatchTexture(); + if(tex) { - QSize sz = m_outputTexture->pixelSize(); - int depth = m_outputTexture->depth(); + QSize sz = tex->pixelSize(); + int depth = std::max(1, tex->depth()); dispatchX = (sz.width() + localX * strideX - 1) / (localX * strideX); dispatchY = (sz.height() + localY * strideY - 1) / (localY * strideY); dispatchZ = (depth + localZ * strideZ - 1) / (localZ * strideZ); @@ -3873,48 +4518,143 @@ void RenderedCSFNode::runInitialPasses( { int n = 1; - if(passDesc.execution_type == "PER_VERTEX") + if(passDesc.execution_type == "PER_VERTEX" + || passDesc.execution_type == "PER_INSTANCE") { - // Dispatch one thread per vertex in the target geometry - for(const auto& geo_bind : m_geometryBindings) + const bool per_instance = (passDesc.execution_type == "PER_INSTANCE"); + const std::string& tgt = passDesc.target_resource; + auto count_of = [per_instance](const auto& b) { + return per_instance ? b.instance_count : b.vertex_count; + }; + + // Recommended: TARGET names the geometry resource explicitly. + // Order-independent and self-documenting; should be set on every + // bundled preset (presets without it fall through to the legacy + // first-binding-with-positive-count form below). 
+ bool resolved = false; + if(!tgt.empty()) { - if(geo_bind.vertex_count > 0) + for(const auto& geo_bind : m_geometryBindings) { - n = geo_bind.vertex_count; - break; + if(geo_bind.input_name == tgt) + { + const int c = count_of(geo_bind); + if(c > 0) + { + n = c; + resolved = true; + } + break; + } + } + if(!resolved) + { + qWarning() << "CSF" << passDesc.execution_type.c_str() + << "TARGET" << tgt.c_str() + << "not found among geometry bindings, or has zero" + << (per_instance ? "instance_count" : "vertex_count"); } } - } - else if(passDesc.execution_type == "PER_INSTANCE") - { - // Dispatch one thread per instance in the target geometry - for(const auto& geo_bind : m_geometryBindings) + + // Legacy / TARGET-less fallback: first binding with count > 0. + if(!resolved) { - if(geo_bind.instance_count > 0) + for(const auto& geo_bind : m_geometryBindings) { - n = geo_bind.instance_count; - break; + const int c = count_of(geo_bind); + if(c > 0) + { + n = c; + break; + } } } } else { - // 1D_BUFFER: try storage buffer size first, then geometry element count - for(auto& [port, index] : this->m_outStorageBuffers) { - if(port == edge.source) { - n = this->m_storageBuffers[index].size; - break; + // 1D_BUFFER resolution has three forms, chosen by what the shader + // author wrote as TARGET: + // + // TARGET = "$expression" or "literal * literal" or "literal": + // Treat as an expression. Evaluate through the common resolver + // (same variables as SIZE / WIDTH / HEIGHT / STRIDE_*, including + // the new $COUNT_ / $BYTESIZE_ surface). The + // result is the total thread count `n`, which the spreading + // logic below distributes across x/y/z workgroups — behaves + // like MANUAL but without making the user pick an axis split. + // + // TARGET = "bufferName" (a bare identifier, legacy form): + // Dispatch over the buffer's element count. 
Equivalent to + // "$COUNT_bufferName" but kept as shorthand and for backward + // compatibility with any existing score that wrote a plain + // buffer name. + // + // TARGET empty (no TARGET key in JSON, or empty string): + // Fall back to the legacy behaviour — size by the output + // storage buffer matching the current edge (in BYTES, which + // is a long-standing quirk: dispatches over raw bytes rather + // than elements), then by the first geometry's vertex_count. + // Left unchanged so existing scores without explicit TARGET + // still dispatch the same as before. + const std::string& target = passDesc.target_resource; + + auto looks_like_expression = [&]() -> bool { + if(target.empty()) + return false; + for(char c : target) + { + if(c == '$' || c == '+' || c == '-' || c == '*' || c == '/' + || c == '%' || c == '(' || c == ')') + return true; } - } + // Pure integer literal counts as an expression (evaluator's + // fast-path handles it). Anything else that's a valid identifier + // character stream is treated as a bare buffer name. + bool all_numeric = !target.empty(); + for(char c : target) + { + if(!std::isdigit((unsigned char)c) + && !std::isspace((unsigned char)c)) + { + all_numeric = false; + break; + } + } + return all_numeric; + }; - if(n <= 1) + if(looks_like_expression()) { - for(const auto& geo_bind : m_geometryBindings) + n = resolveDispatchExpression(target); + } + else if(!target.empty()) + { + // Bare buffer name → resolve as "$COUNT_". The common + // resolver will look it up in m_storageBuffers / auxiliary_ssbos + // and return the element count. Falls back to 1 on miss. + const std::string count_expr = "$COUNT_" + target; + n = resolveDispatchExpression(count_expr); + } + else + { + // Legacy empty-TARGET fallback — preserved verbatim for + // compatibility with existing scores. 
+ for(auto& [port, index] : this->m_outStorageBuffers) { + if(port == edge.source) { + n = this->m_storageBuffers[index].size; + break; + } + } + + if(n <= 1) { - if(geo_bind.vertex_count > 0) + for(const auto& geo_bind : m_geometryBindings) { - n = geo_bind.vertex_count; - break; + if(geo_bind.vertex_count > 0) + { + n = geo_bind.vertex_count; + break; + } } } } @@ -3928,8 +4668,14 @@ void RenderedCSFNode::runInitialPasses( if(totalWorkgroups > maxWorkgroups * maxWorkgroups * maxWorkgroups) { - commands.endComputePass(); - return; + // Workgroup count overflow: skip THIS pass only. We haven't yet + // opened a compute pass at this point (the begin/end for this + // dispatch is now hoisted *after* the size calculation), so + // there is nothing to close — continue to the next pass. Using + // `return` here aborted every remaining pass and desynced the + // ping-pong buffer swaps; mirror the dispatch(0,0,0) guard below + // which already uses `continue`. + continue; } if(totalWorkgroups > maxWorkgroups * maxWorkgroups) { @@ -3960,24 +4706,46 @@ void RenderedCSFNode::runInitialPasses( dispatchZ = 1; } - // Guard against dispatch(0,0,0) which is invalid per Vulkan spec + // Guard against dispatch(0,0,0) which is invalid per Vulkan spec. + // Pass not yet opened, so we just skip without closing anything. if(dispatchX <= 0 || dispatchY <= 0 || dispatchZ <= 0) - { - commands.endComputePass(); continue; - } - // Dispatch compute shader - commands.dispatch(dispatchX, dispatchY, dispatchZ); + // Publish the workgroup count to the per-pass ProcessUBO so the + // shader can read gl_NumWorkGroups via the libisf-injected + // uniform alias. SPIRV-Cross's HLSL backend cannot emit code for + // the GLSL NumWorkgroups built-in directly (D3D11/D3D12 bake fails + // outright), so this routing is what makes compute shaders that + // reference gl_NumWorkGroups portable across all backends. 
+ // + // Must happen before beginComputePass — updateDynamicBuffer is + // applied as part of the resource update batch that beginComputePass + // consumes; mid-pass updates are not allowed. + if(pass.processUBO) + { + if(!res) + res = renderer.state.rhi->nextResourceUpdateBatch(); + n.standardUBO.passIndex = static_cast(passIndex); + n.standardUBO.numWorkgroups[0] = static_cast(dispatchX); + n.standardUBO.numWorkgroups[1] = static_cast(dispatchY); + n.standardUBO.numWorkgroups[2] = static_cast(dispatchZ); + res->updateDynamicBuffer( + pass.processUBO, 0, sizeof(ProcessUBO), &n.standardUBO); + } - // End compute pass + // Each CSF pass issues exactly ONE dispatch in its own begin/endComputePass. + // QRhi automatically inserts the compute→compute memory barrier between + // consecutive passes that touch the same SSBO/image, so the previous + // per-pass ExternalContent flag + native barrier was redundant here — and + // ExternalContent needlessly forced Vulkan secondary command buffers. The + // native-barrier path stays for the genuinely multi-dispatch scatter loop + // (above), which issues several dispatches inside a single pass. + commands.beginComputePass(res); + res = nullptr; - // Insert a compute→compute memory barrier so that SSBO writes from - // this dispatch are visible to the next dispatch. QRhi does not - // insert these automatically between consecutive compute passes. 
- commands.beginExternal(); - insertComputeBarrier(*renderer.state.rhi, commands); - commands.endExternal(); + commands.setComputePipeline(pass.pipeline); + commands.setShaderResources(pass.srb); + commands.dispatch(dispatchX, dispatchY, dispatchZ); commands.endComputePass(); } @@ -4017,9 +4785,136 @@ void RenderedCSFNode::runInitialPasses( if(geo_input->attributes[ai].access == "read_write" && ssbo.read_buffer) std::swap(ssbo.buffer, ssbo.read_buffer); } + for(auto& aux : gb.auxiliary_ssbos) + { + if(aux.access == "read_write" && aux.read_buffer) + std::swap(aux.buffer, aux.read_buffer); + } } gb_idx++; } } + + // Ping-pong swap for persistent storage images: the primary binding + // holds the current-frame target, the `_prev` binding reads the + // previous frame's data. After the frame renders, swap pointers so the + // next frame reads what we just wrote, and patch every compute SRB + // that holds these bindings via the indices recorded at build time. + { + bool any_swap = false; + for(auto& si : m_storageImages) + { + if(!si.persistent || !si.texture || !si.read_texture) + continue; + std::swap(si.texture, si.read_texture); + si.pending_initial_copy = false; + any_swap = true; + } + if(any_swap) + { + for(auto& [e, cp] : m_computePasses) + { + if(!cp.srb) + continue; + for(const auto& si : m_storageImages) + { + if(!si.persistent) + continue; + if(si.binding >= 0 && si.texture) + score::gfx::replaceTexture(*cp.srb, si.binding, si.texture); + if(si.prev_binding >= 0 && si.read_texture) + score::gfx::replaceTexture(*cp.srb, si.prev_binding, si.read_texture); + } + // No trailing create() — replaceTexture's updateResources() fast + // path already refreshes the backend descriptor state. + } + + // Diagnostic 014: graphics passes that visualize the persistent + // image bake the pre-swap `si.texture` pointer at construction time + // (createGraphicsPass calls textureForOutput for the edge's source + // port). 
After ping-pong, that bound handle now identifies the + // stale-frame slot. Patch every graphics SRB so it samples the + // post-swap writable target — i.e. what the next compute dispatch + // will write into and what we want to display. + for(auto& [e, gp] : m_graphicsPasses) + { + if(!gp.pipeline.srb || !gp.outputSampler) + continue; + // Resolve which storage image this graphics pass shows. Mirrors + // textureForOutput(): first the per-port mapping in + // m_outStorageImages, otherwise the m_outputTexture fallback. + QRhiTexture* newTex = nullptr; + for(const auto& [port, index] : m_outStorageImages) + { + if(port == e->source && index < (int)m_storageImages.size()) + { + const auto& si = m_storageImages[index]; + if(si.persistent) + newTex = si.texture; + break; + } + } + if(!newTex) + { + // Fallback path — graphics pass uses m_outputTexture. Find the + // persistent entry whose post-swap read_texture equals the + // pre-swap m_outputTexture (= what the SRB currently binds). + for(const auto& si : m_storageImages) + { + if(si.persistent && si.read_texture == m_outputTexture) + { + newTex = si.texture; + break; + } + } + } + if(newTex) + score::gfx::replaceTexture(*gp.pipeline.srb, gp.outputSampler, newTex); + } + + // Diagnostic 014: m_outputTexture is the fallback returned by + // textureForOutput()/resolveDispatchTexture() for default-port + // queries. It was captured from the first persistent storage + // image's primary `texture` at build time; after the swap that + // pointer is the stale-frame slot. Identify the entry whose + // post-swap read_texture (= pre-swap texture) matches the cached + // m_outputTexture and refresh it to the new writable target. 
+ if(m_outputTexture) + { + for(const auto& si : m_storageImages) + { + if(si.persistent && si.read_texture == m_outputTexture && si.texture) + { + m_outputTexture = si.texture; + break; + } + } + } + } + } + + // GENERATE_MIPS: regenerate the mip chain so downstream samplers with a + // mipmap filter see a valid level > 0. Queued on the same per-frame + // resource-update batch as the rest of update()'s work — same pattern + // used for input samplers above at `res.generateMips(texture)`. + // + // Gated on FRAMEINDEX > 0: the textures are created with layout + // PREINITIALIZED and Qt RHI's GenMips path transitions FROM a transfer + // layout BACK to whatever the texture was stored as. Calling generateMips + // before the compute pass has actually written the image at least once + // leaves it in PREINITIALIZED, which trips VUID-VkImageMemoryBarrier- + // newLayout-01198. After one frame the compute dispatch has transitioned + // the image to GENERAL and generateMips is safe. + if(n.standardUBO.frameIndex > 0u) + { + for(const auto& si : m_storageImages) + { + if(!si.generate_mips || !si.texture) + continue; + if(!(si.texture->flags() & QRhiTexture::MipMapped)) + continue; + res->generateMips(si.texture); + } + } } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp index b89c4c873b..d7953a0c0e 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedCSFNode.hpp @@ -17,13 +17,23 @@ struct RenderedCSFNode : score::gfx::NodeRenderer virtual ~RenderedCSFNode(); - void updateInputTexture(const Port& input, QRhiTexture* tex) override; + void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override; QRhiTexture* textureForOutput(const Port& output) override; void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; void update(RenderList& renderer, 
QRhiResourceUpdateBatch& res, Edge* edge) override; void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + void + addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeInputEdge(RenderList& renderer, Edge& edge) override; + void runInitialPasses( RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, Edge& edge) override; @@ -31,7 +41,7 @@ struct RenderedCSFNode : score::gfx::NodeRenderer void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override; private: - void initComputePass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res); + void initComputeSRBAndPasses(RenderList& renderer, QRhiResourceUpdateBatch& res); void createComputePipeline(RenderList& renderer); void createGraphicsPass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res); void updateDescriptorSet(RenderList& renderer, Edge& edge); @@ -51,11 +61,24 @@ struct RenderedCSFNode : score::gfx::NodeRenderer RenderList& renderer, const QString& name, const QString& access, int size); void updateStorageBuffers(RenderList& renderer, QRhiResourceUpdateBatch& res); void recreateShaderResourceBindings(RenderList& renderer, QRhiResourceUpdateBatch& res); + + // Single source of truth for the CSF compute SRB binding list. Walks the + // descriptor's INPUTS / RESOURCES / AUXILIARIES in order and emits one + // QRhiShaderResourceBinding per shader binding slot. 
Both + // initComputeSRBAndPasses (init path) and recreateShaderResourceBindings + // (re-emit path) call this so the two paths can never drift in their + // emission order, indices, or fallback-on-missing-resource policy. + // Binding 1 (ProcessUBO) is left as a nullptr placeholder; each caller + // patches it per-pass. Output: appended to `bindings`. + void buildComputeSrbBindings( + RenderList& renderer, QRhiResourceUpdateBatch& res, + QList& bindings); int getArraySizeFromUI(const QString& bufferName) const; QString updateShaderWithImageFormats(QString current); // Geometry buffer management void updateGeometryBindings(RenderList& renderer, QRhiResourceUpdateBatch& res); + void pushOutputGeometry(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge); int resolveCountExpression( const std::string& expr, const isf::geometry_input& geo, @@ -69,6 +92,12 @@ struct RenderedCSFNode : score::gfx::NodeRenderer QRhiComputePipeline* pipeline{}; QRhiShaderResourceBindings* srb{}; QRhiBuffer* processUBO{}; + // Hash of the last bindings vector applied to `srb`. Compared in + // recreateShaderResourceBindings to skip a destroy+setBindings+ + // create cycle when the bindings haven't actually changed since the + // previous frame. 0 = "never built / unknown" — first call always + // rebuilds. See RenderedCSFNode.cpp recreateShaderResourceBindings. 
+ size_t srbBindingsHash{0}; }; struct GraphicsPass @@ -106,9 +135,21 @@ struct RenderedCSFNode : score::gfx::NodeRenderer struct StorageImage { QRhiTexture* texture{}; + QRhiTexture* read_texture{}; //!< Previous-frame slot, only when persistent QString name; QString access; // "read_only", "write_only", "read_write" QRhiTexture::Format format{QRhiTexture::RGBA8}; + bool is3D{false}; + bool isCube{false}; //!< Writable cubemap (imageCube) + bool persistent{false}; //!< Ping-pong this image across frames + bool pending_initial_copy{false}; //!< First frame: _prev reads from `texture` too + bool generate_mips{false}; //!< Run QRhi::generateMips after compute passes + + // Recorded binding slots in the compute SRB so that end-of-frame + // swapping can call replaceTexture() without having to re-walk the + // descriptor layout. + int binding{-1}; + int prev_binding{-1}; }; std::vector m_storageImages; @@ -138,22 +179,55 @@ struct RenderedCSFNode : score::gfx::NodeRenderer bool scatterPending{false}; // true = needs dispatch this frame }; - // Structured SSBOs that travel with the geometry (matched by name - // against ossia::geometry::auxiliary_buffer entries). + // Structured SSBOs (or UBOs) that travel with the geometry (matched + // by name against ossia::geometry::auxiliary_buffer entries). The + // `is_uniform` flag mirrors the AUXILIARY request's kind: when true, + // the buffer is bound as a std140 uniform block via + // QRhiShaderResourceBinding::uniformBuffer; when false, as an std430 + // SSBO via bufferLoad / bufferStore / bufferLoadStore. 
struct AuxiliarySSBO { - QRhiBuffer* buffer{}; // GPU SSBO (write target / primary) + QRhiBuffer* buffer{}; // GPU SSBO/UBO (write target / primary) QRhiBuffer* read_buffer{}; // Separate read buffer for ping-pong (nullptr = use buffer for both) int64_t size{}; bool owned{true}; + bool is_uniform{false}; // true = std140 UBO, false = std430 SSBO std::string name; std::string access; std::vector layout; std::string size_expr; // expression for flexible array count, may contain $USER }; + // Auxiliary textures that travel with the geometry (resolved from + // ossia::geometry::auxiliary_textures by name). Either sampled + // (sampler*) or storage-image (image*). Shape-matched placeholder + // used as fallback when no match exists on the incoming geometry. + struct AuxiliaryTexture + { + QRhiSampler* sampler{}; // null for storage-image entries + QRhiTexture* texture{}; // current bound handle (placeholder or upstream) + QRhiTexture* placeholder{}; // shape-matched empty from RenderList + std::string name; + int binding{-1}; // assigned at SRB build + bool is_storage{false}; + std::string access; // "read_only" / "write_only" / "read_write" + + // True when this binding allocated `texture` itself (write_only / + // read_write storage image declared as a nested aux on a geometry + // input — same lifecycle role as m_storageImages plays for top- + // level csf_image_input outputs). Owned textures: + // - skip the per-frame upstream-resolution overwrite (we own + // the data, no upstream contributes); + // - get pushed into out_geo.auxiliary_textures by name so + // downstream consumers can resolve the live handle; + // - get deleted on release(). + bool owned{false}; + }; + std::vector attribute_ssbos; std::vector auxiliary_ssbos; + std::vector auxiliary_textures; + std::string input_name; // RESOURCES[].NAME (e.g. 
"geoIn", "geoOut") — used by PER_VERTEX/PER_INSTANCE TARGET filtering int vertex_count{0}; // Number of elements (vertices) in the geometry int instance_count{1}; // Number of instances int input_port_index{-1}; // Input port index for this binding (-1 = no input port, e.g. write_only generator) @@ -175,11 +249,11 @@ struct RenderedCSFNode : score::gfx::NodeRenderer int prev_attribute_count{-1}; int prev_upstream_attr_count{-1}; -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - QRhiBuffer* indirectDrawBuffer{}; // StorageBuffer | IndirectBuffer for GPU-driven draw args - bool uses_indirect_draw{false}; // true when geometry_input has INDIRECT_DRAW: true - bool indirect_draw_indexed{false}; // true for drawIndexedIndirect, false for drawIndirect -#endif + QRhiBuffer* indirectBuffer{}; // StorageBuffer (+ IndirectBuffer on Qt 6.12+) + int64_t indirectBufferSize{}; + int indirectCountResult{0}; // Resolved command count + std::string indirectCountExpr; // Expression string for dynamic re-resolve + bool uses_indirect_draw{false}; }; std::vector m_geometryBindings; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp index 327f4a9ff0..fb1b064968 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -14,22 +16,67 @@ PassOutput RenderedISFNode::initPassSampler( QRhiResourceUpdateBatch& res) { QRhi& rhi = *renderer.state.rhi; + + // Volumetric fragment passes: a pass targeting a 3D output (OUTPUTS entry + // with DEPTH > 1) or carrying a Z expression requires per-slice color + // attachments / 3D image storage that this node does not wire end-to-end. 
+ // The ISF parser rejects such shaders up-front (see isf.cpp parse_isf: + // "fragment-mode ISF with PASSES targeting Z / 3D OUTPUTS"); reaching this + // point with such a pass means the rejection drifted out of sync. + if(!pass.z_expression.empty() || [&]{ + for(const auto& out : n.descriptor().outputs) + if(out.name == pass.target && out.depth > 1) return true; + return false; + }()) + { + qFatal( + "RenderedISFNode: fragment PASSES with Z / 3D OUTPUTS reached the " + "renderer; parse-time rejection in isf::parser::parse_isf() should " + "have prevented this. Target: %s", + pass.target.c_str()); + } + + // Per-pass FORMAT override takes precedence over the legacy FLOAT flag. + // Covers the handful of formats useful as intermediate render targets: + // rgba8 (default), rgba16f (common precision bump), rgba32f, r16f, r32f. + auto pass_format = [&]() -> QRhiTexture::Format { + if(pass.format.empty()) + return pass.float_storage ? QRhiTexture::RGBA32F : QRhiTexture::RGBA8; + std::string f = pass.format; + for(auto& c : f) + c = (char)std::tolower((unsigned char)c); + if(f == "rgba8") return QRhiTexture::RGBA8; + if(f == "rgba16f") return QRhiTexture::RGBA16F; + if(f == "rgba32f") return QRhiTexture::RGBA32F; + if(f == "r8") return QRhiTexture::R8; + if(f == "r16f") return QRhiTexture::R16F; + if(f == "r32f") return QRhiTexture::R32F; + qWarning() << "ISF pass FORMAT" << pass.format.c_str() + << "not recognised — falling back to RGBA8"; + return QRhiTexture::RGBA8; + }; // In all the other cases we create a custom render target - const auto fmt = (pass.float_storage) ? QRhiTexture::RGBA32F : QRhiTexture::RGBA8; + const auto fmt = pass_format(); const auto filter = (pass.nearest_filter) ? 
QRhiSampler::Nearest : QRhiSampler::Linear; auto sampler = rhi.newSampler( filter, filter, QRhiSampler::None, QRhiSampler::Mirror, QRhiSampler::Mirror); - sampler->setName("ISFNode::initPassSamplers::sampler"); + sampler->setName("RenderedISFNode::initPassSamplers::sampler"); sampler->create(); const QSize texSize = (pass.width_expression.empty() && pass.height_expression.empty()) ? mainTexSize : n.computeTextureSize(pass, mainTexSize); - QImage clear_texture(texSize, pass.float_storage ? QImage::Format_RGBA32FPx4 : QImage::Format_ARGB32); + // Upload a zero clear matching the texture format. Qt can convert, so we + // pick a plausible source: float32 for floating-point formats, uint8 otherwise. + const bool is_float_fmt + = fmt == QRhiTexture::RGBA16F || fmt == QRhiTexture::RGBA32F + || fmt == QRhiTexture::R16F || fmt == QRhiTexture::R32F; + QImage clear_texture( + texSize, is_float_fmt ? QImage::Format_RGBA32FPx4 : QImage::Format_ARGB32); clear_texture.fill(0); auto tex = rhi.newTexture(fmt, texSize, 1, QRhiTexture::RenderTarget); - tex->setName("ISFNode::initPassSamplers::tex"); + tex->setName("RenderedISFNode::initPassSamplers::tex"); SCORE_ASSERT(tex->create()); res.uploadTexture(tex, clear_texture); @@ -39,7 +86,7 @@ PassOutput RenderedISFNode::initPassSampler( if(pass.persistent) { auto tex2 = rhi.newTexture(fmt, texSize, 1, QRhiTexture::RenderTarget); - tex2->setName("ISFNode::initPassSamplers::tex2"); + tex2->setName("RenderedISFNode::initPassSamplers::tex2"); SCORE_ASSERT(tex2->create()); res.uploadTexture(tex2, clear_texture); @@ -83,7 +130,7 @@ RenderedISFNode::RenderedISFNode(const ISFNode& node) noexcept { } -void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex) +void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { int sampler_idx = 0; for(auto* p : node.input) @@ -91,7 +138,11 @@ void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex) if(p == &input) 
break; if(p->type == Types::Image) + { sampler_idx++; + if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) + sampler_idx++; + } } if(sampler_idx < (int)m_inputSamplers.size()) @@ -110,6 +161,65 @@ void RenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex) score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex); } } + + if(depthTex + && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth + && sampler_idx + 1 < (int)m_inputSamplers.size()) + { + auto& depthSampl = m_inputSamplers[sampler_idx + 1]; + if(depthSampl.texture != depthTex) + { + depthSampl.texture = depthTex; + for(auto& [e, passes] : m_passes) + { + for(auto& pass : passes.passes) + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex); + for(auto& pass : passes.altPasses) + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex); + } + } + } + } +} + +void RenderedISFNode::updateInputSamplerFilter( + const Port& input, const RenderTargetSpecs& spec) +{ + int sampler_idx = 0; + for(auto* p : node.input) + { + if(p == &input) + break; + if(p->type == Types::Image) + sampler_idx++; + } + + if(sampler_idx < (int)m_inputSamplers.size()) + { + auto* sampler = m_inputSamplers[sampler_idx].sampler; + if(sampler->magFilter() == spec.mag_filter + && sampler->minFilter() == spec.min_filter + && sampler->mipmapMode() == spec.mipmap_mode + && sampler->addressU() == spec.address_u + && sampler->addressV() == spec.address_v + && sampler->addressW() == spec.address_w) + { + // Nothing to update. The surgical rt_changed path calls this + // whenever renderTargetSpecsChanged fires, but filter/address + // state is often unchanged (the bump was for size or format). + // Skip the sampler->create() — it would destroy and re-allocate + // the backend QRhiSampler for no observable reason. 
+ return; + } + sampler->setMagFilter(spec.mag_filter); + sampler->setMinFilter(spec.min_filter); + sampler->setMipmapMode(spec.mipmap_mode); + sampler->setAddressU(spec.address_u); + sampler->setAddressV(spec.address_v); + sampler->setAddressW(spec.address_w); + sampler->create(); } } @@ -194,7 +304,8 @@ void main () std::pair RenderedISFNode::createPass( RenderList& renderer, ossia::small_vector& passSamplers, - PassOutput target, bool previousPassIsPersistent) + PassOutput target, const isf::pass& modelPass, + bool previousPassIsPersistent) { std::pair ret; QRhi& rhi = *renderer.state.rhi; @@ -205,6 +316,32 @@ std::pair RenderedISFNode::createPass( pubo->setName("RenderedISFNode::createPass::pubo"); pubo->create(); + // Compute effective pipeline state: global default + per-pass override. + const auto eff_state + = mergeState(n.descriptor().default_state, modelPass.override_state); + + // Build the extra-binding list (storage + optional multiview UBO). + auto extraRhiBindings = buildExtraBindings(m_storage); + if(m_multiViewUBO) + { + // Multiview UBO binds right after storage resources. 
+ int mvBinding = m_firstStorageBinding; + for(const auto& e : m_storage.ssbos) + { + if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1); + if(e.prev_binding >= 0) mvBinding = std::max(mvBinding, e.prev_binding + 1); + } + for(const auto& e : m_storage.images) + if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1); + + extraRhiBindings.append(QRhiShaderResourceBinding::uniformBuffer( + mvBinding, + QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage, + m_multiViewUBO)); + } + const std::span extras{ + extraRhiBindings.data(), (std::size_t)extraRhiBindings.size()}; + // Create the main pass { // Render target for the pass @@ -230,9 +367,13 @@ std::pair RenderedISFNode::createPass( try { auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS); - auto pip = score::gfx::buildPipeline( + const auto mainSamplers = allSamplers(passSamplers, 1); + auto pip = score::gfx::buildPipelineWithState( renderer, renderer.defaultTriangle(), v, s, renderTarget, pubo, m_materialUBO, - allSamplers(passSamplers, 1)); + mainSamplers, + extras, + eff_state, + n.descriptor().multiview_count); ret.first = Pass{renderTarget, pip, pubo}; } @@ -262,7 +403,7 @@ std::pair RenderedISFNode::createPass( // Then we have to use the textures the "main" passes are rendering to ret.second.p.srb = score::gfx::createDefaultBindings( renderer, ret.second.renderTarget, pubo, m_materialUBO, - allSamplers(passSamplers, 0)); + allSamplers(passSamplers, 0), extras); } } else if(auto psampler = ossia::get_if(&target)) @@ -284,7 +425,7 @@ std::pair RenderedISFNode::createPass( // We necessarily use the main pass rendered-to samplers ret.second.p.srb = score::gfx::createDefaultBindings( renderer, ret.second.renderTarget, pubo, m_materialUBO, - allSamplers(passSamplers, 0)); + allSamplers(passSamplers, 0), extras); } else { @@ -294,7 +435,7 @@ std::pair RenderedISFNode::createPass( // Then we have to use the textures the "main" 
passes are rendering to ret.second.p.srb = score::gfx::createDefaultBindings( renderer, ret.second.renderTarget, pubo, m_materialUBO, - allSamplers(passSamplers, 0)); + allSamplers(passSamplers, 0), extras); } } } @@ -327,12 +468,53 @@ void RenderedISFNode::initPasses( } } + // Lazily compute the storage-binding offset now that pass-samplers are + // known. Each PersistSampler entry in passes.samplers consumes one sampler + // binding in the shader reflection (input_samplers + audio_samplers + + // pass_samplers). Only do this once per node lifetime — m_firstStorageBinding + // stays >= 0 on subsequent edges, but ensureStorageResources is idempotent + // and must run so that any resize reallocates the buffers. + if(m_firstStorageBinding < 0) + { + int passSamplerCount = 0; + for(auto& s : passes.samplers) + if(ossia::get_if(&s)) + passSamplerCount++; + + const int firstStorageBinding + = 3 + (int)m_inputSamplers.size() + (int)m_audioSamplers.size() + + passSamplerCount; + m_firstStorageBinding = firstStorageBinding; + collectGraphicsStorageResources(n.descriptor(), firstStorageBinding, m_storage); + + // Allocate the multiview UBO when MULTIVIEW >= 2 is declared. + if(n.descriptor().multiview_count >= 2) + { + QRhi& rhi = *renderer.state.rhi; + const int mvCount = n.descriptor().multiview_count; + m_multiViewUBO = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, + sizeof(float[16]) * mvCount); + m_multiViewUBO->setName("RenderedISFNode::multiview_ubo"); + SCORE_ASSERT(m_multiViewUBO->create()); + } + } + + // Ensure storage buffers/images exist. Safe to call per edge: it's idempotent + // and resizes to match renderSize. Then borrow any upstream-provided UBOs / + // read-only SSBOs (no SRB patch here — SRBs don't exist yet). 
+ ensureStorageResources( + *renderer.state.rhi, res, renderer, n.descriptor(), m_storage, + renderer.state.renderSize); + bindUpstreamBuffers(renderer, n.input, m_storage); + bool previousPassIsPersistent = false; for(std::size_t i = 0; i < passes.samplers.size(); i++) { auto& pass = passes.samplers[i]; const auto [p1, p2] - = createPass(renderer, passes.samplers, pass, previousPassIsPersistent); + = createPass(renderer, passes.samplers, pass, model_passes[i], + previousPassIsPersistent); if(p1.p.pipeline) { passes.passes.push_back(p1); @@ -386,6 +568,14 @@ void RenderedISFNode::initPasses( } void RenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + for(Edge* edge : n.output[0]->edges) + addOutputPass(renderer, *edge, res); +} + +void RenderedISFNode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { QRhi& rhi = *renderer.state.rhi; @@ -407,6 +597,8 @@ void RenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); m_materialUBO->setName("RenderedISFNode::init::m_materialUBO"); SCORE_ASSERT(m_materialUBO->create()); + if(n.m_material_data) + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get()); } // Create the samplers @@ -414,40 +606,116 @@ void RenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) SCORE_ASSERT(m_inputSamplers.empty()); SCORE_ASSERT(m_audioSamplers.empty()); - m_inputSamplers = initInputSamplers(this->n, renderer, n.input); + m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor()); m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures); - // Create the passes + m_initialized = true; +} - for(Edge* edge : n.output[0]->edges) +void RenderedISFNode::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) 
+ { + initPasses(rt, renderer, edge, renderer.renderSize(&edge), res); + } +} + +void RenderedISFNode::addInputEdge( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(edge.sink->type == Types::Image) + { + // Find upstream texture through the upstream renderer's textureForOutput(). + if(auto it = edge.source->node->renderedNodes.find(&renderer); + it != edge.source->node->renderedNodes.end()) + { + if(auto* tex = it->second->textureForOutput(*edge.source)) + { + auto rt = renderer.renderTargetForInputPort(*edge.sink); + updateInputTexture(*edge.sink, tex, rt.depthTexture); + } + } + } +} + +void RenderedISFNode::removeInputEdge(RenderList& renderer, Edge& edge) +{ + if(edge.sink && edge.sink->type == Types::Image) { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) + // Swap image-sampler bindings to empty-texture placeholders so the SRB + // never holds pointers to the just-released upstream renderer's + // textures. Mirrors SimpleRenderedISFNode::removeInputEdge — same + // dangling VkImageView / end-of-frame barrier crash applies to the + // multi-pass ISF renderer whenever a cable is cut at runtime. Include + // the depth companion when the port declared DEPTH: true. + const bool hasDepthCompanion + = (edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + QRhiTexture* depthFallback + = hasDepthCompanion ? 
&renderer.emptyTexture() : nullptr; + updateInputTexture(*edge.sink, &renderer.emptyTexture(), depthFallback); + } +} + +void RenderedISFNode::removeOutputPass(RenderList& renderer, Edge& edge) +{ + auto it = ossia::find_if(m_passes, [&](auto& p) { return p.first == &edge; }); + if(it != m_passes.end()) + { + auto& [passes, altPasses, passSamplers] = it->second; + + std::size_t num = passes.size(); + for(std::size_t i = 0; i < num; i++) { - initPasses(rt, renderer, *edge, renderer.renderSize(edge), res); + auto& pass = passes[i]; + auto& altpass = altPasses[i]; + auto& sampler = passSamplers[i]; + + if(pass.p.srb != altpass.p.srb) + { + altpass.p.srb->deleteLater(); + } + + pass.p.release(); + + if(pass.processUBO) + pass.processUBO->deleteLater(); + + if(auto p = ossia::get_if(&sampler)) + { + delete p->sampler; + } } + + m_passes.erase(it); } } +bool RenderedISFNode::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_passes, [&](const auto& p) { return p.first == &edge; }) + != m_passes.end(); +} + void RenderedISFNode::update( RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) { SCORE_ASSERT(m_passes.size() > 0); - // PASSINDEX must be set to the last index - // FIXME - - // FIXME should be -2 if last pass is persistent - if(n.m_descriptor.passes.back().persistent) - n.standardUBO.passIndex = m_passes.size() - 2; - else - n.standardUBO.passIndex = m_passes.size() - 1; + // Persistent-storage ping-pong happens once per frame. Reset the guard + // here so whichever edge's runRenderPass fires first does the swap. + m_storageSwappedThisFrame = false; + // passIndex gets set per-pass in the processUBO update loop below; no + // need to seed a value here (previous code used m_passes.size() — which + // is the edge count, not the pass count — and was then overwritten). 
n.standardUBO.frameIndex++; // Update audio textures bool audioChanged = false; + std::size_t audio_idx = 0; for(auto& audio : n.m_audio_textures) { if(std::optional sampl @@ -456,7 +724,14 @@ void RenderedISFNode::update( // Audio texture changed, this means the material needs update audioChanged = true; - auto& [rhiSampler, tex] = *sampl; + auto& [rhiSampler, tex, fb_] = *sampl; + // Keep m_audioSamplers[i].texture in sync with the live GPU texture so + // any later pipeline rebuild (rt_changed path in RenderList::render + // calling removeOutputPass + addOutputPass) uses the live binding + // instead of the placeholder empty texture. + if(audio_idx < m_audioSamplers.size()) + m_audioSamplers[audio_idx].texture = tex; + for(auto& [e, p] : m_passes) { for(auto& pass : p.passes) @@ -467,6 +742,7 @@ void RenderedISFNode::update( *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture()); } } + ++audio_idx; } // Update material @@ -475,6 +751,28 @@ void RenderedISFNode::update( char* data = n.m_material_data.get(); res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); } + materialChanged = false; + + // Reset event ports now that the UBO has captured their pulse value. + // If anything fired, force next frame's upload so the reset-to-zero + // propagates out through the normally-gated upload path. + if(n.resetEventPortsAfterFrame()) + materialChanged = true; + + // Re-bind upstream UBOs / read-only SSBOs on every pass's SRB. Cables can + // be added or replaced after init, so this runs every frame. Both the main + // and alt chains hold independent descriptor sets referencing the same + // storage resources; both must be patched. bindUpstreamBuffers is + // idempotent when the pointer already matches. 
+ for(auto& [e, p] : m_passes) + { + for(auto& pass : p.passes) + if(pass.p.srb) + bindUpstreamBuffers(renderer, n.input, m_storage, pass.p.srb); + for(auto& pass : p.altPasses) + if(pass.p.srb) + bindUpstreamBuffers(renderer, n.input, m_storage, pass.p.srb); + } // Update all the process UBOs @@ -518,7 +816,15 @@ void RenderedISFNode::update( void RenderedISFNode::release(RenderList& r) { - // customRelease + releaseState(r); +} + +void RenderedISFNode::releaseState(RenderList& r) +{ + if(!m_initialized) + return; + + // Release all remaining passes { for(auto& texture : n.m_audio_textures) { @@ -530,7 +836,6 @@ void RenderedISFNode::release(RenderList& r) if(tex != &r.emptyTexture()) tex->deleteLater(); } - // FIXME remove it from n.m_audio_textures? } } @@ -538,8 +843,8 @@ void RenderedISFNode::release(RenderList& r) { auto& [passes, altPasses, passSamplers] = allPasses; - std::size_t n = passes.size(); - for(std::size_t i = 0; i < n; i++) + std::size_t num = passes.size(); + for(std::size_t i = 0; i < num; i++) { auto& pass = passes[i]; auto& altpass = altPasses[i]; @@ -558,12 +863,6 @@ void RenderedISFNode::release(RenderList& r) if(auto p = ossia::get_if(&sampler)) { delete p->sampler; - // TODO check texture deletion ??? - // texture isdeleted elsewxheree - } - else - { - // It's the render target of another node, do not touch it } } } @@ -578,13 +877,11 @@ void RenderedISFNode::release(RenderList& r) for(auto sampler : m_inputSamplers) { delete sampler.sampler; - // texture isdeleted elsewxheree } m_inputSamplers.clear(); for(auto sampler : m_audioSamplers) { delete sampler.sampler; - // texture isdeleted elsewxheree } m_audioSamplers.clear(); @@ -592,6 +889,19 @@ void RenderedISFNode::release(RenderList& r) m_materialUBO = nullptr; m_meshBuffer = {}; + + // Release storage resources (owned SSBOs + storage images). 
+ m_storage.release(); + m_firstStorageBinding = -1; + m_storageSwappedThisFrame = false; + + if(m_multiViewUBO) + { + m_multiViewUBO->deleteLater(); + m_multiViewUBO = nullptr; + } + + m_initialized = false; } void RenderedISFNode::runInitialPasses( @@ -630,8 +940,10 @@ void RenderedISFNode::runInitialPasses( auto srb = pass.p.srb; auto texture = pass.renderTarget.texture; - // TODO need to free stuff - cb.beginPass(rt, Qt::black, {1.0f, 0}, updateBatch); + // Note: updateBatch ownership transfers to QRhi on beginPass; per-pass + // state (pipeline/srb/processUBO/renderTarget) is owned by m_passes and + // released in releaseState() / removeOutputPass(). Nothing to free here. + cb.beginPass(rt, Qt::black, {0.0f, 0}, updateBatch); updateBatch = nullptr; { cb.setGraphicsPipeline(pipeline); @@ -681,7 +993,10 @@ void RenderedISFNode::runRenderPass( auto srb = pass.p.srb; auto texture = pass.renderTarget.texture; - // TODO need to free stuff + // No allocations in this scope: this function records draw calls into a + // command buffer already opened by RenderList::render(). updateBatch is + // managed by the caller; per-pass state lives in m_passes and is released + // in releaseState() / removeOutputPass(). { cb.setGraphicsPipeline(pipeline); cb.setShaderResources(srb); @@ -703,6 +1018,32 @@ void RenderedISFNode::runRenderPass( using namespace std; swap(passes, altPasses); + + // Persistent-storage ping-pong. Mutate the shared state exactly once per + // frame, then re-apply bindings to every SRB across every edge/chain so + // each draw next frame sees the swapped pointers. Patching only one SRB + // would leave others referencing stale buffers and read wrong data. 
+ if(!m_storageSwappedThisFrame) + { + m_storageSwappedThisFrame = true; + swapPersistentSSBOsState(m_storage); + for(auto& [e, p] : m_passes) + { + const std::size_t num = p.passes.size(); + for(std::size_t i = 0; i < num; i++) + { + auto* mainSrb = p.passes[i].p.srb; + if(mainSrb) + reapplyStorageBindings(m_storage, *mainSrb); + // altPass's SRB aliases the main one for non-persistent passes; skip + // the second reapply in that case — replaceBuffer is idempotent but + // srb->create() is not free. + auto* altSrb = p.altPasses[i].p.srb; + if(altSrb && altSrb != mainSrb) + reapplyStorageBindings(m_storage, *altSrb); + } + } + } } AudioTextureUpload::AudioTextureUpload() @@ -737,9 +1078,14 @@ void AudioTextureUpload::processTemporal( m_scratchpad[i] = 0.5f + audio.data[i] / 2.f; } - // Copy it + // Copy it. Texture layout is samples × channels (width × height). QRhiTextureSubresourceUploadDescription subdesc( m_scratchpad.data(), audio.data.size() * sizeof(float)); + if(audio.channels > 0) + { + const int samples_per_channel = int(audio.data.size()) / audio.channels; + subdesc.setSourceSize(QSize(samples_per_channel, audio.channels)); + } QRhiTextureUploadEntry entry{0, 0, subdesc}; QRhiTextureUploadDescription desc{entry}; res.uploadTexture(rhiTexture, desc); @@ -751,7 +1097,8 @@ void AudioTextureUpload::processHistogram( // Size of the audio input buffer std::size_t audioInputBufferSize = audio.data.size() / audio.channels; - // Effective size of the FFT data we want to use (e.g. without DC offset and nyquist coefficient at the end) + // Effective size of the FFT data we want to use (skips DC and nyquist bins; + // this also matches the texture width picked in updateAudioTexture). 
if(audioInputBufferSize < 4) return; std::size_t fftSize = audioInputBufferSize / 2 - 2; @@ -769,48 +1116,60 @@ void AudioTextureUpload::processHistogram( const float byte_norm = 255.f / (dbmax - dbmin); const float norm = 2.f / (fftSize); - for(int i = 0; i < 1; i++) + // Histogram treats channel 0 as the source — it's a scrolling + // spectrogram display and summing / interleaving channels would blur + // the visualisation. Explicitly use i=0 rather than the old + // `for(int i = 0; i < 1; i++)` single-iteration loop. + const int i = 0; { float* inputData = audio.data.data() + i * audioInputBufferSize; double current_window_value = 0.; - // Basic window function on the audio buffer + // Basic triangular window function on the audio buffer double window_increment = 1. / (audioInputBufferSize / 2); - for(int s = 0; s < audioInputBufferSize / 2; s++) + for(int s = 0; s < (int)(audioInputBufferSize / 2); s++) { inputData[s] *= current_window_value; current_window_value += window_increment; } - for(int s = audioInputBufferSize / 2; s < audioInputBufferSize; s++) + for(int s = (int)(audioInputBufferSize / 2); s < (int)audioInputBufferSize; s++) { current_window_value -= window_increment; inputData[s] *= current_window_value; } - // Compute fft. Spectrum is in CCs format. + // Compute fft. Spectrum is in CCs format — index 0 is DC, the last + // coefficient is nyquist. Skip both. auto spectrum = m_fft.execute(inputData, audioInputBufferSize); float* outputSpectrum = m_scratchpad.data(); - // Compute the actual data to show - for(std::size_t k = 1; k < fftSize - 1; k++) + // Fill all fftSize slots of the new row. Previously the loop bounds + // (k=1..fftSize-1) left the last two pixels of each row untouched, + // leaking stale data from a 240-frame-old row into every output. 
+ for(std::size_t k = 0; k < fftSize; k++) { + const std::size_t bin = k + 1; // bins 1..fftSize (skip DC at 0) const float float_magnitude = std::sqrt( - spectrum[k][0] * spectrum[k][0] + spectrum[k][1] * spectrum[k][1]) + spectrum[bin][0] * spectrum[bin][0] + + spectrum[bin][1] * spectrum[bin][1]) * norm; - const float float_db = 20.f * std::log10(std ::max(float_magnitude, 1e-10f)); + const float float_db = 20.f * std::log10(std::max(float_magnitude, 1e-10f)); const float magnitude_byte = (float_db - dbmin) * byte_norm; - // We are going to put the data in a R32F texture thus we scale to [0; 1] - outputSpectrum[k - 1] = std::clamp(magnitude_byte, 0.f, 255.f) / 255.f; + // R32F texture with values scaled to [0; 1] + outputSpectrum[k] = std::clamp(magnitude_byte, 0.f, 255.f) / 255.f; } } } - // Copy it + // Copy it. setSourceSize makes the upload strides explicit so Qt RHI + // never second-guesses the row pitch — processSpectral sets it, keeping + // the histogram path aligned avoids a subtle inconsistency in validation. QRhiTextureSubresourceUploadDescription subdesc( m_scratchpad.data(), m_scratchpad.size() * sizeof(float)); + subdesc.setSourceSize(QSize((int)fftSize, 240)); QRhiTextureUploadEntry entry{0, 0, subdesc}; QRhiTextureUploadDescription desc{entry}; res.uploadTexture(rhiTexture, desc); @@ -865,46 +1224,62 @@ std::optional AudioTextureUpload::updateAudioTexture( return {}; } - auto& [rhiSampler, rhiTexture] = it->second; - const auto curSz = (rhiTexture) ? rhiTexture->pixelSize() : QSize{}; - int numSamples = curSz.width() * curSz.height(); - if(numSamples != std::max(1, int(audio.data.size())) || !rhiTexture) + auto& [rhiSampler, rhiTexture, fb_] = it->second; + + // The texture the shader wants for the current (mode, samples, channels) + // triple. 
Previously the detection compared `curSz.w * curSz.h` against + // `audio.data.size()` — correct for Waveform (a W=samples × H=channels + // layout has pixel_count == raw_sample_count), but completely wrong for + // FFT (half the pixels) and Histogram (H is hard-coded 240 so pixel count + // bears no relation to the raw audio buffer). The mismatch meant every + // frame saw "size changed → destroy+recreate the texture", which also + // forced a full SRB rebuild via replaceTexture in the caller and + // thrashed the FFT planner's reset() cache. + const bool has_data = audio.channels > 0 && !audio.data.empty(); + int samples = 0; + QSize desired{1, 1}; + if(has_data) { - if(audio.channels > 0) + samples = int(audio.data.size()) / audio.channels; + if(samples % 2 != 0) + samples++; + switch(audio.mode) { - int samples = audio.data.size() / audio.channels; - if(samples % 2 != 0) - samples++; - int pixelWidth = 0; - int pixelHeight = 0; - switch(audio.mode) - { - case AudioTexture::Mode::Waveform: - pixelWidth = samples; - pixelHeight = audio.channels; - break; - case AudioTexture::Mode::FFT: - pixelWidth = samples / 2; - pixelHeight = audio.channels; - break; - case AudioTexture::Mode::Histogram: - pixelWidth = samples / 2 - 2; - pixelHeight = 240; - break; - } + case AudioTexture::Mode::Waveform: + desired = {samples, audio.channels}; + break; + case AudioTexture::Mode::FFT: + desired = {std::max(1, samples / 2), audio.channels}; + break; + case AudioTexture::Mode::Histogram: + // Histogram is a scrolling spectrogram: rows = frames of FFT history. + desired = {std::max(1, samples / 2 - 2), 240}; + break; + } + } + const QSize curSz = rhiTexture ? rhiTexture->pixelSize() : QSize{}; + if(curSz != desired || !rhiTexture) + { + if(has_data) + { m_fft.reset(samples); if(rhiTexture) { + // destroy()+create() on the same QRhiTexture wrapper swaps the + // native handle (VkImage / ID3D12Resource / MTLTexture). 
Flag + // the change so the caller re-runs replaceTexture to refresh + // the SRB's descriptor set binding. rhiTexture->destroy(); - rhiTexture->setPixelSize({pixelWidth, pixelHeight}); + rhiTexture->setPixelSize(desired); rhiTexture->create(); + textureChanged = true; } else { rhiTexture = rhi.newTexture( - QRhiTexture::R32F, {pixelWidth, pixelHeight}, 1, QRhiTexture::Flag{}); + QRhiTexture::R32F, desired, 1, QRhiTexture::Flag{}); rhiTexture->setName("AudioTextureUpload::rhiTexture"); auto created = rhiTexture->create(); SCORE_ASSERT(created); @@ -915,34 +1290,33 @@ std::optional AudioTextureUpload::updateAudioTexture( { if(rhiTexture) { + // Audio went quiet: drop our texture and fall back to the + // RenderList's shared emptyTexture via the caller. Never resize + // the stored rhiTexture in-place — when that pointer aliased + // `&renderer.emptyTexture()` (old no-data init path) a resize + // would have destroyed the shared empty texture used by every + // unbound sampler in every node on this RenderList. rhiTexture->destroy(); - rhiTexture->setPixelSize({1, 1}); - rhiTexture->create(); - } - else - { - rhiTexture = &renderer.emptyTexture(); + rhiTexture->deleteLater(); + rhiTexture = nullptr; textureChanged = true; } + // else: stays nullptr; caller already bound emptyTexture on a + // previous pass. No need to re-fire replaceTexture. 
} } if(rhiTexture) { - // Process the audio data auto sz = rhiTexture->pixelSize(); if(sz.width() * sz.height() > 1) this->process(audio, res, rhiTexture); } if(textureChanged) - { return it->second; - } else - { return {}; - } } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp index 341bb6a2d6..07adaa0e75 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFNode.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include @@ -11,12 +12,22 @@ struct RenderedISFNode : score::gfx::NodeRenderer virtual ~RenderedISFNode(); - void updateInputTexture(const Port& input, QRhiTexture* tex) override; + void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override; + void updateInputSamplerFilter(const Port& input, const RenderTargetSpecs& spec) override; + void addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeInputEdge(RenderList& renderer, Edge& edge) override; void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* e) override; void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + void runInitialPasses( RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, Edge& edge) override; @@ -26,7 +37,8 @@ struct RenderedISFNode : score::gfx::NodeRenderer private: std::pair createPass( RenderList& renderer, ossia::small_vector& m_passSamplers, - PassOutput target, bool 
previousPassIsPersistent); + PassOutput target, const isf::pass& modelPass, + bool previousPassIsPersistent); std::pair createFinalPass( RenderList& renderer, ossia::small_vector& m_passSamplers, @@ -65,6 +77,22 @@ struct RenderedISFNode : score::gfx::NodeRenderer int m_materialSize{}; AudioTextureUpload m_audioTex; + + // Graphics-visible storage buffers / images declared by the shader + // (storage_input / csf_image_input / uniform_input). See IsfBindingsBuilder. + GraphicsStorageResources m_storage; + + // Multiview UBO: N × mat4 view-projection matrices, when MULTIVIEW >= 2. + QRhiBuffer* m_multiViewUBO{}; + + // First binding slot reserved for storage resources; determined lazily in + // initPasses once the pass-sampler count is known (Rendered differs from + // Simple by having one extra sampler per inner pass). + int m_firstStorageBinding{-1}; + + // Guard so the persistent-SSBO state swap runs exactly once per frame even + // when the node has multiple output edges (each triggers runRenderPass). 
+ bool m_storageSwappedThisFrame{false}; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp index 9219f2d95a..4694677869 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFSamplerUtils.hpp @@ -7,6 +7,28 @@ namespace score::gfx { +namespace detail +{ +inline QRhiSampler::Filter parseAudioFilter(const std::string& s) +{ + if(s.empty()) return QRhiSampler::Linear; + std::string v = s; + for(auto& c : v) c = (char)tolower(c); + if(v == "nearest") return QRhiSampler::Nearest; + return QRhiSampler::Linear; +} +inline QRhiSampler::AddressMode parseAudioWrap(const std::string& s) +{ + if(s.empty()) return QRhiSampler::ClampToEdge; + std::string v = s; + for(auto& c : v) c = (char)tolower(c); + for(auto& c : v) if(c == '-') c = '_'; + if(v == "repeat") return QRhiSampler::Repeat; + if(v == "mirror" || v == "mirrored_repeat") return QRhiSampler::Mirror; + return QRhiSampler::ClampToEdge; +} +} + inline std::vector initAudioTextures(RenderList& renderer, std::list& textures) { @@ -14,13 +36,14 @@ initAudioTextures(RenderList& renderer, std::list& textures) QRhi& rhi = *renderer.state.rhi; for(auto& texture : textures) { + const auto filter = detail::parseAudioFilter(texture.filter); + const auto wrap = detail::parseAudioWrap(texture.wrap); auto sampler = rhi.newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); + filter, filter, QRhiSampler::None, wrap, wrap); sampler->setName("ISFNode::initAudioTextures::sampler"); sampler->create(); - samplers.push_back({sampler, &renderer.emptyTexture()}); + samplers.push_back({sampler, nullptr}); texture.samplers[&renderer] = {sampler, nullptr}; } return samplers; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp 
b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp index 9b9d3b0862..7cfa08b677 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedISFUtils.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -9,13 +10,6 @@ namespace score::gfx { -struct Pass -{ - TextureRenderTarget renderTarget; - Pipeline p; - QRhiBuffer* processUBO{}; -}; - struct PersistSampler { QRhiSampler* sampler{}; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp index 5d8893466e..4dd58d4aed 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.cpp @@ -1,14 +1,61 @@ +#include +#include +#include #include #include +#include #include #include #include +#include +#include +#include + +#include + +#include +#include namespace score::gfx { +static const constexpr auto rrp_blit_vs = R"_(#version 450 +layout(location = 0) in vec2 position; +layout(location = 1) in vec2 texcoord; +layout(location = 0) out vec2 v_texcoord; + +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; + +out gl_PerVertex { vec4 gl_Position; }; + +void main() +{ + v_texcoord = texcoord; + gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.); +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + gl_Position.y = - gl_Position.y; +#endif +} +)_"; + +static const constexpr auto rrp_blit_fs = R"_(#version 450 +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; + +layout(binding = 3) uniform sampler2D blitTexture; +layout(location = 0) in vec2 v_texcoord; +layout(location = 0) out vec4 fragColor; + +void main() { fragColor = texture(blitTexture, v_texcoord); } +)_"; + 
RenderedRawRasterPipelineNode::RenderedRawRasterPipelineNode( const ISFNode& node) noexcept : score::gfx::NodeRenderer{node} @@ -16,7 +63,7 @@ RenderedRawRasterPipelineNode::RenderedRawRasterPipelineNode( { } -void RenderedRawRasterPipelineNode::updateInputTexture(const Port& input, QRhiTexture* tex) +void RenderedRawRasterPipelineNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { // Find which image-type sampler index this port corresponds to int sampler_idx = 0; @@ -25,20 +72,97 @@ void RenderedRawRasterPipelineNode::updateInputTexture(const Port& input, QRhiTe if(p == &input) break; if(p->type == Types::Image) + { sampler_idx++; + if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) + sampler_idx++; + } } + // Match key for replaceTexture MUST be the sampler that's actually + // in the SRB binding. allSamplers() (line ~155-170) substitutes + // m_inputSamplerOverrides[i] for m_inputSamplers[i] when an + // override is present (per-bucket sampler from ScenePreprocessor). + // Same fix as commit 7d1afd27b applied to FIX-C — see the long + // comment there. Without this updateInputTexture silently no-ops on + // every override-bound entry. 
+ auto srbKey = [&](int i) -> QRhiSampler* { + if(i >= 0 && i < (int)m_inputSamplerOverrides.size() + && m_inputSamplerOverrides[i]) + return m_inputSamplerOverrides[i]; + return m_inputSamplers[i].sampler; + }; + if(sampler_idx < (int)m_inputSamplers.size()) { auto& sampl = m_inputSamplers[sampler_idx]; if(sampl.texture != tex) { sampl.texture = tex; + auto* key = srbKey(sampler_idx); for(auto& [e, pass] : m_passes) if(pass.p.srb) - score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex); + score::gfx::replaceTexture(*pass.p.srb, key, tex); + } + + if(depthTex + && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth + && sampler_idx + 1 < (int)m_inputSamplers.size()) + { + auto& depthSampl = m_inputSamplers[sampler_idx + 1]; + if(depthSampl.texture != depthTex) + { + depthSampl.texture = depthTex; + auto* depthKey = srbKey(sampler_idx + 1); + for(auto& [e, pass] : m_passes) + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, depthKey, depthTex); + } + } + } +} + +QRhiTexture* RenderedRawRasterPipelineNode::textureForOutput(const Port& output) +{ + if(!m_hasMRT) + return nullptr; + + // Find which output port index this is + const auto& outputs = n.descriptor().outputs; + for(int i = 0; i < (int)n.output.size() && i < (int)outputs.size(); i++) + { + if(n.output[i] == &output) + { + // Depth outputs expose the depth attachment directly. With + // EXECUTION_MODEL: PER_LAYER on a depth target this is the + // multi-layer Texture2DArray populated layer-by-layer via the + // scratch+copy dance in runInitialPasses; for single-layer + // depth shaders (shadow_map.frag) it's the plain 2D depth + // texture. Either way, downstream wires it through + // SceneResourceRoute(ShadowMapArray) into scene_state. 
+ if(outputs[i].type == "depth") + return m_mrtRenderTarget.depthTexture; + + // Color output: index 0 = primary texture, 1+ = additional + int colorIdx = 0; + for(int j = 0; j < i; j++) + if(outputs[j].type != "depth") + colorIdx++; + + // CUBEMAP + MULTIVIEW shim: the public handle is the CubeMap, + // not the shadow TextureArray that we actually render into. + // Consumers bind this as samplerCube without knowing about the + // array-then-copy dance happening under the hood. + if(colorIdx == m_cubeCopyOutputIdx && m_cubeCopyCube) + return m_cubeCopyCube; + + if(colorIdx == 0) + return m_mrtRenderTarget.texture; + else if(colorIdx - 1 < (int)m_mrtRenderTarget.additionalColorTextures.size()) + return m_mrtRenderTarget.additionalColorTextures[colorIdx - 1]; } } + return nullptr; } std::vector RenderedRawRasterPipelineNode::allSamplers() const noexcept @@ -46,6 +170,21 @@ std::vector RenderedRawRasterPipelineNode::allSamplers() const noexcept // Input ports std::vector samplers = m_inputSamplers; + // Apply non-owning per-port sampler overrides published by upstream + // geometry's auxiliary_texture::sampler_handle (e.g., the per-bucket + // QRhiSampler from ScenePreprocessor's per-glTF-texture sampler + // config). The override is applied only on the SRB-build copy here; + // m_inputSamplers itself keeps its original (owning) sampler so + // release() can `delete sampler.sampler` without freeing a registry- + // owned sampler. 
+ const std::size_t n_overrides + = std::min(samplers.size(), m_inputSamplerOverrides.size()); + for(std::size_t i = 0; i < n_overrides; ++i) + { + if(m_inputSamplerOverrides[i]) + samplers[i].sampler = m_inputSamplerOverrides[i]; + } + // Audio textures samplers.insert(samplers.end(), m_audioSamplers.begin(), m_audioSamplers.end()); @@ -53,7 +192,8 @@ std::vector RenderedRawRasterPipelineNode::allSamplers() const noexcept } void RenderedRawRasterPipelineNode::initPass( - const TextureRenderTarget& renderTarget, RenderList& renderer, Edge& edge) + const TextureRenderTarget& renderTarget, RenderList& renderer, + QRhiResourceUpdateBatch& res, Edge& edge) { auto& model_passes = n.descriptor().passes; SCORE_ASSERT(model_passes.size() == 1); @@ -63,7 +203,6 @@ void RenderedRawRasterPipelineNode::initPass( QRhiBuffer* pubo{}; pubo = rhi.newBuffer( QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO)); - qWarning() << "RRP ALLOC [processUBO] size=" << sizeof(ProcessUBO); pubo->setName("RenderedRawRasterPipelineNode::initPass::pubo"); pubo->create(); @@ -86,22 +225,57 @@ void RenderedRawRasterPipelineNode::initPass( ossia::small_vector additionalBindings; + // INPUTS storage trio (storage_input SSBO / csf_image_input image2D / + // uniform_input UBO) — order MUST match isf_emit_graphics_storage's + // GLSL emission (declaration order, sequential bindings starting at + // max_binding == 3 + samplers count). + { + auto extras = buildExtraBindings(m_storage); + for(const auto& b : extras) + { + additionalBindings.push_back(b); + max_binding++; + } + } + for(auto& aux : m_auxiliarySSBOs) { - // If no buffer yet, create a small dummy so the descriptor set is valid + // If no buffer yet, create a small dummy so the descriptor set is valid. + // Dummy usage flag matches the aux kind so the created buffer can be + // bound as the intended descriptor type. 
if(!aux.buffer) { - auto* dummy = rhi.newBuffer( - QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, 16); - dummy->setName("RRP_aux_dummy"); + auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer + : QRhiBuffer::StorageBuffer; + const int64_t dummySize = aux.is_uniform ? 256 : 16; + auto* dummy = rhi.newBuffer(QRhiBuffer::Immutable, usage, dummySize); + dummy->setName(aux.is_uniform ? "RRP_ubo_dummy" : "RRP_aux_dummy"); dummy->create(); aux.buffer = dummy; - aux.size = 16; + aux.size = dummySize; aux.owned = true; } + // Persistent ping-pong pair: emit the read-only _prev binding + // FIRST (binding N), then the writable binding (binding N+1). + // GLSL emission uses the same ordering. + if(aux.persistent && aux.prev_buffer) + { + additionalBindings.push_back( + QRhiShaderResourceBinding::bufferLoad( + max_binding, bindingStages, aux.prev_buffer)); + aux.prev_binding = max_binding; + max_binding++; + } + QRhiShaderResourceBinding binding; - if(aux.access == "read_only") + if(aux.is_uniform) + { + // uniform_input → std140 UBO binding + binding = QRhiShaderResourceBinding::uniformBuffer( + max_binding, bindingStages, aux.buffer); + } + else if(aux.access == "read_only") binding = QRhiShaderResourceBinding::bufferLoad( max_binding, bindingStages, aux.buffer); else if(aux.access == "write_only") @@ -112,6 +286,36 @@ void RenderedRawRasterPipelineNode::initPass( max_binding, bindingStages, aux.buffer); additionalBindings.push_back(binding); + aux.binding = max_binding; // remember slot for per-sub-mesh patching + max_binding++; + } + + // Auxiliary texture / storage-image bindings: placed right after + // aux SSBOs, matching GLSL emission order. Dispatch on is_storage + // so TYPE:"image" gets sampledTexture and TYPE:"storage_image" + // gets imageLoad / imageStore / imageLoadStore per `access`. 
+ for(auto& ats : m_auxTextureSamplers) + { + QRhiShaderResourceBinding b; + if(ats.is_storage) + { + if(ats.access == "read_only") + b = QRhiShaderResourceBinding::imageLoad( + max_binding, bindingStages, ats.texture, 0); + else if(ats.access == "write_only") + b = QRhiShaderResourceBinding::imageStore( + max_binding, bindingStages, ats.texture, 0); + else + b = QRhiShaderResourceBinding::imageLoadStore( + max_binding, bindingStages, ats.texture, 0); + } + else + { + b = QRhiShaderResourceBinding::sampledTexture( + max_binding, bindingStages, ats.texture, ats.sampler); + } + additionalBindings.push_back(b); + ats.binding = max_binding; max_binding++; } @@ -142,19 +346,73 @@ void RenderedRawRasterPipelineNode::initPass( } ps->setSampleCount(pipelineSamples); - m_mesh->preparePipeline(*ps); - - // Override topology and blend after preparePipeline, - // since the mesh may set its own defaults (e.g. CSF geometry outputs as points) - QRhiGraphicsPipeline::TargetBlend premulAlphaBlend; - premulAlphaBlend.enable = mat.enable_blend; - premulAlphaBlend.srcColor = mat.src_color; - premulAlphaBlend.dstColor = mat.dst_color; - premulAlphaBlend.opColor = mat.op_color; - premulAlphaBlend.srcAlpha = mat.src_alpha; - premulAlphaBlend.dstAlpha = mat.dst_alpha; - premulAlphaBlend.opAlpha = mat.op_alpha; - ps->setTargetBlends({premulAlphaBlend}); + // Procedural draws (VERTEX_INPUTS: [] + VERTEX_COUNT) don't need + // a mesh — skip preparePipeline (no vertex-input layout bindings + // to set). + if(m_mesh) + m_mesh->preparePipeline(*ps); + + // Compute effective pipeline state: the descriptor's PIPELINE_STATE (if + // any) wins over the legacy material-UBO-driven blend. When no state is + // declared (empty pipeline_state) we keep the legacy behaviour: blending + // driven by the material's runtime-editable blend UI + hardcoded depth + // test/write. This preserves bit-exact output for existing shaders. 
+ const auto& desc = n.m_descriptor; + const bool hasDescriptorState = stateAffectsPipeline(desc.default_state); + + if(hasDescriptorState) + { + // New path: pipeline_state drives blend/depth/cull/stencil. Seed the + // legacy material-UBO-driven blend on every attachment first so that + // a partial PIPELINE_STATE declaration (e.g. just CULL_MODE) doesn't + // silently lose the runtime blend UI's effect; applyPipelineState only + // overrides blend when BLEND was explicitly declared. + QRhiGraphicsPipeline::TargetBlend seededBlend; + seededBlend.enable = mat.enable_blend; + seededBlend.srcColor = mat.src_color; + seededBlend.dstColor = mat.dst_color; + seededBlend.opColor = mat.op_color; + seededBlend.srcAlpha = mat.src_alpha; + seededBlend.dstAlpha = mat.dst_alpha; + seededBlend.opAlpha = mat.op_alpha; + QList seedBlends; + for(int i = 0; i < std::max(1, renderTarget.colorAttachmentCount()); i++) + seedBlends.append(seededBlend); + ps->setTargetBlends(seedBlends.begin(), seedBlends.end()); + ps->setDepthTest(true); + ps->setDepthWrite(true); + // Reverse-Z project rule (applyPipelineState overrides only if the + // shader explicitly declares depth_compare). + ps->setDepthOp(QRhiGraphicsPipeline::Greater); + + const bool depthAvailable + = (renderTarget.depthTexture != nullptr) + || (renderTarget.depthRenderBuffer != nullptr) + || (renderTarget.msDepthTexture != nullptr); + applyPipelineState( + *ps, desc.default_state, renderTarget.colorAttachmentCount(), + depthAvailable, /*wantsDepthByDefault=*/true); + } + else + { + // Legacy path: blend from material UBO, depth hardcoded on. 
+ QRhiGraphicsPipeline::TargetBlend premulAlphaBlend; + premulAlphaBlend.enable = mat.enable_blend; + premulAlphaBlend.srcColor = mat.src_color; + premulAlphaBlend.dstColor = mat.dst_color; + premulAlphaBlend.opColor = mat.op_color; + premulAlphaBlend.srcAlpha = mat.src_alpha; + premulAlphaBlend.dstAlpha = mat.dst_alpha; + premulAlphaBlend.opAlpha = mat.op_alpha; + ps->setTargetBlends({premulAlphaBlend}); + + ps->setDepthTest(true); + ps->setDepthWrite(true); + // Reverse-Z project rule. + ps->setDepthOp(QRhiGraphicsPipeline::Greater); + } + + // Topology is always runtime-controllable via the material UBO. switch(mat.mode) { default: @@ -170,25 +428,29 @@ void RenderedRawRasterPipelineNode::initPass( } // Remap vertex inputs by semantic: match shader input variable names - // to geometry attribute semantics. - if(auto* geom = m_mesh->semanticGeometry()) + // to geometry attribute semantics. Honour explicit SEMANTIC overrides + // declared on VERTEX_INPUTS in the descriptor (CSF-style). Skip for + // procedural draws (no mesh, no attributes to remap). + // + // The fallback-aware overload resolves "REQUIRED: false" inputs + // missing from upstream geometry to a shared PerInstance identity + // buffer from the RenderList's pool. When no inputs opted in, the + // plan is empty and the draw path short-circuits with zero cost. 
+ FallbackBindingPlan fallbackPlan; + if(m_mesh) { - if(!remapPipelineVertexInputs(*ps, v, *geom)) + if(auto* geom = m_mesh->semanticGeometry()) { - qDebug() << "RawRaster::initPass: remapPipelineVertexInputs FAILED"; - delete ps; - delete pubo; - return; + if(!remapPipelineVertexInputs( + *ps, v, *geom, n.descriptor(), + rhi, renderer.vertexFallbackPool(), res, fallbackPlan)) + { + delete ps; + delete pubo; + return; + } } - qDebug() << "RawRaster::initPass: remapPipelineVertexInputs OK"; } - else - { - qDebug() << "RawRaster::initPass: no semanticGeometry"; - } - - ps->setDepthTest(true); - ps->setDepthWrite(true); ps->setShaderStages({{QRhiShaderStage::Vertex, v}, {QRhiShaderStage::Fragment, s}}); @@ -207,7 +469,9 @@ void RenderedRawRasterPipelineNode::initPass( Pipeline pip = {ps, bindings}; if(pip.pipeline) { - m_passes.emplace_back(&edge, Pass{renderTarget, pip, pubo}); + Pass pass{renderTarget, pip, pubo}; + pass.fallback_bindings = std::move(fallbackPlan); + m_passes.emplace_back(&edge, std::move(pass)); } else { @@ -220,201 +484,1966 @@ void RenderedRawRasterPipelineNode::initPass( } } -void RenderedRawRasterPipelineNode::init( +void RenderedRawRasterPipelineNode::initMRTPass( RenderList& renderer, QRhiResourceUpdateBatch& res) { QRhi& rhi = *renderer.state.rhi; + const auto& outputs = n.descriptor().outputs; + + // Tear down any state left from a previous init pass. `update` calls + // `m_mrtRenderTarget.release()` before hitting us again, but it's not + // responsible for our private per-mip / per-face RT pool or the + // CUBEMAP+MULTIVIEW shim's separate cube handle. Without these drops + // the pool would grow unboundedly across re-inits and, worse, + // m_mipRTs entries would point at a shadow array that's already been + // freed — the next beginPass on one of those stale RTs triggers a + // driver-level crash in CmdBeginRenderPass (NVIDIA specifically). 
+ for(auto& e : m_mipRTs) + { + if(e.renderTarget) + e.renderTarget->deleteLater(); + if(e.renderPass) + e.renderPass->deleteLater(); + if(e.depth) + e.depth->deleteLater(); + } + m_mipRTs.clear(); + m_mipCount = 0; + + // PerLayer depth-path resources. The color path's per-layer RTs are + // owned by m_mipRTs (cleared above); the shared scratch depth + RT + // used by the depth path live outside m_mipRTs and must be dropped + // explicitly here. m_perLayerOutputDepthArray aliases depthTex (owned + // by m_mrtRenderTarget) so it just gets nulled out. + if(m_perLayerSharedRT) + { + m_perLayerSharedRT->deleteLater(); + m_perLayerSharedRT = nullptr; + } + if(m_perLayerSharedRP) + { + m_perLayerSharedRP->deleteLater(); + m_perLayerSharedRP = nullptr; + } + if(m_perLayerScratchDepth) + { + m_perLayerScratchDepth->deleteLater(); + m_perLayerScratchDepth = nullptr; + } + if(m_perLayerDummyColor) + { + m_perLayerDummyColor->deleteLater(); + m_perLayerDummyColor = nullptr; + } + m_perLayerOutputDepthArray = nullptr; + m_perLayerOutputIndex = -1; + m_perLayerIsDepth = false; - // Create the mesh + if(m_cubeCopyCube) { - if(geometry.meshes) + m_cubeCopyCube->deleteLater(); + m_cubeCopyCube = nullptr; + } + // m_cubeCopyShadowArray is a pointer into m_mrtRenderTarget's + // attachments; it's freed by m_mrtRenderTarget.release() in update(). + m_cubeCopyShadowArray = nullptr; + m_cubeCopyOutputIdx = -1; + + // Per-invocation UBO+SRB pool — rebuilt below against the fresh + // main SRB once the pipeline is re-created. Leaking these across + // re-inits would point old SRBs at freed buffers (same failure + // mode as the stale mip RTs above). 
+ for(auto* ubo : m_perInvocationUBOs) + if(ubo) ubo->deleteLater(); + m_perInvocationUBOs.clear(); + for(auto* srb : m_perInvocationSRBs) + if(srb) srb->deleteLater(); + m_perInvocationSRBs.clear(); + + // Target size resolution: honour OUTPUTS.WIDTH / HEIGHT (integer + // literal or string expression) when declared; otherwise fall back + // to the renderer's render-size. A RAW_RASTER_PIPELINE shader has + // one shared render pass, so all attachments end up at the same + // size — pick the first OUTPUT with an explicit size as the RT + // size. Mixing sized and unsized outputs is fine (unsized ones + // just inherit); mixing differing explicit sizes is a shader- + // author error we don't diagnose here. + QSize sz = renderer.state.renderSize; + // First non-zero explicit WIDTH/HEIGHT wins. Depth outputs participate + // too: shadow_cascades.frag (depth-only, no colour outputs at all) + // declares the shadow-map resolution on its depth output, and we want + // that to drive the RT size rather than falling through to renderSize. + for(const auto& out : outputs) + { + int w = out.width_expression.empty() + ? out.width + : resolveIntExpression(out.width_expression, 0); + int h = out.height_expression.empty() + ? out.height + : resolveIntExpression(out.height_expression, 0); + if(w > 0 && h > 0) { - std::tie(m_mesh, m_meshbufs) - = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs); + sz = QSize(w, h); + break; } + } + + // EXECUTION_MODEL resolution. Matters before allocation because + // PER_MIP forces a MipMapped flag on the target output's texture, + // PER_CUBE_FACE forces a CubeMap flag. Manual / Single have no + // effect on allocation — they only influence the render loop in + // runInitialPasses(). 
+ { + const auto& em = n.descriptor().execution_model; + std::string et = em.type; + for(auto& c : et) + c = (char)std::toupper((unsigned char)c); + if(et == "PER_MIP") + m_executionMode = ExecutionMode::PerMip; + else if(et == "PER_CUBE_FACE") + m_executionMode = ExecutionMode::PerCubeFace; + else if(et == "PER_LAYER") + m_executionMode = ExecutionMode::PerLayer; + else if(et == "MANUAL") + m_executionMode = ExecutionMode::Manual; else + m_executionMode = ExecutionMode::Single; + + m_perMipOutputIndex = -1; + m_perCubeFaceOutputIndex = -1; + m_perLayerOutputIndex = -1; + m_perLayerIsDepth = false; + const bool needsTarget = m_executionMode == ExecutionMode::PerMip + || m_executionMode == ExecutionMode::PerCubeFace + || m_executionMode == ExecutionMode::PerLayer; + if(needsTarget && !em.target.empty()) { - if(m_mesh) + // PER_MIP / PER_CUBE_FACE only make sense on colour outputs (depth + // attachments don't have mip chains in our pipeline, and cube + // depth would need a separate code path). PER_LAYER allows either: + // colour TextureArray (setLayer attachment) or depth TextureArray + // (scratch + copy strategy). Walk the raw outputs[] for PER_LAYER + // so depth entries are included; keep the colour-only walk for the + // other two modes. 
+ if(m_executionMode == ExecutionMode::PerLayer) { - if(m_meshbufs.buffers.empty()) + for(int i = 0; i < (int)outputs.size(); ++i) { - m_meshbufs = renderer.initMeshBuffer(*m_mesh, res); + if(outputs[i].name == em.target) + { + m_perLayerOutputIndex = i; + m_perLayerIsDepth = (outputs[i].type == "depth"); + break; + } + } + } + else + { + int colorIdx = 0; + for(const auto& out : outputs) + { + if(out.type == "depth") + continue; + if(out.name == em.target) + { + if(m_executionMode == ExecutionMode::PerMip) + m_perMipOutputIndex = colorIdx; + else + m_perCubeFaceOutputIndex = colorIdx; + break; + } + ++colorIdx; } } + const bool resolved + = (m_executionMode == ExecutionMode::PerMip + && m_perMipOutputIndex >= 0) + || (m_executionMode == ExecutionMode::PerCubeFace + && m_perCubeFaceOutputIndex >= 0) + || (m_executionMode == ExecutionMode::PerLayer + && m_perLayerOutputIndex >= 0); + if(!resolved) + { + qWarning() << "RawRaster EXECUTION_MODEL=" << et.c_str() + << ": TARGET" << QString::fromStdString(em.target) + << "not found among outputs — falling back to SINGLE"; + m_executionMode = ExecutionMode::Single; + } } - } - // Create the material UBO - m_materialSize = n.m_materialSize; - if(m_materialSize > 0) - { - m_materialUBO - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); - qWarning() << "RRP ALLOC [materialUBO] size=" << m_materialSize; - m_materialUBO->setName("RenderedRawRasterPipelineNode::init::m_materialUBO"); - SCORE_ASSERT(m_materialUBO->create()); + // PER_CUBE_FACE + MULTIVIEW on the same shader is redundant: + // multiview already amplifies one draw into 6 face writes, so + // iterating per face would collapse back to the same 6 writes. + // Warn and disable the per-face loop — the cube-copy shim + // (CUBEMAP + MULTIVIEW) handles everything downstream. 
+ if(m_executionMode == ExecutionMode::PerCubeFace + && n.descriptor().multiview_count >= 2) + { + qWarning() + << "RawRaster EXECUTION_MODEL=PER_CUBE_FACE + MULTIVIEW:" + << n.descriptor().multiview_count + << "is redundant. Multiview already amplifies one draw to" + " N faces; PER_CUBE_FACE is for the explicit 6-pass path" + " without multiview. Disabling PER_CUBE_FACE."; + m_executionMode = ExecutionMode::Single; + m_perCubeFaceOutputIndex = -1; + } } - m_modelUBO - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(float[16])); - qWarning() << "RRP ALLOC [modelUBO] size=" << sizeof(float[16]); - m_modelUBO->setName("RenderedRawRasterPipelineNode::init::m_modelUBO"); - SCORE_ASSERT(m_modelUBO->create()); - - // Create the samplers - SCORE_ASSERT(m_passes.empty()); - SCORE_ASSERT(m_inputSamplers.empty()); - SCORE_ASSERT(m_audioSamplers.empty()); - - m_inputSamplers = initInputSamplers(this->n, renderer, n.input); - - m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures); - - // Initialize auxiliary SSBOs from descriptor + // Layered / multiview detection — same shape as SimpleRenderedISFNode. + // `LAYERS: N` on any OUTPUT → N-layer texture array; `MULTIVIEW: N` on + // the descriptor → single-draw-writes-N-views (requires caps.multiview). + // Consumer shaders like `prefilter_ggx.frag` / `irradiance_convolve.frag` + // / `shadow_cascades.frag` all rely on this plumbing to land their + // outputs on the right cubemap face / cascade slice. + int maxLayers = 1; + for(const auto& out : outputs) + if(out.layers > maxLayers) + maxLayers = out.layers; + const int mvCount = n.descriptor().multiview_count; + const bool wantMultiview + = mvCount >= 2 && renderer.state.caps.multiview; + if(wantMultiview && mvCount > maxLayers) + maxLayers = mvCount; + + // MSAA uniform across colour attachments — pick the max SAMPLES declared + // by any OUTPUT and apply it to the render pass. 
Allocated textures stay + // single-sample and serve as MSAA resolve targets (see SimpleRenderedISF + // initMRTPass for the full rationale). + int mrtSamples = std::max(renderer.samples(), 1); + for(const auto& out : outputs) + mrtSamples = std::max(mrtSamples, out.samples); + + // Allocate colour + depth textures per declared OUTPUT. Unknown / empty + // FORMAT falls back to RGBA8 (colour) or D32F (depth). `type: "depth"` + // skips the standard depth-renderbuffer path and uses this texture as + // the depth attachment — required for shadow-map passes that want to + // sample the depth array downstream. + std::vector colorTextures; + QRhiTexture* depthTex = nullptr; + + // Resolve the colour-attachment index of the PER_MIP / PER_CUBE_FACE + // target up-front (walk order matches the colorTextures[] we're + // about to build) so the allocation pass can OR in the matching + // flag only for that texture. + const int perMipColorIdx + = (m_executionMode == ExecutionMode::PerMip) ? m_perMipOutputIndex + : -1; + const int perCubeFaceColorIdx + = (m_executionMode == ExecutionMode::PerCubeFace) + ? m_perCubeFaceOutputIndex + : -1; + int colorAllocIdx = 0; + // Reset the cube-copy shim state; (re)assigned below when an output + // with CUBEMAP:true + MULTIVIEW:N is encountered. 
+ m_cubeCopyOutputIdx = -1; + m_cubeCopyShadowArray = nullptr; + m_cubeCopyCube = nullptr; + + for(const auto& out : outputs) { - const auto& desc = n.descriptor(); - m_auxiliarySSBOs.clear(); - m_auxiliarySSBOs.reserve(desc.auxiliary.size()); - for(const auto& aux : desc.auxiliary) + if(out.type == "depth") { - AuxiliarySSBO ssbo; - ssbo.name = aux.name; - ssbo.access = aux.access; - - // Try to find a matching auxiliary buffer from upstream geometry - if(geometry.meshes && !geometry.meshes->meshes.empty()) + auto depthFmt = score::gfx::parseOutputFormat(out.format, QRhiTexture::D32F); + QRhiTexture::Flags dflags = QRhiTexture::RenderTarget; + if(maxLayers > 1) { - const auto& mesh = geometry.meshes->meshes[0]; - if(auto* geo_aux = mesh.find_auxiliary(ssbo.name)) - { - if(geo_aux->buffer >= 0 && geo_aux->buffer < (int)mesh.buffers.size()) - { - const auto& geo_buf = mesh.buffers[geo_aux->buffer]; - if(auto* gpu = ossia::get_if(&geo_buf.data)) - { - if(gpu->handle) - { - ssbo.buffer = static_cast(gpu->handle); - ssbo.size = geo_aux->byte_size > 0 ? geo_aux->byte_size : gpu->byte_size; - ssbo.owned = false; - } - } - else if(auto* cpu = ossia::get_if(&geo_buf.data)) - { - if(cpu->raw_data && cpu->byte_size > 0) - { - int64_t sz = geo_aux->byte_size > 0 ? 
geo_aux->byte_size : cpu->byte_size; - auto* buf = rhi.newBuffer( - QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, sz); - buf->setName(QByteArray("RRP_aux_") + ssbo.name.c_str()); - buf->create(); - res.uploadStaticBuffer(buf, 0, sz, cpu->raw_data.get()); - - ssbo.buffer = buf; - ssbo.size = sz; - ssbo.owned = true; - } - } - } - } + dflags |= QRhiTexture::TextureArray; + depthTex = rhi.newTextureArray(depthFmt, maxLayers, sz, 1, dflags); + } + else + { + depthTex = rhi.newTexture(depthFmt, sz, 1, dflags); + } + depthTex->setName( + ("RenderedRawRasterPipelineNode::MRT::depth::" + out.name).c_str()); + SCORE_ASSERT(depthTex->create()); + } + else + { + auto fmt = score::gfx::parseOutputFormat(out.format, QRhiTexture::RGBA8); + QRhiTexture::Flags flags + = QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore; + const int layers + = std::max({1, out.layers, (wantMultiview ? mvCount : 1), + (out.is_cubemap ? 6 : 1)}); + // PER_MIP: flag the target output so QRhi allocates the full mip + // chain. Downstream consumers that care about the mips (prefilter + // sampling keyed on roughness) need them, and the per-mip render + // targets built below attach individual levels. + if(colorAllocIdx == perMipColorIdx) + flags |= QRhiTexture::MipMapped; + + // GENERATE_MIPS: MipMapped allocation + UsedWithGenerateMips flag + // so QRhi's generateMips() can filter the base level into the + // sub-mips at end-of-frame. Orthogonal to PER_MIP (which provides + // shader-authored per-mip content) — we just need the storage + // shape + the capability bit. + if(out.generate_mips) + flags |= QRhiTexture::MipMapped | QRhiTexture::UsedWithGenerateMips; + QRhiTexture* tex = nullptr; + + // Transparent CUBEMAP + MULTIVIEW path. QRhi forbids multiview on + // a cube texture (qrhi.cpp:2561-2565), so we render into a + // `UsedAsTransferSource`-tagged 2D TextureArray (what multiview + // accepts) and stamp a separate CubeMap alongside for downstream + // sampling. 
After the render pass ends we copyTexture each array + // layer into the matching cube face — downstream sees a real + // samplerCube without the shader having to know about it. + // Only one output gets the cube-copy treatment in this first cut + // (multiview already amortises 6× render amplification for free). + const bool wantCubeCopy + = out.is_cubemap && wantMultiview && m_cubeCopyOutputIdx < 0; + + // PER_CUBE_FACE target: allocate as a real CubeMap (6 implicit + // layers). setLayer(face) per per-face render target drives each + // loop iteration. Mutually exclusive with the multiview-cube-copy + // shim above: PER_CUBE_FACE assumes you want the 6-pass behaviour + // explicitly; multiview would collapse the 6 passes back into 1. + const bool useCubeDirect + = (colorAllocIdx == perCubeFaceColorIdx) + || (out.is_cubemap && !wantMultiview); + + if(wantCubeCopy) + { + // Cubemaps must have square faces in QRhi / Vulkan (CUBE_COMPATIBLE + // images require extent.width == extent.height). When the render + // target size is non-square (typical window aspect), the cube we + // hand downstream would otherwise be non-cubemap-compatible and + // produce stripe-like artefacts from the copy/sample stride + // mismatch. Force the cube face to min(w, h); the shadow array is + // sized to match so the multiview draw writes the full face. + const int face_edge = std::min(sz.width(), sz.height()); + const QSize cubeSz(face_edge, face_edge); + + // The rendered-to shadow array. Multiview-compatible shape, square + // (matches the cube). UsedAsTransferSource so it can be a + // copyTexture source. + QRhiTexture::Flags arrayFlags = flags | QRhiTexture::TextureArray + | QRhiTexture::UsedAsTransferSource; + tex = rhi.newTextureArray(fmt, 6, cubeSz, 1, arrayFlags); + tex->setName( + ("RRPNode::MRT::cubeCopyArray::" + out.name).c_str()); + SCORE_ASSERT(tex->create()); + m_cubeCopyShadowArray = tex; + + // The downstream-visible cube. 
Same format, no RenderTarget + // flag (we never render into it directly, only copy). Default + // access is sampled/transfer-dst — enough for the classic + // consumer path (samplerCube). MipMapped is forwarded so a + // future prefilter chain can be generated downstream if the + // user also requested it on this output. UsedWithGenerateMips + // lets the end-of-frame generateMips() hit the public cube + // (the shadow array isn't sampled downstream so it doesn't + // need the flag itself). + QRhiTexture::Flags cubeFlags = QRhiTexture::CubeMap; + if(flags & QRhiTexture::MipMapped) + cubeFlags |= QRhiTexture::MipMapped; + if(out.generate_mips) + cubeFlags |= QRhiTexture::UsedWithGenerateMips; + QRhiTexture* cube = rhi.newTexture(fmt, cubeSz, 1, cubeFlags); + cube->setName( + ("RRPNode::MRT::cubeCopyCube::" + out.name).c_str()); + SCORE_ASSERT(cube->create()); + m_cubeCopyCube = cube; + m_cubeCopyOutputIdx = colorAllocIdx; + } + else if(useCubeDirect) + { + flags |= QRhiTexture::CubeMap; + // QRhi: a cubemap is allocated via newTexture (not newTextureArray) + // — its 6 faces are implicit when the CubeMap flag is set. A cube + // array (multiple cubes) would need newTextureArray + CubeMap, but + // we only cover single-cube here. + tex = rhi.newTexture(fmt, sz, 1, flags); + } + else if(layers > 1) + { + flags |= QRhiTexture::TextureArray; + tex = rhi.newTextureArray(fmt, layers, sz, 1, flags); + } + else + { + tex = rhi.newTexture(fmt, sz, 1, flags); } - m_auxiliarySSBOs.push_back(std::move(ssbo)); + if(!wantCubeCopy) + { + tex->setName( + ("RRPNode::MRT::color::" + out.name).c_str()); + SCORE_ASSERT(tex->create()); + } + colorTextures.push_back(tex); + ++colorAllocIdx; } } - if(!m_mesh) - return; - - // Create the passes - for(Edge* edge : n.output[0]->edges) + // Render-target variant picked from the shape of the declared outputs. 
+ // Raw Raster always ships with depth test/write (3D geometry invariant), + // so on the common colour-only path we still synthesise a depth target + // if the shader didn't declare one explicitly. + if(colorTextures.empty() && depthTex) { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) + // Depth-only shader (e.g. shadow_cascades.frag). Build the RT AROUND the + // node-owned depth texture (possibly a TextureArray) instead of letting + // the helper allocate one and then deleting it while the render pass + // still references it (use-after-free + never-rendered output texture). + m_mrtRenderTarget = createDepthOnlyRenderTarget( + renderer.state, depthTex, mrtSamples, /*samplableDepth=*/true); + } + else if(wantMultiview && !colorTextures.empty()) + { + // Allocate depth for the multiview RT if the shader didn't declare + // one — createMultiViewRenderTarget expects a matching layered depth + // or nullptr. Layered depth is cheaper and Vulkan-correct for MV. + if(!depthTex) { - initPass(rt, renderer, *edge); + depthTex = rhi.newTextureArray( + QRhiTexture::D32F, mvCount, sz, 1, + QRhiTexture::RenderTarget | QRhiTexture::TextureArray); + depthTex->setName( + "RenderedRawRasterPipelineNode::MRT::depthTextureArray (D32F)"); + SCORE_ASSERT(depthTex->create()); } + // Attach ALL color textures so attachments == pipeline blend targets. 
+ m_mrtRenderTarget = createMultiViewRenderTarget( + renderer.state, + std::span{colorTextures.data(), colorTextures.size()}, + mvCount, depthTex, mrtSamples); } -} - -bool RenderedRawRasterPipelineNode::updateMaterials( - RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) -{ - bool mustRecreatePasses = false; - // Update audio textures - if(!n.m_audio_textures.empty() && !m_audioTex) + else if(maxLayers > 1 && !colorTextures.empty()) { - m_audioTex.emplace(); + // Layered but not multiview — render to layer 0 by default; downstream + // per-pass LAYER selection (once PASSES loop lands) will pick others. + // Attach ALL color textures so attachments == pipeline blend targets. + m_mrtRenderTarget = createLayeredRenderTarget( + renderer.state, + std::span{colorTextures.data(), colorTextures.size()}, + 0, depthTex, mrtSamples); + } + else if(!colorTextures.empty()) + { + // Plain MRT path — single-sample 2D colour textures, plus a D32F depth + // texture allocated below if the shader didn't declare a depth OUTPUT. + if(depthTex) + { + m_mrtRenderTarget = createRenderTarget( + renderer.state, + std::span{ + colorTextures.data(), colorTextures.size()}, + depthTex, mrtSamples); + } + else + { + m_mrtRenderTarget.texture = colorTextures[0]; + for(std::size_t i = 1; i < colorTextures.size(); i++) + m_mrtRenderTarget.additionalColorTextures.push_back(colorTextures[i]); + + QList attachments; + for(auto* tex : colorTextures) + attachments.append(QRhiColorAttachment(tex)); + + QRhiTextureRenderTargetDescription desc; + desc.setColorAttachments(attachments.begin(), attachments.end()); + + // Reverse-Z project rule: D32F float depth. D24 + reverse-Z is strictly + // worse than standard-Z. Stencil dropped (unused elsewhere).
+ m_mrtRenderTarget.depthTexture = rhi.newTexture( + QRhiTexture::D32F, sz, renderer.samples(), + QRhiTexture::RenderTarget); + m_mrtRenderTarget.depthTexture->setName( + "RenderedRawRasterPipelineNode::MRT::depthTexture (D32F)"); + SCORE_ASSERT(m_mrtRenderTarget.depthTexture->create()); + desc.setDepthTexture(m_mrtRenderTarget.depthTexture); + + auto* renderTarget = rhi.newTextureRenderTarget(desc); + renderTarget->setName("RenderedRawRasterPipelineNode::MRT::renderTarget"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("RenderedRawRasterPipelineNode::MRT::renderPass"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + m_mrtRenderTarget.renderTarget = renderTarget; + m_mrtRenderTarget.renderPass = renderPass; + } + } + else + { + return; } - bool audioChanged = false; - for(auto& audio : n.m_audio_textures) + // PER_CUBE_FACE: build one render target per cube face, each + // attaching the same cube texture via setLayer(i). Mirrors the + // PER_MIP path structurally (iteration over a fixed axis with a + // distinct per-iteration RT) but with a CubeMap target instead of + // a MipMapped one. m_mipRTs reused as storage (semantics: index = + // face in this mode, mip level in PER_MIP mode). MUTUALLY EXCLUSIVE + // with PER_MIP — PER_CUBE_FACE_MIP would require a 2D iteration + // and isn't supported here; compose via external looping if needed. + if(m_executionMode == ExecutionMode::PerCubeFace + && m_perCubeFaceOutputIndex >= 0 && !colorTextures.empty()) { - if(std::optional sampl - = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res)) + QRhiTexture* targetTex + = (m_perCubeFaceOutputIndex == 0) + ? m_mrtRenderTarget.texture + : (m_perCubeFaceOutputIndex - 1 + < (int)m_mrtRenderTarget.additionalColorTextures.size() + ? 
m_mrtRenderTarget.additionalColorTextures + [m_perCubeFaceOutputIndex - 1] + : nullptr); + + if(targetTex) { - // Texture changed -> material changed - audioChanged = true; + m_mipCount = 6; // m_mipCount stores invocation count for the loop + m_mipRTs.reserve(6); + const QSize faceSize = targetTex->pixelSize(); - auto& [rhiSampler, tex] = *sampl; - for(auto& [e, pass] : m_passes) + for(int face = 0; face < 6; ++face) { - score::gfx::replaceTexture( - *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture()); + QRhiColorAttachment color(targetTex); + color.setLayer(face); + // No multiview here: PER_CUBE_FACE opts into per-pass cube + // rendering explicitly. Multiview + cubemap is forbidden by + // QRhi anyway. + + QRhiTexture* faceDepth = rhi.newTexture( + QRhiTexture::D32F, faceSize, 1, QRhiTexture::RenderTarget); + faceDepth->setName( + ("RRPNode::MRT::perCubeFaceDepth::" + + std::to_string(face)) + .c_str()); + SCORE_ASSERT(faceDepth->create()); + + QRhiTextureRenderTargetDescription faceDesc; + faceDesc.setColorAttachments({color}); + faceDesc.setDepthTexture(faceDepth); + + auto* faceRT = rhi.newTextureRenderTarget(faceDesc); + faceRT->setName( + ("RRPNode::MRT::perCubeFaceRT::" + + std::to_string(face)) + .c_str()); + auto* faceRP = faceRT->newCompatibleRenderPassDescriptor(); + faceRP->setName( + ("RRPNode::MRT::perCubeFaceRP::" + + std::to_string(face)) + .c_str()); + faceRT->setRenderPassDescriptor(faceRP); + SCORE_ASSERT(faceRT->create()); + + MipRT entry; + entry.renderTarget = faceRT; + entry.renderPass = faceRP; + entry.depth = faceDepth; + m_mipRTs.push_back(entry); } } - } - - // Update material - if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged)) - { - char* data = n.m_material_data.get(); - SCORE_ASSERT(m_materialSize >= size_of_pipeline_material); - if(std::memcmp(data, this->m_prevPipelineChangingMaterial, size_of_pipeline_material) - != 0) + else { - mustRecreatePasses = true; - std::copy_n(data, 
size_of_pipeline_material, this->m_prevPipelineChangingMaterial); + qWarning() << "RawRaster EXECUTION_MODEL=PER_CUBE_FACE: could not " + "resolve target texture — falling back to SINGLE"; + m_executionMode = ExecutionMode::Single; } - res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); } - return mustRecreatePasses; -} - -void RenderedRawRasterPipelineNode::update( - RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) -{ - // Update node materials. This must be before any initial return, - // otherwise we miss the materialsChanged - bool mustRecreatePasses = updateMaterials(renderer, res, edge); - bool recreateDueToMaterial = mustRecreatePasses; - - // Update the geometry (sync with ModelDisplayNode) - if(this->geometryChanged) + // PER_MIP: build one render target per mip level of the target output, + // each attaching that specific level via setLevel(i). The draw loop in + // runInitialPasses() iterates these in order, injecting the mip index + // via ProcessUBO.passIndex. Multiview propagates: when the shader + // declared MULTIVIEW:6 (irradiance / prefilter cube case), each mip's + // attachment also carries setMultiViewCount(6) so one draw writes all + // six faces of that mip. Depth is a per-mip single-sample D32F to + // keep the pipeline's render-pass contract consistent across levels. + if(m_executionMode == ExecutionMode::PerMip && m_perMipOutputIndex >= 0 + && !colorTextures.empty()) { - if(geometry.meshes) + QRhiTexture* targetTex + = (m_perMipOutputIndex == 0) + ? m_mrtRenderTarget.texture + : (m_perMipOutputIndex - 1 + < (int)m_mrtRenderTarget.additionalColorTextures.size() + ? 
m_mrtRenderTarget.additionalColorTextures + [m_perMipOutputIndex - 1] + : nullptr); + + if(targetTex) { - const Mesh* prevMesh = m_mesh; - std::tie(m_mesh, m_meshbufs) - = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs); - - this->meshChangedIndex = this->m_mesh->dirtyGeometryIndex; + QSize baseSize = targetTex->pixelSize(); + int mipCount = 1; + { + int s = std::min(baseSize.width(), baseSize.height()); + while(s > 1) + { + s >>= 1; + ++mipCount; + } + } + m_mipCount = mipCount; + m_mipRTs.reserve(mipCount); -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) - // Check for standalone indirect draw buffer from Buffer input ports - if(!m_meshbufs.useIndirectDraw) + for(int i = 0; i < mipCount; ++i) { - for(auto* port : n.input) + QSize mipSize( + std::max(1, baseSize.width() >> i), + std::max(1, baseSize.height() >> i)); + + QRhiColorAttachment color(targetTex); + color.setLevel(i); +#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + if(wantMultiview) + color.setMultiViewCount(mvCount); +#endif + + // Depth must match multiview shape: a plain 2D texture as the + // depth attachment against a multiview color attachment fails + // QRhi's render-pass compat check. Allocate a layered depth for + // the multiview case, plain 2D otherwise. Each mip gets its own + // depth because the attachment size must match the colour + // attachment's mip-i pixel size. 
+ QRhiTexture* mipDepth = nullptr; + if(wantMultiview) { - if(port->type == Types::Buffer && !port->edges.empty()) - { - auto bv = renderer.bufferForInput(*port->edges.front()); - if(bv.usage == BufferView::Usage::IndirectDraw) - { - m_meshbufs.indirectDrawBuffer = bv.handle; - m_meshbufs.useIndirectDraw = true; - m_meshbufs.indirectDrawIndexed = false; + mipDepth = rhi.newTextureArray( + QRhiTexture::D32F, mvCount, mipSize, 1, + QRhiTexture::RenderTarget | QRhiTexture::TextureArray); + } + else + { + mipDepth = rhi.newTexture( + QRhiTexture::D32F, mipSize, 1, QRhiTexture::RenderTarget); + } + mipDepth->setName( + ("RenderedRawRasterPipelineNode::MRT::perMipDepth::" + + std::to_string(i)) + .c_str()); + SCORE_ASSERT(mipDepth->create()); + + QRhiTextureRenderTargetDescription mipDesc; + mipDesc.setColorAttachments({color}); + mipDesc.setDepthTexture(mipDepth); + + auto* mipRT = rhi.newTextureRenderTarget(mipDesc); + mipRT->setName( + ("RenderedRawRasterPipelineNode::MRT::perMipRT::" + + std::to_string(i)) + .c_str()); + auto* mipRP = mipRT->newCompatibleRenderPassDescriptor(); + mipRP->setName( + ("RenderedRawRasterPipelineNode::MRT::perMipRP::" + + std::to_string(i)) + .c_str()); + mipRT->setRenderPassDescriptor(mipRP); + SCORE_ASSERT(mipRT->create()); + + MipRT entry; + entry.renderTarget = mipRT; + entry.renderPass = mipRP; + entry.depth = mipDepth; + m_mipRTs.push_back(entry); + } + } + else + { + qWarning() << "RawRaster EXECUTION_MODEL=PER_MIP: could not resolve " + "target texture — falling back to SINGLE"; + m_executionMode = ExecutionMode::Single; + } + } + + // PER_LAYER: build one render target per layer of the target output's + // TextureArray (or copy strategy for depth targets — see below). The + // draw loop in runInitialPasses() iterates them in order, injecting + // the layer index via ProcessUBO.passIndex. Drives shadow_cascades. 
+ // + // Two paths depending on target type: + // + // - COLOR target: same shape as PER_CUBE_FACE with a variable layer + // count. m_mipRTs holds N entries, each with QRhiColorAttachment + // bound via setLayer(i). Per-layer 2D depth (one D32F per slice) + // keeps the render-pass attachment shapes consistent. + // + // - DEPTH target: Qt RHI 6.11 has no per-layer depth-attachment API + // (QRhiTextureRenderTargetDescription::setDepthTexture takes a + // QRhiTexture* with no layer overload). We render to a single + // shared scratch 2D D32F and copy it into layer i of the OUTPUT + // depth array after each iteration's endPass. The scratch is + // UsedAsTransferSource so the per-iteration copyTexture works. + if(m_executionMode == ExecutionMode::PerLayer && m_perLayerOutputIndex >= 0) + { + const auto& targetOut = outputs[m_perLayerOutputIndex]; + const int layerCount = std::max(1, targetOut.layers); + + if(m_perLayerIsDepth) + { + // depthTex is the OUTPUT array (allocated as Texture2DArray + // earlier when maxLayers > 1). m_perLayerOutputDepthArray + // aliases it for the post-pass copy destination. + if(depthTex && layerCount > 1) + { + m_perLayerOutputDepthArray = depthTex; + + const auto depthFmt = depthTex->format(); + m_perLayerScratchDepth = rhi.newTexture( + depthFmt, sz, 1, + QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource); + m_perLayerScratchDepth->setName( + ("RRPNode::MRT::perLayerScratch::" + targetOut.name).c_str()); + SCORE_ASSERT(m_perLayerScratchDepth->create()); + + // Mirror createDepthOnlyRenderTarget's attachment shape so the + // pipeline (created against m_mrtRenderTarget.renderPass, which + // came from createDepthOnlyRenderTarget) is render-pass- + // compatible with our shared RT. That helper attaches a 1×1 + // dummy RGBA8 color alongside the depth — required by GLES + // backends and harmless on desktop. 
We allocate our own dummy + // (rather than borrowing m_mrtRenderTarget.dummyColorTexture, + // whose lifetime is owned by m_mrtRenderTarget) so the shared + // RT here owns a self-contained set of attachments. + m_perLayerDummyColor = rhi.newTexture( + QRhiTexture::RGBA8, QSize(1, 1), 1, QRhiTexture::RenderTarget); + m_perLayerDummyColor->setName( + ("RRPNode::MRT::perLayerDummyColor::" + targetOut.name).c_str()); + SCORE_ASSERT(m_perLayerDummyColor->create()); + + QRhiTextureRenderTargetDescription scratchDesc; + { + QRhiColorAttachment color0(m_perLayerDummyColor); + scratchDesc.setColorAttachments({color0}); + } + scratchDesc.setDepthTexture(m_perLayerScratchDepth); + + m_perLayerSharedRT = rhi.newTextureRenderTarget(scratchDesc); + m_perLayerSharedRT->setName( + ("RRPNode::MRT::perLayerSharedRT::" + targetOut.name).c_str()); + m_perLayerSharedRP + = m_perLayerSharedRT->newCompatibleRenderPassDescriptor(); + m_perLayerSharedRP->setName( + ("RRPNode::MRT::perLayerSharedRP::" + targetOut.name).c_str()); + m_perLayerSharedRT->setRenderPassDescriptor(m_perLayerSharedRP); + SCORE_ASSERT(m_perLayerSharedRT->create()); + + m_mipCount = layerCount; // reuse for invocation count + } + else + { + qDebug() + << "RawRaster EXECUTION_MODEL=PER_LAYER: depth target" + << QString::fromStdString(targetOut.name) + << "needs LAYERS > 1 — falling back to SINGLE"; + m_executionMode = ExecutionMode::Single; + } + } + else + { + // Color path. Resolve the colour-attachment index from the raw + // outputs[] index (depth entries don't take a colour slot). + int colorIdx = 0; + for(int j = 0; j < m_perLayerOutputIndex; ++j) + if(outputs[j].type != "depth") + ++colorIdx; + + QRhiTexture* targetTex + = (colorIdx == 0) + ? m_mrtRenderTarget.texture + : (colorIdx - 1 + < (int)m_mrtRenderTarget.additionalColorTextures.size() + ? 
m_mrtRenderTarget.additionalColorTextures[colorIdx - 1] + : nullptr); + + if(targetTex && layerCount > 1) + { + const QSize layerSize = targetTex->pixelSize(); + m_mipCount = layerCount; + m_mipRTs.reserve(layerCount); + + for(int layer = 0; layer < layerCount; ++layer) + { + QRhiColorAttachment color(targetTex); + color.setLayer(layer); + + // Per-layer 2D depth — same rationale as PER_CUBE_FACE: depth + // attachment size must match the colour attachment, and a + // layered depth here would force multi-view shape against a + // single-layer colour binding. + QRhiTexture* layerDepth = rhi.newTexture( + QRhiTexture::D32F, layerSize, 1, QRhiTexture::RenderTarget); + layerDepth->setName( + ("RRPNode::MRT::perLayerDepth::" + std::to_string(layer)) + .c_str()); + SCORE_ASSERT(layerDepth->create()); + + QRhiTextureRenderTargetDescription layerDesc; + layerDesc.setColorAttachments({color}); + layerDesc.setDepthTexture(layerDepth); + + auto* layerRT = rhi.newTextureRenderTarget(layerDesc); + layerRT->setName( + ("RRPNode::MRT::perLayerRT::" + std::to_string(layer)) + .c_str()); + auto* layerRP = layerRT->newCompatibleRenderPassDescriptor(); + layerRP->setName( + ("RRPNode::MRT::perLayerRP::" + std::to_string(layer)) + .c_str()); + layerRT->setRenderPassDescriptor(layerRP); + SCORE_ASSERT(layerRT->create()); + + MipRT entry; + entry.renderTarget = layerRT; + entry.renderPass = layerRP; + entry.depth = layerDepth; + m_mipRTs.push_back(entry); + } + } + else + { + qDebug() + << "RawRaster EXECUTION_MODEL=PER_LAYER: colour target" + << QString::fromStdString(targetOut.name) + << "needs LAYERS > 1 and a resolved texture — falling back" + " to SINGLE"; + m_executionMode = ExecutionMode::Single; + } + } + } + + // Create the pipeline + QRhiBuffer* pubo = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO)); + pubo->setName("RenderedRawRasterPipelineNode::initMRTPass::pubo"); + pubo->create(); + + try + { + auto [v, s] = 
score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS); + + auto& mat + = *reinterpret_cast(m_prevPipelineChangingMaterial); + + int max_binding = 3; + auto samplers = allSamplers(); + if(!samplers.empty()) + max_binding += samplers.size(); + + // Build additional bindings: auxiliary SSBOs + model UBO + const auto bindingStages = QRhiShaderResourceBinding::StageFlag::VertexStage + | QRhiShaderResourceBinding::StageFlag::FragmentStage; + + ossia::small_vector additionalBindings; + + // INPUTS storage trio (storage_input SSBO / csf_image_input image2D / + // uniform_input UBO) — order MUST match isf_emit_graphics_storage's + // GLSL emission (declaration order, sequential bindings starting at + // max_binding == 3 + samplers count). + { + auto extras = buildExtraBindings(m_storage); + for(const auto& b : extras) + { + additionalBindings.push_back(b); + max_binding++; + } + } + + for(auto& aux : m_auxiliarySSBOs) + { + // Dummy usage flag matches the aux kind so the created buffer can be + // bound as the intended descriptor type (UBO for uniform_input, SSBO + // otherwise). Mirrors the non-MRT path. + if(!aux.buffer) + { + auto usage = aux.is_uniform ? QRhiBuffer::UniformBuffer + : QRhiBuffer::StorageBuffer; + const int64_t dummySize = aux.is_uniform ? 256 : 16; + auto* dummy = rhi.newBuffer(QRhiBuffer::Immutable, usage, dummySize); + dummy->setName(aux.is_uniform ? "RRP_ubo_dummy" : "RRP_aux_dummy"); + dummy->create(); + aux.buffer = dummy; + aux.size = dummySize; + aux.owned = true; + } + + // Persistent ping-pong: _prev (readonly) goes first. 
+ if(aux.persistent && aux.prev_buffer) + { + additionalBindings.push_back( + QRhiShaderResourceBinding::bufferLoad( + max_binding, bindingStages, aux.prev_buffer)); + aux.prev_binding = max_binding; + max_binding++; + } + + QRhiShaderResourceBinding binding; + if(aux.is_uniform) + { + // uniform_input → std140 UBO binding + binding = QRhiShaderResourceBinding::uniformBuffer( + max_binding, bindingStages, aux.buffer); + } + else if(aux.access == "read_only") + binding = QRhiShaderResourceBinding::bufferLoad( + max_binding, bindingStages, aux.buffer); + else if(aux.access == "write_only") + binding = QRhiShaderResourceBinding::bufferStore( + max_binding, bindingStages, aux.buffer); + else + binding = QRhiShaderResourceBinding::bufferLoadStore( + max_binding, bindingStages, aux.buffer); + + additionalBindings.push_back(binding); + aux.binding = max_binding; // remember slot for per-sub-mesh patching + max_binding++; + } + + // Auxiliary texture / storage-image bindings (MRT path). Same + // is_storage dispatch as the non-MRT site. 
+ for(auto& ats : m_auxTextureSamplers) + { + QRhiShaderResourceBinding b; + if(ats.is_storage) + { + if(ats.access == "read_only") + b = QRhiShaderResourceBinding::imageLoad( + max_binding, bindingStages, ats.texture, 0); + else if(ats.access == "write_only") + b = QRhiShaderResourceBinding::imageStore( + max_binding, bindingStages, ats.texture, 0); + else + b = QRhiShaderResourceBinding::imageLoadStore( + max_binding, bindingStages, ats.texture, 0); + } + else + { + b = QRhiShaderResourceBinding::sampledTexture( + max_binding, bindingStages, ats.texture, ats.sampler); + } + additionalBindings.push_back(b); + ats.binding = max_binding; + max_binding++; + } + + additionalBindings.push_back(QRhiShaderResourceBinding::uniformBuffer( + max_binding, bindingStages, m_modelUBO)); + + auto bindings = createDefaultBindings( + renderer, m_mrtRenderTarget, pubo, m_materialUBO, allSamplers(), + std::span( + additionalBindings.data(), additionalBindings.size())); + + auto ps = rhi.newGraphicsPipeline(); + ps->setName("RenderedRawRasterPipelineNode::initMRTPass::ps"); + SCORE_ASSERT(ps); + + const int rtSamples = m_mrtRenderTarget.sampleCount(); + const int pipelineSamples = (rtSamples > 0) ? rtSamples : renderer.samples(); + ps->setSampleCount(pipelineSamples); + + // Multiview: activate the matching view count on the pipeline so that + // `gl_ViewIndex` in the shader actually picks up the per-view state + // (mat4[] viewProjection etc., emitted by the ISF layer). Must match + // the color attachment's setMultiViewCount set in + // createMultiViewRenderTarget above. +#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + if(wantMultiview) + ps->setMultiViewCount(mvCount); +#endif + + // preparePipeline sets the vertex-input layout from the mesh's + // attributes. Skip for procedural draws (VERTEX_INPUTS: []): the + // pipeline has no vertex bindings and the draw uses gl_VertexIndex. 
+ if(m_mesh) + m_mesh->preparePipeline(*ps); + + const auto& desc = n.m_descriptor; + const bool hasDescriptorState = stateAffectsPipeline(desc.default_state); + + if(hasDescriptorState) + { + // Seed legacy material-UBO blend on every attachment first; applyPipelineState + // only overrides BLEND when the shader explicitly declares it. + QRhiGraphicsPipeline::TargetBlend seededBlend; + seededBlend.enable = mat.enable_blend; + seededBlend.srcColor = mat.src_color; + seededBlend.dstColor = mat.dst_color; + seededBlend.opColor = mat.op_color; + seededBlend.srcAlpha = mat.src_alpha; + seededBlend.dstAlpha = mat.dst_alpha; + seededBlend.opAlpha = mat.op_alpha; + QList seedBlends; + for(int i = 0; i < std::max(1, m_mrtRenderTarget.colorAttachmentCount()); i++) + seedBlends.append(seededBlend); + ps->setTargetBlends(seedBlends.begin(), seedBlends.end()); + ps->setDepthTest(true); + ps->setDepthWrite(true); + // Reverse-Z project rule (applyPipelineState overrides only if the + // shader explicitly declares depth_compare). + ps->setDepthOp(QRhiGraphicsPipeline::Greater); + + const bool depthAvailable + = (m_mrtRenderTarget.depthTexture != nullptr) + || (m_mrtRenderTarget.depthRenderBuffer != nullptr) + || (m_mrtRenderTarget.msDepthTexture != nullptr); + applyPipelineState( + *ps, desc.default_state, m_mrtRenderTarget.colorAttachmentCount(), + depthAvailable, /*wantsDepthByDefault=*/true); + } + else + { + // Legacy: material-UBO-driven blend, hardcoded depth. 
+ QRhiGraphicsPipeline::TargetBlend premulAlphaBlend; + premulAlphaBlend.enable = mat.enable_blend; + premulAlphaBlend.srcColor = mat.src_color; + premulAlphaBlend.dstColor = mat.dst_color; + premulAlphaBlend.opColor = mat.op_color; + premulAlphaBlend.srcAlpha = mat.src_alpha; + premulAlphaBlend.dstAlpha = mat.dst_alpha; + premulAlphaBlend.opAlpha = mat.op_alpha; + + QList blends; + for(int i = 0; i < m_mrtRenderTarget.colorAttachmentCount(); i++) + blends.append(premulAlphaBlend); + ps->setTargetBlends(blends.begin(), blends.end()); + + ps->setDepthTest(true); + ps->setDepthWrite(true); + // Reverse-Z project rule. + ps->setDepthOp(QRhiGraphicsPipeline::Greater); + } + + switch(mat.mode) + { + default: + case 0: + ps->setTopology(QRhiGraphicsPipeline::Triangles); + break; + case 1: + ps->setTopology(QRhiGraphicsPipeline::Points); + break; + case 2: + ps->setTopology(QRhiGraphicsPipeline::Lines); + break; + } + + // Remap vertex inputs by semantic (CSF-style; honour explicit + // SEMANTIC). Procedural draws have no vertex inputs to remap — skip. + // Same fallback-aware path as initPass — "REQUIRED: false" inputs + // missing upstream land on a pooled identity buffer. + FallbackBindingPlan fallbackPlan; + if(m_mesh) + { + if(auto* geom = m_mesh->semanticGeometry()) + { + if(!remapPipelineVertexInputs( + *ps, v, *geom, n.descriptor(), + rhi, renderer.vertexFallbackPool(), res, fallbackPlan)) + { + qWarning() << "RawRaster::initMRTPass: remapPipelineVertexInputs FAILED"; + delete ps; + delete pubo; + return; + } + } + } + + ps->setShaderStages({{QRhiShaderStage::Vertex, v}, {QRhiShaderStage::Fragment, s}}); + ps->setShaderResourceBindings(bindings); + + SCORE_ASSERT(m_mrtRenderTarget.renderPass); + ps->setRenderPassDescriptor(m_mrtRenderTarget.renderPass); + + if(!ps->create()) + { + qDebug() << "Warning! 
MRT Pipeline not created"; + delete ps; + ps = nullptr; + } + + Pipeline pip = {ps, bindings}; + if(pip.pipeline) + { + // nullptr edge — MRT passes are shared across all output edges + Pass pass{m_mrtRenderTarget, pip, pubo}; + pass.fallback_bindings = std::move(fallbackPlan); + m_passes.emplace_back(nullptr, std::move(pass)); + } + else + { + delete pubo; + } + } + catch(...) + { + delete pubo; + } +} + +void RenderedRawRasterPipelineNode::initMRTBlitPass( + RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge) +{ + QRhiTexture* srcTex = textureForOutput(*edge.source); + if(!srcTex) + return; + + auto rt = renderer.renderTargetForOutput(edge); + if(!rt.renderTarget) + return; + + auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, rrp_blit_vs, rrp_blit_fs); + + QRhiSampler* sampler = renderer.state.rhi->newSampler( + QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, + QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); + sampler->setName("RenderedRawRasterPipelineNode::MRT::blitSampler"); + sampler->create(); + m_blitSamplersByEdge[&edge] = sampler; + + auto pip = score::gfx::buildPipeline( + renderer, *m_blitMesh, vertexS, fragmentS, rt, nullptr, nullptr, + std::array{Sampler{sampler, srcTex}}); + + if(pip.pipeline) + { + m_passes.emplace_back(&edge, Pass{rt, pip, nullptr}); + } + else + { + m_blitSamplersByEdge.erase(&edge); + delete sampler; + } +} + +void RenderedRawRasterPipelineNode::initMRTBlitPasses( + RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + // For each output port, create a blit pass for each downstream edge + for(auto* output_port : n.output) + { + for(Edge* edge : output_port->edges) + { + initMRTBlitPass(renderer, res, *edge); + } + } +} + +void RenderedRawRasterPipelineNode::initState( + RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + QRhi& rhi = *renderer.state.rhi; + + // Create the mesh + { + if(geometry.meshes) + { + std::tie(m_mesh, m_meshbufs) + = renderer.acquireMesh(geometry, 
res, m_mesh, m_meshbufs); + m_meshbufs.gpuIndirectSupported = renderer.state.caps.drawIndirect; + } + else + { + if(m_mesh) + { + if(m_meshbufs.buffers.empty()) + { + m_meshbufs = renderer.initMeshBuffer(*m_mesh, res); + m_meshbufs.gpuIndirectSupported = renderer.state.caps.drawIndirect; + } + } + } + } + + // Create the material UBO + m_materialSize = n.m_materialSize; + if(m_materialSize > 0) + { + m_materialUBO + = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); + m_materialUBO->setName("RenderedRawRasterPipelineNode::init::m_materialUBO"); + SCORE_ASSERT(m_materialUBO->create()); + if(n.m_material_data) + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get()); + } + + m_modelUBO + = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(float[16])); + m_modelUBO->setName("RenderedRawRasterPipelineNode::init::m_modelUBO"); + SCORE_ASSERT(m_modelUBO->create()); + + // Create the samplers + SCORE_ASSERT(m_passes.empty()); + SCORE_ASSERT(m_inputSamplers.empty()); + SCORE_ASSERT(m_audioSamplers.empty()); + + m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor()); + + // Build the auxiliary-texture binding table and seed initial texture + // pointers from the incoming geometry. Walks desc.inputs parallel to + // n.input and m_inputSamplers, recording a (sampler_idx, name) pair + // for every image-style INPUT that might be served by a geometry aux + // texture. update() re-runs the lookup whenever the geometry changes + // so rebuilt / grown channel arrays flow through without a cable. 
+ bindAuxTexturesInit(renderer); + + m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures); + + // Initialize auxiliary SSBOs from descriptor + { + const auto& desc = n.descriptor(); + m_auxiliarySSBOs.clear(); + m_auxiliarySSBOs.reserve(desc.auxiliary.size() + desc.inputs.size()); + + // Resolve a buffer for `ssbo` by looking up its name in the first + // incoming geometry's auxiliary_buffer list. Used for the scene-aware + // wiring where the upstream ScenePreprocessor publishes scene_lights / + // scene_materials / per_draw as named aux buffers travelling with the + // geometry edge. + auto try_bind_from_geometry = [&](AuxiliarySSBO& ssbo) { + if(!geometry.meshes || geometry.meshes->meshes.empty()) + return; + const auto& mesh = geometry.meshes->meshes[0]; + auto* geo_aux = mesh.find_auxiliary(ssbo.name); + if(!geo_aux || geo_aux->buffer < 0 + || geo_aux->buffer >= (int)mesh.buffers.size()) + return; + const auto& geo_buf = mesh.buffers[geo_aux->buffer]; + if(auto* gpu = ossia::get_if(&geo_buf.data)) + { + if(!gpu->handle) + return; + ssbo.buffer = static_cast(gpu->handle); + ssbo.size = geo_aux->byte_size > 0 ? geo_aux->byte_size : gpu->byte_size; + ssbo.owned = false; + } + else if(auto* cpu = ossia::get_if(&geo_buf.data)) + { + if(!cpu->raw_data || cpu->byte_size <= 0) + return; + int64_t sz = geo_aux->byte_size > 0 ? geo_aux->byte_size : cpu->byte_size; + // Usage flag must match the aux kind — binding a StorageBuffer- + // only buffer as a uniform block (or vice versa) is rejected by + // the Vulkan validation layer. + const auto usage = ssbo.is_uniform ? 
QRhiBuffer::UniformBuffer + : QRhiBuffer::StorageBuffer; + auto* buf = rhi.newBuffer(QRhiBuffer::Immutable, usage, sz); + buf->setName(QByteArray("RRP_aux_") + ssbo.name.c_str()); + buf->create(); + res.uploadStaticBuffer(buf, 0, sz, cpu->raw_data.get()); + ssbo.buffer = buf; + ssbo.size = sz; + ssbo.owned = true; + } + }; + + // Resolve a buffer for `ssbo` by scanning the connected input port's + // edges for an upstream producer (CSF storage output, ExtractBuffer2, + // ScenePreprocessor aux extractors, ...). Upstream renderers publish + // their output buffer through the virtual NodeRenderer::bufferForOutput() + // — Port::value is never written for buffer-typed outputs — so the + // retrieval goes through RenderList::bufferForInput(edge). + // + // Complements try_bind_from_geometry: an INPUTS-declared storage_input/ + // uniform_input may be wired through a dedicated Buffer edge instead of + // riding along with the geometry. Mirrors + // IsfBindingsBuilder::bindUpstreamBuffers, which SimpleRenderedISFNode + // uses for non-RawRaster shaders. + auto try_bind_from_input_port = [&](AuxiliarySSBO& ssbo) { + if(ssbo.input_port_index < 0 + || ssbo.input_port_index >= (int)n.input.size()) + return; + Port* port = n.input[ssbo.input_port_index]; + if(!port || port->type != Types::Buffer) + return; + for(Edge* edge : port->edges) + { + if(!edge || !edge->source) + continue; + if(edge->source->type != Types::Buffer) + continue; + auto view = renderer.bufferForInput(*edge); + if(!view.handle) + continue; + ssbo.buffer = view.handle; + if(ssbo.size <= 0) + ssbo.size = view.handle->size(); + ssbo.owned = false; + break; + } + }; + + // Compute the byte size required by a LAYOUT. Used when we need to + // own the buffer (persistent aux). Flexible array members use `size` + // as the element count (falls back to 1 if unspecified). 
+ auto aux_owned_size = [](const isf::geometry_input::auxiliary_request& aux) -> int64_t { + int64_t total = 0; + int64_t arr_elem_bytes = 0; + for(const auto& f : aux.layout) + { + auto bracket = f.type.find('['); + std::string base = (bracket == std::string::npos) ? f.type : f.type.substr(0, bracket); + int64_t sz = 0; + if(base == "float" || base == "int" || base == "uint") sz = 4; + else if(base == "vec2" || base == "ivec2" || base == "uvec2") sz = 8; + else if(base == "vec3" || base == "ivec3" || base == "uvec3") sz = 16; // std430 pads + else if(base == "vec4" || base == "ivec4" || base == "uvec4") sz = 16; + else if(base == "mat4") sz = 64; + else if(base == "mat3") sz = 48; + else sz = 16; // conservative default for unknown types / structs + if(bracket != std::string::npos) + { + // Flexible array (`name[]`) — size comes from SIZE expression. + arr_elem_bytes = sz; + } + else + { + total += sz; + } + } + int64_t count = 1; + if(!aux.size.empty()) + { + try { count = std::max(1, std::stoll(aux.size)); } + catch(const std::exception& e) { + count = 1024; // TODO: evaluate $USER when we add it + qWarning() << "RenderedRawRasterPipelineNode: aux SSBO size" + << aux.size.c_str() << "could not be parsed (" << e.what() + << "); falling back to 1024."; + } + } + else if(arr_elem_bytes > 0) + { + qWarning() << "RenderedRawRasterPipelineNode: aux SSBO has element size but no count;" + " falling back to 1024."; + count = 1024; + } + return total + arr_elem_bytes * count; + }; + + // Top-level AUXILIARY textures: allocate one QRhiSampler per sampled + // entry (storage-image entries don't need a sampler — imageLoad / + // imageStore don't take one), seed with a type-appropriate + // placeholder texture. Actual upstream resolution happens in + // rebindAuxTextures() every frame. 
+ for(const auto& atx : desc.auxiliary_textures) + { + AuxTextureAuxSampler ats; + ats.name = atx.name; + ats.is_storage = atx.is_storage; + ats.access = atx.access; + + if(!atx.is_storage) + { + ats.sampler = score::gfx::makeSampler(rhi, atx.sampler); + ats.sampler->setName( + ("RRP_aux_tex_sampler::" + atx.name).c_str()); + } + + // Pick placeholder matching the declared shape. Stored separately + // so rebindAuxTextures can revert to it when upstream stops + // publishing the aux name (otherwise we'd keep the stale upstream + // handle around — UAF waiting to happen when the producer releases + // the texture). + if(atx.is_cubemap) + ats.placeholder = &renderer.emptyTextureCube(); + else if(atx.dimensions == 3) + ats.placeholder = &renderer.emptyTexture3D(); + else if(atx.is_array) + ats.placeholder = &renderer.emptyTextureArray(); + else + ats.placeholder = &renderer.emptyTexture(); + ats.texture = ats.placeholder; + + m_auxTextureSamplers.push_back(std::move(ats)); + } + + // INPUTS storage_input / uniform_input: these have a matching score + // input port created by ISFNode's isf_input_port_vis. We record its + // index so update() can re-pull the upstream buffer if it changes + // (useful when the upstream node's init() runs after ours and only + // publishes its Port::value then). + // + // walk_descriptor_inputs() advances the cumulative port_counts in + // lockstep with isf_input_port_vis (single source of truth — see + // ISFVisitors.hpp). For RawRaster the cursor starts at 1 because + // port 0 is the mandatory Geometry input. + // + // Ordering: GLSL emits desc.inputs first then top-level AUXILIARY, + // so we push AuxiliarySSBOs in the same order — reversing would + // shift every binding index by desc.auxiliary.size() and Vulkan + // would reject the pipeline with "VkDescriptorType mismatch". + const bool isRawRaster = (desc.mode == isf::descriptor::RawRaster); + const port_counts startPC{isRawRaster ? 
1 : 0, 0, 0}; + // INPUTS storage_input / csf_image_input / uniform_input are handled by + // IsfBindingsBuilder's m_storage path (allocateStorageResources + + // buildExtraBindings) so the SRB binding type matches what + // isf_emit_graphics_storage emits in GLSL. See `isf.cpp:4073` for the + // GLSL emission and `IsfBindingsBuilder.cpp:417` for the allocation + // path. The previous hand-rolled walker here only handled storage_input + // and uniform_input, silently skipping csf_image_input — the shader + // would emit `image2D NAME at binding=N` while no descriptor was added, + // triggering VUID-VkGraphicsPipelineCreateInfo-layout-07990 on bind. + // + // No-op for INPUTS storage/uniform/csf_image entries — IsfBindingsBuilder + // handles them. We still need the walker for indirect_draw storage_input + // (special-cased at runtime, no SRB binding). + walk_descriptor_inputs( + desc, startPC, + [&](const isf::input& inp, const port_counts&, const port_counts&) { + if(auto* s = ossia::get_if(&inp.data)) + { + if(!s->buffer_usage.empty()) + return; // indirect_draw handled elsewhere + } + // INPUTS storage_input / uniform_input / csf_image_input now flow + // through m_storage (initialised below). All other variants: + // nothing to record here; the canonical walker still advances + // port_idx correctly via `delta`. + }); + + // Now init m_storage from desc.inputs (storage_input + csf_image_input + // + uniform_input). Bindings start at 3 + samplers count to align with + // the GLSL emission order (samplers first in the binding range, then + // INPUTS storage in declaration order via isf_emit_graphics_storage, + // then AUXILIARY storage, then AUXILIARY textures, then model UBO). 
+ if(m_firstStorageBinding < 0) + { + const int firstStorageBinding + = 3 + (int)m_inputSamplers.size() + (int)m_audioSamplers.size(); + m_firstStorageBinding = firstStorageBinding; + collectGraphicsStorageResources(desc, firstStorageBinding, m_storage); + } + ensureStorageResources( + *renderer.state.rhi, res, renderer, desc, m_storage, + renderer.state.renderSize); + bindUpstreamBuffers(renderer, n.input, m_storage); + // Read-only csf_image_input adopts the matching upstream + // auxiliary_texture by name (the storage image an upstream CSF / + // RawRaster published into its out_geo). The auto-allocated + // placeholder is freed inside the helper. The SRB doesn't exist + // yet at init time — patched in update() once the pass is built. + // INPUTS storage_input / uniform_input also name-match against the + // upstream geometry's auxiliary_buffers list — that's how + // ScenePreprocessor publishes scene_lights / world_transforms / + // per_draws / scene_materials / scene_counts / scene_light_indices / + // camera UBO / env UBO into flattened-scene shaders (classic_pbr et al.). + if(geometry.meshes && !geometry.meshes->meshes.empty()) + { + bindUpstreamImagesFromGeometry(m_storage, geometry.meshes->meshes[0]); + bindUpstreamBuffersFromGeometry( + *renderer.state.rhi, res, m_storage, geometry.meshes->meshes[0]); + } + + // Top-level AUXILIARY entries: no corresponding score input port — + // resolved by name from the upstream geometry's auxiliary list. + // Kind dispatch (is_uniform): SSBO → std430 buffer, UBO → std140 + // uniform. The AuxiliarySSBO struct already carries an is_uniform + // flag that downstream allocation / SRB-build sites dispatch on. + // Non-persistent: resolved from the incoming geometry. + // Persistent: node owns a ping-pong pair (SSBO only — UBO + persistent + // is a no-op per the parser's semantic note; this branch is gated on + // !is_uniform). 
+ // + // Ordering: GLSL emits these AFTER all INPUTS bindings, so we push + // them after the INPUTS loop above to keep binding slots aligned + // between shader and SRB. + for(const auto& aux : desc.auxiliary) + { + AuxiliarySSBO ssbo; + ssbo.name = aux.name; + ssbo.access = aux.access; + ssbo.persistent = aux.persistent && !aux.is_uniform; + ssbo.is_uniform = aux.is_uniform; + + if(ssbo.persistent) + { + const int64_t sz = std::max(16, aux_owned_size(aux)); + auto alloc = [&](const char* suffix) -> QRhiBuffer* { + auto* b = rhi.newBuffer( + QRhiBuffer::Static, QRhiBuffer::StorageBuffer, (quint32)sz); + b->setName(QByteArray("RRP_persistent_aux_") + aux.name.c_str() + suffix); + b->create(); + // Zero-initialise so the first frame's readonly _prev reads don't + // hit uninitialised memory. + std::vector zeros(sz, 0); + res.uploadStaticBuffer(b, 0, sz, zeros.data()); + return b; + }; + ssbo.buffer = alloc(""); + ssbo.prev_buffer = alloc("_prev"); + ssbo.size = sz; + ssbo.owned = true; + } + else + { + try_bind_from_geometry(ssbo); + } + + m_auxiliarySSBOs.push_back(std::move(ssbo)); + } + } + + // Determine if we need MRT. MRT is required for anything that + // `initMRTPass` knows how to allocate which the non-MRT single- + // target path can't express: multiple colour attachments, explicit + // depth output, layered / cubemap output, or multiview. Multiview + // specifically needs the MRT path because the RT has a different + // shape from a swap-chain RT. 
+ { + const auto& outputs = n.descriptor().outputs; + int colorCount = 0; + bool hasDepth = false; + bool hasLayered = false; + bool hasCubemap = false; + for(const auto& out : outputs) + { + if(out.type == "depth") + hasDepth = true; + else + ++colorCount; + if(out.layers > 1) + hasLayered = true; + if(out.is_cubemap) + hasCubemap = true; + } + m_hasMRT = colorCount > 1 || hasDepth || hasLayered || hasCubemap + || n.descriptor().multiview_count >= 2; + } + + if(m_hasMRT) + { + // Initialize the blit mesh (default quad) + m_blitMesh = &renderer.defaultQuad(); + if(m_blitMeshbufs.buffers.empty()) + m_blitMeshbufs = renderer.initMeshBuffer(*m_blitMesh, res); + } + + m_initialized = true; +} + +void RenderedRawRasterPipelineNode::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + // Procedural draws (VERTEX_INPUTS: [] + VERTEX_COUNT) have no + // upstream geometry; m_mesh stays null and the draw call doesn't + // fetch vertex attributes. Don't block MRT setup on the absence + // of a mesh in that case. 
+ if(!m_mesh && !isProceduralDraw()) + return; + + if(m_hasMRT) + { + // Create the shared MRT internal render target on first output edge + if(m_mrtRenderTarget.texture == nullptr) + { + initMRTPass(renderer, res); + } + + // Create the blit pass for this single edge + initMRTBlitPass(renderer, res, edge); + } + else + { + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) + { + initPass(rt, renderer, res, edge); + } + } +} + +void RenderedRawRasterPipelineNode::removeOutputPass(RenderList& renderer, Edge& edge) +{ + // Find and erase the pass for this edge + auto it = ossia::find_if(m_passes, [&](auto& p) { return p.first == &edge; }); + if(it != m_passes.end()) + { + it->second.p.release(); + if(it->second.processUBO) + it->second.processUBO->deleteLater(); + m_passes.erase(it); + } + + if(m_hasMRT) + { + // Release the blit sampler for this edge + auto sit = m_blitSamplersByEdge.find(&edge); + if(sit != m_blitSamplersByEdge.end()) + { + delete sit->second; + m_blitSamplersByEdge.erase(sit); + } + + // If no more blit passes remain (only the shared MRT pass with nullptr edge), + // release MRT resources + bool hasBlitPasses = false; + for(auto& [e, pass] : m_passes) + { + if(e != nullptr) + { + hasBlitPasses = true; + break; + } + } + if(!hasBlitPasses) + { + // Remove the shared MRT pass + auto mrtIt = ossia::find_if(m_passes, [](auto& p) { return p.first == nullptr; }); + if(mrtIt != m_passes.end()) + { + mrtIt->second.p.release(); + if(mrtIt->second.processUBO) + mrtIt->second.processUBO->deleteLater(); + m_passes.erase(mrtIt); + } + m_mrtRenderTarget.release(); + } + } +} + +bool RenderedRawRasterPipelineNode::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_passes, [&](const auto& p) { return p.first == &edge; }) + != m_passes.end(); +} + +void RenderedRawRasterPipelineNode::releaseState(RenderList& r) +{ + if(!m_initialized) + return; + + // Release all remaining passes + { + for(auto& texture : n.m_audio_textures) + { 
+ auto it = texture.samplers.find(&r); + if(it != texture.samplers.end()) + { + if(auto tex = it->second.texture) + { + if(tex != &r.emptyTexture()) + tex->deleteLater(); + } + } + } + + for(auto& [edge, pass] : m_passes) + { + pass.p.release(); + + if(pass.processUBO) + { + pass.processUBO->deleteLater(); + } + } + + m_passes.clear(); + } + + for(auto sampler : m_inputSamplers) + { + delete sampler.sampler; + // texture is deleted elsewhere + } + m_inputSamplers.clear(); + // Override entries are non-owning (registry-owned). Just drop the + // pointers — the registry's destroy() will deleteLater the underlying + // QRhiSampler. + m_inputSamplerOverrides.clear(); + for(auto sampler : m_audioSamplers) + { + delete sampler.sampler; + // texture is deleted elsewhere + } + m_audioSamplers.clear(); + for(auto& [edge, sampler] : m_blitSamplersByEdge) + { + delete sampler; + } + m_blitSamplersByEdge.clear(); + + delete m_materialUBO; + m_materialUBO = nullptr; + + delete m_modelUBO; + m_modelUBO = nullptr; + + m_blitMeshbufs = {}; // Freed in RenderList + + for(auto& aux : m_auxiliarySSBOs) + { + if(aux.owned && aux.buffer) + aux.buffer->deleteLater(); + if(aux.owned && aux.prev_buffer) + aux.prev_buffer->deleteLater(); + } + m_auxiliarySSBOs.clear(); + + // INPUTS storage trio (storage_input/csf_image_input/uniform_input) + // — owned by m_storage; release frees the underlying QRhiBuffer/Texture. + m_storage.release(); + m_firstStorageBinding = -1; + + for(auto& ats : m_auxTextureSamplers) + { + if(ats.sampler) + ats.sampler->deleteLater(); + // `texture` is either a renderer-owned placeholder or an upstream- + // geometry-owned handle — we don't own it here. + } + m_auxTextureSamplers.clear(); + + // Release per-mip / per-cube-face render targets. The underlying + // colour texture is owned by m_mrtRenderTarget and freed via its + // release() below — we only drop the per-iteration RT wrappers + + // per-iteration depth textures that we alloc'd here. 
+ for(auto& e : m_mipRTs) + { + if(e.renderTarget) + e.renderTarget->deleteLater(); + if(e.renderPass) + e.renderPass->deleteLater(); + if(e.depth) + e.depth->deleteLater(); + } + m_mipRTs.clear(); + m_mipCount = 0; + m_perMipOutputIndex = -1; + m_perCubeFaceOutputIndex = -1; + + // PerLayer state — same shape as the init-time cleanup in update(). + // Color path is held in m_mipRTs (cleared above); depth path keeps + // its scratch + shared RT outside m_mipRTs. + if(m_perLayerSharedRT) + { + m_perLayerSharedRT->deleteLater(); + m_perLayerSharedRT = nullptr; + } + if(m_perLayerSharedRP) + { + m_perLayerSharedRP->deleteLater(); + m_perLayerSharedRP = nullptr; + } + if(m_perLayerScratchDepth) + { + m_perLayerScratchDepth->deleteLater(); + m_perLayerScratchDepth = nullptr; + } + if(m_perLayerDummyColor) + { + m_perLayerDummyColor->deleteLater(); + m_perLayerDummyColor = nullptr; + } + m_perLayerOutputDepthArray = nullptr; + m_perLayerOutputIndex = -1; + m_perLayerIsDepth = false; + + m_executionMode = ExecutionMode::Single; + + // CUBEMAP + MULTIVIEW shim textures. The shadow TextureArray is + // slotted into m_mrtRenderTarget's colour attachment slot, so + // m_mrtRenderTarget.release() below handles it. The cube, however, + // lives outside m_mrtRenderTarget (it's the public output handle) + // and must be deleteLater'd here. + if(m_cubeCopyCube) + { + m_cubeCopyCube->deleteLater(); + m_cubeCopyCube = nullptr; + } + m_cubeCopyShadowArray = nullptr; // owned via m_mrtRenderTarget + m_cubeCopyOutputIdx = -1; + + // Per-invocation UBO + SRB pool (PerMip / PerCubeFace / Manual). 
+ for(auto* ubo : m_perInvocationUBOs) + if(ubo) ubo->deleteLater(); + m_perInvocationUBOs.clear(); + for(auto* srb : m_perInvocationSRBs) + if(srb) srb->deleteLater(); + m_perInvocationSRBs.clear(); + + // Release MRT render target (textures are owned by us) + if(m_hasMRT) + { + m_mrtRenderTarget.release(); + m_hasMRT = false; + } + + m_mesh = nullptr; + m_meshbufs = {}; + m_blitMesh = nullptr; + + m_initialized = false; +} + +void RenderedRawRasterPipelineNode::addInputEdge( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(edge.sink->type == Types::Image) + { + // Find upstream texture + if(auto it = edge.source->node->renderedNodes.find(&renderer); + it != edge.source->node->renderedNodes.end()) + { + if(auto* tex = it->second->textureForOutput(*edge.source)) + { + auto rt = renderer.renderTargetForInputPort(*edge.sink); + updateInputTexture(*edge.sink, tex, rt.depthTexture); + } + } + } +} + +void RenderedRawRasterPipelineNode::removeInputEdge(RenderList& renderer, Edge& edge) +{ + if(edge.sink->type == Types::Image) + { + // See SimpleRenderedISFNode::removeInputEdge — same dangling-depth- + // sampler issue applies here when DEPTH: true inputs get disconnected. + const bool hasDepthCompanion + = (edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + QRhiTexture* depthFallback + = hasDepthCompanion ? &renderer.emptyTexture() : nullptr; + updateInputTexture(*edge.sink, &renderer.emptyTexture(), depthFallback); + } +} + +void RenderedRawRasterPipelineNode::init( + RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + // Procedural shaders (gl_VertexIndex + VERTEX_COUNT) don't need an + // upstream geometry cable — still wire their output passes. 
+ if(!m_mesh && !isProceduralDraw()) + return; + + for(auto* out_port : n.output) + for(auto* edge : out_port->edges) + addOutputPass(renderer, *edge, res); +} + +bool RenderedRawRasterPipelineNode::updateMaterials( + RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) +{ + bool mustRecreatePasses = false; + // Update audio textures + if(!n.m_audio_textures.empty() && !m_audioTex) + { + m_audioTex.emplace(); + } + + bool audioChanged = false; + std::size_t audio_idx = 0; + for(auto& audio : n.m_audio_textures) + { + if(std::optional sampl + = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res)) + { + // Texture changed -> material changed + audioChanged = true; + + auto& [rhiSampler, tex, fb_] = *sampl; + // Keep m_audioSamplers[i].texture in sync with the live GPU texture so + // any later pipeline rebuild (rt_changed path in RenderList::render + // calling removeOutputPass + addOutputPass) uses the live binding + // instead of the placeholder empty texture. + if(audio_idx < m_audioSamplers.size()) + m_audioSamplers[audio_idx].texture = tex; + + for(auto& [e, pass] : m_passes) + { + score::gfx::replaceTexture( + *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture()); + } + } + ++audio_idx; + } + + // Update material + if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged)) + { + char* data = n.m_material_data.get(); + SCORE_ASSERT(m_materialSize >= size_of_pipeline_material); + if(std::memcmp(data, this->m_prevPipelineChangingMaterial, size_of_pipeline_material) + != 0) + { + mustRecreatePasses = true; + std::copy_n(data, size_of_pipeline_material, this->m_prevPipelineChangingMaterial); + } + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); + } + materialChanged = false; + return mustRecreatePasses; +} + +void RenderedRawRasterPipelineNode::update( + RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) +{ + // Update node materials. 
This must be before any initial return, + // otherwise we miss the materialsChanged + bool mustRecreatePasses = updateMaterials(renderer, res, edge); + bool recreateDueToMaterial = mustRecreatePasses; + + // Refresh upstream-bound storage_input / uniform_input buffers from input + // ports. The first pass will pick them up via the SRB; subsequent passes + // need bindUpstreamBuffers to patch their SRBs in-place — handled per-pass + // when m_passes is iterated for SRB updates further down. (Safe to call + // even with no SRB; the helper just refreshes the m_storage entries.) + bindUpstreamBuffers(renderer, n.input, m_storage); + // Same pattern for read-only csf_image_input: adopt the matching upstream + // auxiliary_texture (a storage image written by an upstream CSF / + // RawRaster). Called per-frame so a producer that switches its underlying + // QRhiTexture on resize / rebuild flows through. The helper is + // idempotent on the swap and unconditionally patches each SRB it's + // given — so calling it once per pass refreshes every SRB while only + // doing the actual upstream lookup + swap on the first iteration. + if(geometry.meshes && !geometry.meshes->meshes.empty()) + { + // Per-pass refresh of name-matched-from-geometry bindings (SSBO/UBO/ + // storage_image). bindUpstream*FromGeometry are idempotent on the + // swap and unconditionally patch each SRB they're given — so calling + // each once per pass refreshes every SRB while doing the actual + // upstream lookup + swap only on the first iteration that observed + // a change. 
+ for(auto& [edge, pass] : m_passes) + { + if(pass.p.srb) + { + bindUpstreamImagesFromGeometry( + m_storage, geometry.meshes->meshes[0], pass.p.srb); + bindUpstreamBuffersFromGeometry( + *renderer.state.rhi, res, m_storage, + geometry.meshes->meshes[0], pass.p.srb); + } + } + } + + // Update the geometry (sync with ModelDisplayNode) + + if(this->geometryChanged) + { + if(geometry.meshes) + { + const Mesh* prevMesh = m_mesh; + std::tie(m_mesh, m_meshbufs) + = renderer.acquireMesh(geometry, res, m_mesh, m_meshbufs); + m_meshbufs.gpuIndirectSupported = renderer.state.caps.drawIndirect; + + this->meshChangedIndex = this->m_mesh->dirtyGeometryIndex; + +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + // Check for standalone indirect draw buffer from Buffer input ports + if(!m_meshbufs.useIndirectDraw) + { + for(auto* port : n.input) + { + if(port->type == Types::Buffer && !port->edges.empty()) + { + auto bv = renderer.bufferForInput(*port->edges.front()); + if(bv.usage == BufferView::Usage::IndirectDraw) + { + m_meshbufs.indirectDrawBuffer = bv.handle; + m_meshbufs.useIndirectDraw = true; + m_meshbufs.indirectDrawIndexed = false; break; } else if(bv.usage == BufferView::Usage::IndirectDrawIndexed) @@ -443,6 +2472,14 @@ void RenderedRawRasterPipelineNode::update( } this->geometryChanged = false; + // Re-resolve image-input samplers against the geometry's aux + // textures. Growing a channel's texture array on ScenePreprocessor + // republishes the geometry with a new QRhiTexture*; picking that up + // here keeps the SRB bound to the live array instead of the deleted + // one. A sampler change forces pass recreation so the SRB rebinds. 
+ if(rebindAuxTextures()) + mustRecreatePasses = true; + // Re-match auxiliary SSBOs from updated geometry if(geometry.meshes && !geometry.meshes->meshes.empty()) { @@ -499,17 +2536,57 @@ void RenderedRawRasterPipelineNode::update( } } + // Per-frame: re-pull upstream buffers wired through Buffer input ports + // (camera UBO, ExtractBuffer2 SSBOs, ...). Cheap: one virtual call per + // aux that has an input port index. Runs every frame because we cannot + // guarantee the upstream publisher's init() ran before ours — its + // bufferForOutput() may only return a non-null handle a frame later. + for(auto& aux : m_auxiliarySSBOs) + { + if(aux.input_port_index < 0 + || aux.input_port_index >= (int)n.input.size()) + continue; + Port* port = n.input[aux.input_port_index]; + if(!port || port->type != Types::Buffer) + continue; + + QRhiBuffer* upstream = nullptr; + for(Edge* edge : port->edges) + { + if(!edge || !edge->source) + continue; + if(edge->source->type != Types::Buffer) + continue; + if(auto view = renderer.bufferForInput(*edge); view.handle) + { + upstream = view.handle; + break; + } + } + if(!upstream || upstream == aux.buffer) + continue; + + // Drop any placeholder / previously-owned buffer and adopt upstream. + if(aux.owned && aux.buffer) + aux.buffer->deleteLater(); + aux.buffer = upstream; + aux.size = upstream->size(); + aux.owned = false; + mustRecreatePasses = true; + } + bool recreateDueToGeometry = mustRecreatePasses && !recreateDueToMaterial; - if(!m_mesh) + const bool procedural = isProceduralDraw(); + if(!m_mesh && !procedural) { - qDebug() << "RawRaster::update: no mesh!"; return; } // FIXME is that neeeded? // FIXME also not handling geometry_filter dirty geom so far - bool meshDirty = m_mesh->hasGeometryChanged(meshChangedIndex); + // Procedural draws never have a mesh — skip the dirty check. 
+ bool meshDirty = m_mesh && m_mesh->hasGeometryChanged(meshChangedIndex); if(meshDirty) { mustRecreatePasses = true; @@ -517,118 +2594,756 @@ void RenderedRawRasterPipelineNode::update( if(mustRecreatePasses) { - qWarning() << "RRP: recreating passes:" - << "material=" << recreateDueToMaterial - << "geometryChanged=" << recreateDueToGeometry - << "meshDirty=" << meshDirty; for(auto& pass : m_passes) { pass.second.p.release(); - delete pass.second.processUBO; + if(pass.second.processUBO) + pass.second.processUBO->deleteLater(); } m_passes.clear(); - for(Edge* edge : n.output[0]->edges) + for(auto& [e, sampler] : m_blitSamplersByEdge) + sampler->deleteLater(); + m_blitSamplersByEdge.clear(); + + if(m_hasMRT) + { + // Release and recreate the internal MRT render target + m_mrtRenderTarget.release(); + initMRTPass(renderer, res); + initMRTBlitPasses(renderer, res); + } + else + { + for(Edge* edge : n.output[0]->edges) + { + auto rt = renderer.renderTargetForOutput(*edge); + if(rt.renderTarget) + { + initPass(rt, renderer, res, *edge); + } + } + } + + // After pass recreation, the freshly built SRBs reference the + // CURRENT m_storage entries. For storage_input/uniform_input that + // are name-matched against the upstream geometry's auxiliary_buffers + // (the ScenePreprocessor publishing pattern: scene_lights / + // world_transforms / per_draws / scene_materials / scene_counts / + // scene_light_indices / camera UBO / env UBO), m_storage entries + // may still hold the 16-byte zero placeholder ensureStorageResources + // allocated for owned SSBOs — the per-pass refresh loop below + // (lines ~2640+) is gated on m_passes non-empty. On a fresh + // RenderList (resize / graph rebuild) the very first frame's + // initState ran with m_passes empty, init early-returned without + // building m_passes, then the per-pass refresh below was a no-op, + // and now mustRecreatePasses just built passes against the + // placeholder. 
Re-fire bindUpstream*FromGeometry on the freshly + // built SRBs so they pick up the live geometry buffers / textures + // immediately. Without this, classic_pbr's scene_counts.light_count + // reads as 0 on the resize frame → light loop runs 0 times → no + // specular until the next frame patches the SRB. + if(geometry.meshes && !geometry.meshes->meshes.empty()) { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) + for(auto& [edge, pass] : m_passes) + { + if(pass.p.srb) + { + bindUpstreamImagesFromGeometry( + m_storage, geometry.meshes->meshes[0], pass.p.srb); + bindUpstreamBuffersFromGeometry( + *renderer.state.rhi, res, m_storage, + geometry.meshes->meshes[0], pass.p.srb); + } + } + + // Sampler refresh: FIX-C above only patches m_storage entries + // (csf_image_input / storage_input / uniform_input). Plain + // image_input INPUTS (sampler2DArray, sampler2D, sampler3D, etc.) + // live in m_inputSamplers and are refreshed only by + // rebindAuxTextures Path A — gated on `geometryChanged` and run + // ONCE earlier in update() (line ~2462). If + // `geometry.meshes` was null at THAT moment (or if a sibling + // renderer republishes a fresh mesh_list AFTER that call) the + // sampler binding stays at its empty-texture placeholder OR a + // stale (deleteLater'd) upstream pointer. + // + // For the textured-PBR pipelines this manifests as: + // baseColorArray sampler reads garbage / NaN → BRDF math + // collapses → specular vanishes (ambient + base color factor + + // emissive remain). Untextured classic_pbr has zero image_input + // INPUTS so its m_inputSamplers is empty and the bug can't + // trigger — exactly the user-reported asymmetry. + // + // Re-run rebindAuxTextures here (idempotent: short-circuits when + // the slot's cached texture pointer matches the upstream's + // current pointer). 
When it returns true, hot-patch the existing + // SRBs in place via replaceTexture rather than going through + // another full mustRecreatePasses cycle — the pipeline layout + // is unchanged, only the texture pointer needs swapping. + if(rebindAuxTextures()) { - initPass(rt, renderer, *edge); + // Match key for replaceTexture MUST be the sampler that's + // actually in the SRB binding. allSamplers() (line ~155-170) + // substitutes m_inputSamplerOverrides[i] for m_inputSamplers[i] + // when ScenePreprocessor publishes a per-bucket sampler_handle + // (e.g. baseColorArray gets the bucket's QRhiSampler so each + // glTF/FBX material's wrap/filter survives). replaceTexture + // matches by sampler-pointer (Utils.cpp:435); using the + // ORIGINAL m_inputSamplers[i].sampler as the key when the SRB + // has the OVERRIDE silently no-ops — so the texture refresh + // never lands on textured-PBR pipelines that go through + // ScenePreprocessor's per-bucket sampler overrides. That was + // the residual lighting glitch on resize. 
+ const auto srb_key = [&](std::size_t i) -> QRhiSampler* { + if(i < m_inputSamplerOverrides.size() && m_inputSamplerOverrides[i]) + return m_inputSamplerOverrides[i]; + return m_inputSamplers[i].sampler; + }; + for(auto& [edge, pass] : m_passes) + { + if(!pass.p.srb) + continue; + for(std::size_t i = 0; i < m_inputSamplers.size(); ++i) + { + auto& s = m_inputSamplers[i]; + if(s.texture && s.sampler) + score::gfx::replaceTexture( + *pass.p.srb, srb_key(i), s.texture); + } + } + for(auto* invSrb : m_perInvocationSRBs) + { + if(!invSrb) + continue; + for(std::size_t i = 0; i < m_inputSamplers.size(); ++i) + { + auto& s = m_inputSamplers[i]; + if(s.texture && s.sampler) + score::gfx::replaceTexture( + *invSrb, srb_key(i), s.texture); + } + } } } } + m_mrtRenderedThisFrame = false; + n.standardUBO.passIndex = 0; n.standardUBO.frameIndex++; auto sz = renderer.renderSize(edge); n.standardUBO.renderSize[0] = sz.width(); n.standardUBO.renderSize[1] = sz.height(); - // Update all the process UBOs + // Update all the process UBOs (blit passes have nullptr processUBO) for(auto& [e, pass] : m_passes) { + if(!pass.processUBO) + continue; res.updateDynamicBuffer( pass.processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO); } res.updateDynamicBuffer(m_modelUBO, 0, sizeof(float[16]), m_modelTransform.matrix); + + // Reset event ports now that the material UBO has captured their pulse + // value via updateMaterials() above. If anything fired, set the shared + // materialChanged flag so next frame's updateMaterials() uploads the + // now-zero CPU memory instead of being gated out as unchanged. + if(n.resetEventPortsAfterFrame()) + this->materialChanged = true; + + // Persistent AUXILIARY ping-pong: swap buffer/prev_buffer pointers, then + // patch every pipeline's SRB so binding slots reference the post-swap + // buffers. Done at the end of update() so the pass that renders this + // frame already reads the previous frame's writes via `_prev`. 
+ bool anyPersistentSwap = false; + for(auto& aux : m_auxiliarySSBOs) + { + if(!aux.persistent || !aux.prev_buffer || n.standardUBO.frameIndex < 2u) + continue; + std::swap(aux.buffer, aux.prev_buffer); + anyPersistentSwap = true; + } + if(anyPersistentSwap) + { + for(auto& [e, pass] : m_passes) + { + if(!pass.p.srb) + continue; + for(const auto& aux : m_auxiliarySSBOs) + { + if(!aux.persistent || aux.binding < 0 || aux.prev_binding < 0) + continue; + score::gfx::replaceBuffer(*pass.p.srb, aux.prev_binding, aux.prev_buffer); + score::gfx::replaceBuffer(*pass.p.srb, aux.binding, aux.buffer); + } + // No trailing create() — replaceBuffer's updateResources() fast + // path already refreshes the backend descriptor state. + } + // Per-invocation SRB pool (PerMip / PerCubeFace / Manual EXECUTION_MODELs) + // shares the same persistent aux bindings as pass.p.srb. Without this + // loop, invocation 0 reads post-swap data while invocations 1..N-1 read + // the pre-swap (now `prev_buffer`-backed) buffers. + for(auto* invSrb : m_perInvocationSRBs) + { + if(!invSrb) + continue; + for(const auto& aux : m_auxiliarySSBOs) + { + if(!aux.persistent || aux.binding < 0 || aux.prev_binding < 0) + continue; + score::gfx::replaceBuffer(*invSrb, aux.prev_binding, aux.prev_buffer); + score::gfx::replaceBuffer(*invSrb, aux.binding, aux.buffer); + } + } + } } void RenderedRawRasterPipelineNode::release(RenderList& r) { - // customRelease + releaseState(r); +} + +void RenderedRawRasterPipelineNode::bindAuxTexturesInit(RenderList& /*renderer*/) +{ + m_auxTextureBindings.clear(); + const auto& desc = n.descriptor(); + + // initInputSamplers walks n.input[] and pushes samplers for each + // Types::Image port: 1 sampler, plus an extra "depth sampler" when the + // port has SamplableDepth (set for image_input.depth=true on a + // non-GrabsFromSource input). 
walk_descriptor_inputs gives us the + // canonical sampler delta per input (see isf_input_port_count_vis), + // so each image-like INPUT lands on its matching sampler slot. + walk_descriptor_inputs( + desc, [&](const isf::input& inp, const port_counts& cur, const port_counts& delta) { + if(delta.samplers > 0) + m_auxTextureBindings.push_back({cur.samplers, inp.name}); + }); + + // Seed initial texture pointers from whatever geometry was already + // published at init() time (typically none — the real lookup happens + // on the first update()'s geometryChanged branch). + rebindAuxTextures(); +} + +bool RenderedRawRasterPipelineNode::rebindAuxTextures() +{ + bool changed = false; + if(!geometry.meshes || geometry.meshes->meshes.empty()) + return changed; + const auto& mesh = geometry.meshes->meshes[0]; + + // Path A: texture *overrides* on input-port-backed samplers (legacy + // pattern: an INPUTS image whose name matches a geometry aux texture + // gets its sampler's texture pointer swapped). When the geometry + // also publishes a sampler_handle, swap that too — that's how + // ScenePreprocessor's per-bucket samplers (per-glTF wrap/filter) + // override the shader's static INPUTS sampler config. + for(const auto& b : m_auxTextureBindings) { - for(auto& texture : n.m_audio_textures) + if(b.sampler_idx < 0 || b.sampler_idx >= (int)m_inputSamplers.size()) + continue; + const auto* aux = mesh.find_auxiliary_texture(b.name); + if(!aux) + continue; + auto* tex = static_cast(aux->native_handle); + if(!tex) + continue; + auto& slot = m_inputSamplers[b.sampler_idx]; + if(slot.texture != tex) { - auto it = texture.samplers.find(&r); - if(it != texture.samplers.end()) + slot.texture = tex; + changed = true; + } + // Sampler override is non-owning — the bucket (in GpuResourceRegistry) + // owns the QRhiSampler. 
Stored in the parallel m_inputSamplerOverrides + // vector so the original initInputSamplers-owned sampler stays in + // m_inputSamplers and `delete sampler.sampler` in release() doesn't + // free the registry's sampler. allSamplers() applies the override + // when building the SRB. + if((int)m_inputSamplerOverrides.size() <= b.sampler_idx) + m_inputSamplerOverrides.resize(b.sampler_idx + 1, nullptr); + auto* smp = aux->sampler_handle + ? static_cast(aux->sampler_handle) + : nullptr; + if(m_inputSamplerOverrides[b.sampler_idx] != smp) + { + m_inputSamplerOverrides[b.sampler_idx] = smp; + changed = true; + } + } + + // Path B: top-level AUXILIARY textures (no input port). Resolve each + // entry against the geometry's auxiliary_textures by name; fall back + // to the shape-matched placeholder when nothing matches so we never + // keep a stale upstream handle (protects against UAFs when a producer + // disconnects or frees its texture). + bool auxTexChanged = false; + for(auto& ats : m_auxTextureSamplers) + { + const auto* aux = mesh.find_auxiliary_texture(ats.name); + auto* tex = aux ? static_cast(aux->native_handle) : nullptr; + if(!tex) + tex = ats.placeholder; // revert to empty of the right kind + if(!tex || tex == ats.texture) + continue; + ats.texture = tex; + auxTexChanged = true; + } + if(auxTexChanged) + { + // Batched SRB rebuild: one destroy+setBindings+create per pass, + // regardless of how many aux texture handles changed this frame. + // The per-texture `replaceTexture(srb, binding, tex)` overload each + // does its own destroy/setBindings/create, so looping it N times + // would trigger N full SRB rebuilds per pass per frame whenever + // textures change. Using the vector overload lets us batch into a + // single rebuild cycle. 
+ auto rebuildSrb = [&](QRhiShaderResourceBindings* srb) { + if(!srb) + return; + std::vector tmp; + tmp.assign(srb->cbeginBindings(), srb->cendBindings()); + for(const auto& ats : m_auxTextureSamplers) { - if(auto tex = it->second.texture) - { - if(tex != &r.emptyTexture()) - tex->deleteLater(); - } + if(ats.binding < 0 || !ats.texture) + continue; + score::gfx::replaceTexture(tmp, ats.binding, ats.texture); } - } + srb->destroy(); + srb->setBindings(tmp.begin(), tmp.end()); + srb->create(); + }; + for(auto& [e, pass] : m_passes) + rebuildSrb(pass.p.srb); + // Per-invocation SRB pool (PerMip / PerCubeFace / Manual + // EXECUTION_MODELs) — clones of pass.p.srb taken at construction + // (see initPass / initMRTPass per-invocation push). Without this + // mirror, invocation 0 (which renders through pass.p.srb) sees the + // refreshed aux texture while invocations 1..N-1 keep sampling the + // stale handle indefinitely. Same shape as the SSBO ping-pong fix + // for m_perInvocationSRBs above (line ~2649) — symmetric, the bug + // here was that the SSBO fix didn't propagate to aux-texture + // rebinds. + for(auto* invSrb : m_perInvocationSRBs) + rebuildSrb(invSrb); + changed = true; + } - for(auto& [edge, pass] : m_passes) - { - pass.p.release(); + return changed; +} - if(pass.processUBO) +void RenderedRawRasterPipelineNode::runInitialPasses( + RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& updateBatch, + Edge& edge) +{ + // MDI readback fallback: when the backend doesn't support drawIndirect, + // synchronously read back the GPU indirect buffer so the CPU draw loop + // has the commands ready for this frame's draw call. + // + // This MUST re-run every frame: the indirect buffer is GPU-generated (e.g. + // by a GPU culling compute pass) and changes frame to frame. Gating on + // cpuDrawCommands.empty() would freeze the draw list permanently after the + // first readback, so GPU culling output would diverge forever. 
We re-derive + // cpuDrawCommands from the latest indirect buffer contents each frame. + // + // Guard behind ReadBackNonUniformBuffer: this is exactly the feature missing + // on OpenGL ES 2.0 (GLES 3.x and desktop backends have it). Without it the + // readBackBuffer call would fail silently / assert, so we degrade gracefully + // (the draw falls back to whatever cpuDrawCommands already holds, or a single + // drawIndexed) and warn once. + if(m_meshbufs.useIndirectDraw + && !m_meshbufs.gpuIndirectSupported + && m_meshbufs.indirectDrawBuffer + && m_meshbufs.indirectDrawBuffer->size() > 0 + && renderer.state.rhi->isFeatureSupported(QRhi::ReadBackNonUniformBuffer)) + { + QRhi& rhi = *renderer.state.rhi; + auto* rb = rhi.nextResourceUpdateBatch(); + const quint32 bufSize = m_meshbufs.indirectDrawBuffer->size(); + m_meshbufs.readbackResult.completed = [this, bufSize]() { + const auto& data = m_meshbufs.readbackResult.data; + constexpr int cmdSize = 5 * sizeof(uint32_t); + const int cmdCount = data.size() / cmdSize; + m_meshbufs.cpuDrawCommands.clear(); + m_meshbufs.cpuDrawCommands.reserve(cmdCount); + const auto* raw = reinterpret_cast(data.constData()); + for(int c = 0; c < cmdCount; ++c) { - pass.processUBO->deleteLater(); + const uint32_t* p = raw + c * 5; + m_meshbufs.cpuDrawCommands.push_back({ + .index_or_vertex_count = p[0], + .instance_count = p[1], + .first_index_or_vertex = p[2], + .base_vertex = static_cast(p[3]), + .first_instance = p[4]}); } + }; + rb->readBackBuffer(m_meshbufs.indirectDrawBuffer, 0, bufSize, &m_meshbufs.readbackResult); + cb.resourceUpdate(rb); + rhi.finish(); + } + else if( + m_meshbufs.useIndirectDraw && !m_meshbufs.gpuIndirectSupported + && m_meshbufs.indirectDrawBuffer && m_meshbufs.indirectDrawBuffer->size() > 0 + && !renderer.state.rhi->isFeatureSupported(QRhi::ReadBackNonUniformBuffer)) + { + // Graceful degradation: the backend (e.g. OpenGL ES 2.0) can neither + // draw indirect nor read back the GPU-generated indirect buffer. 
The draw + // loop falls back to cpuDrawCommands (if a producer ever filled them) or a + // single drawIndexed. Warn once so the missing GPU-culled commands are + // diagnosable rather than a silent visual divergence. + static bool warned = false; + if(!warned) + { + warned = true; + qWarning() << "RenderedRawRasterPipelineNode: GPU-generated indirect draws " + "require QRhi::ReadBackNonUniformBuffer, unsupported on this " + "backend (e.g. OpenGL ES 2.0) — falling back to CPU draw " + "commands; GPU culling output will not be reflected."; } - - m_passes.clear(); } - for(auto sampler : m_inputSamplers) + if(!m_hasMRT || m_passes.empty()) + return; + // Procedural draws don't require a mesh/vertex buffers — the draw + // call uses gl_VertexIndex with no vertex bindings. Block only on + // the non-procedural path. + if(!isProceduralDraw() && (!m_mesh || m_meshbufs.buffers.empty())) + return; + + // Only render once per frame even if multiple downstream nodes trigger us + if(m_mrtRenderedThisFrame) + return; + m_mrtRenderedThisFrame = true; + + // MRT: render into our internal multi-attachment render target + auto& pass = m_passes[0].second; + + SCORE_ASSERT(pass.renderTarget.renderTarget); + SCORE_ASSERT(pass.p.pipeline); + SCORE_ASSERT(pass.p.srb); + + // Invocation-count resolution. Single → 1, PerMip / PerCubeFace → + // m_mipCount (reused to store either mip count or face count = 6), + // Manual → evaluate the COUNT expression (falls back to 1 when the + // expression is empty / unparseable). Runs every frame for Manual so + // the count can track live input values; cached for PerMip / + // PerCubeFace since the target shape is fixed at init. 
+ int invocationCount = 1; + if(m_executionMode == ExecutionMode::PerMip + || m_executionMode == ExecutionMode::PerCubeFace + || m_executionMode == ExecutionMode::PerLayer) { - delete sampler.sampler; - // texture isdeleted elsewxheree + invocationCount = std::max(1, m_mipCount); } - m_inputSamplers.clear(); - for(auto sampler : m_audioSamplers) + else if(m_executionMode == ExecutionMode::Manual) { - delete sampler.sampler; - // texture isdeleted elsewxheree + m_manualCount = resolveManualInvocationCount(); + invocationCount = std::max(1, m_manualCount); } - m_audioSamplers.clear(); - delete m_materialUBO; - m_materialUBO = nullptr; + auto* mainTex = pass.renderTarget.texture; + // Depth-only shaders have no colour attachment so mainTex is null; + // fall back to the depth attachment for the render-target size, then + // to the renderer's render-size as a last resort. PER_LAYER+depth + // specifically declares WIDTH/HEIGHT on its depth output (e.g. + // 2048×2048 for shadow maps) and we want the viewport to honour that + // rather than the window size. + QRhiTexture* sizeTex = mainTex + ? mainTex + : pass.renderTarget.depthTexture; + const QSize baseSize + = sizeTex ? sizeTex->pixelSize() : renderer.state.renderSize; - delete m_modelUBO; - m_modelUBO = nullptr; + QRhi& rhi = *renderer.state.rhi; - // Note: release() doesn't have access to the RenderList, so we use deleteLater. - // These buffers are only used in the SRB which is already released above. - for(auto& aux : m_auxiliarySSBOs) + // Grow the per-invocation UBO+SRB pool if invocationCount exceeds + // what we've already allocated. Each extra UBO gets its own dynamic + // slot (no inter-invocation aliasing of the underlying buffer — the + // QRhi Dynamic-UBO single-slot constraint is what made PASSINDEX + // collapse to the last-written value before this). SRB i clones the + // main SRB with the process-UBO binding swapped to UBO i. 
+ const int needed_extra = std::max(0, invocationCount - 1); + while((int)m_perInvocationUBOs.size() < needed_extra) { - if(aux.owned && aux.buffer) - aux.buffer->deleteLater(); + const int k = (int)m_perInvocationUBOs.size() + 1; + + auto* ubo = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(ProcessUBO)); + ubo->setName( + ("RRPNode::MRT::perInvocationUBO::" + std::to_string(k)).c_str()); + ubo->create(); + m_perInvocationUBOs.push_back(ubo); + + // Clone the main SRB's bindings, swap binding=1 (the process UBO + // per ISF convention — see isf.cpp's emitted `layout(std140, + // binding = 1) uniform process_t`) to point at our new buffer. + // The main pass's SRB is the layout-defining parent; new SRBs are + // structurally identical and therefore compatible with the main + // pipeline. + std::vector tmp; + if(pass.p.srb) + tmp.assign(pass.p.srb->cbeginBindings(), pass.p.srb->cendBindings()); + for(auto& b : tmp) + { + auto* d = reinterpret_cast(&b); + if(d->type == QRhiShaderResourceBinding::Type::UniformBuffer + && d->binding == 1) + { + d->u.ubuf.buf = ubo; + } + } + auto* srb = rhi.newShaderResourceBindings(); + srb->setName( + ("RRPNode::MRT::perInvocationSRB::" + std::to_string(k)).c_str()); + srb->setBindings(tmp.begin(), tmp.end()); + srb->create(); + m_perInvocationSRBs.push_back(srb); } - m_auxiliarySSBOs.clear(); -} + for(int i = 0; i < invocationCount; ++i) + { + // Stamp the per-invocation index into ProcessUBO. For PerMip this + // doubles as the mip level; for Manual it's the 0-based loop index. + // Each invocation writes to ITS OWN UBO (one allocated per slot + // above) so Dynamic-UBO single-slot-per-frame doesn't collapse + // every draw to the last-uploaded value. + QRhiBuffer* invUBO + = (i == 0) ? pass.processUBO : m_perInvocationUBOs[i - 1]; + QRhiShaderResourceBindings* invSRB + = (i == 0) ? pass.p.srb : m_perInvocationSRBs[i - 1]; + + auto* invBatch = (i == 0 && updateBatch) + ? 
updateBatch + : rhi.nextResourceUpdateBatch(); + this->n.standardUBO.passIndex = i; + invBatch->updateDynamicBuffer( + invUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO); + if(i == 0) + updateBatch = nullptr; + + QRhiTextureRenderTarget* rtForPass + = dynamic_cast(pass.renderTarget.renderTarget); + QSize viewportSize = baseSize; + if(m_executionMode == ExecutionMode::PerMip + && i < (int)m_mipRTs.size() && m_mipRTs[i].renderTarget) + { + rtForPass = m_mipRTs[i].renderTarget; + viewportSize = QSize( + std::max(1, baseSize.width() >> i), + std::max(1, baseSize.height() >> i)); + } + else if(m_executionMode == ExecutionMode::PerCubeFace + && i < (int)m_mipRTs.size() && m_mipRTs[i].renderTarget) + { + // Per-face cubemap RT. Face size = base (no per-face mipping in + // this first cut); viewport stays at baseSize. + rtForPass = m_mipRTs[i].renderTarget; + } + else if(m_executionMode == ExecutionMode::PerLayer) + { + // Color path: one RT per layer (stored in m_mipRTs, same shape as + // PerCubeFace). Depth path: a single shared RT bound to the + // scratch depth — we copy into the OUTPUT array layer-i after + // endPass below, so the same RT is reused across iterations. + if(m_perLayerIsDepth && m_perLayerSharedRT) + { + rtForPass = m_perLayerSharedRT; + } + else if(!m_perLayerIsDepth && i < (int)m_mipRTs.size() + && m_mipRTs[i].renderTarget) + { + rtForPass = m_mipRTs[i].renderTarget; + } + } -void RenderedRawRasterPipelineNode::runInitialPasses( - RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& updateBatch, - Edge& edge) -{ + cb.beginPass(rtForPass, Qt::transparent, {0.0f, 0}, invBatch); + + cb.setGraphicsPipeline(pass.p.pipeline); + cb.setViewport( + QRhiViewport(0, 0, viewportSize.width(), viewportSize.height())); + + // drawWithPerMeshAuxRebind sets shader resources and issues the + // draw call (or the per-sub-mesh loop for multi-mesh inputs). + // Pass the per-invocation SRB so each draw reads its own UBO. 
+ // Forward the pass's fallback-binding plan so "REQUIRED: false" + // VERTEX_INPUTS get their identity buffers bound. + drawWithPerMeshAuxRebind( + *invSRB, cb, + std::span{ + pass.fallback_bindings.slots}); + + cb.endPass(); + + // PerLayer + depth: copy the just-rendered scratch into layer i of + // the OUTPUT depth array. Qt RHI 6.11 has no per-layer depth + // attachment API, so this scratch+copy dance is the only way to + // populate distinct depth-array layers in N sequential passes. + // Single-format / single-size copy; QRhi handles the + // depth-write→transfer-src and transfer-dst→depth-write barriers + // around it automatically. + if(m_executionMode == ExecutionMode::PerLayer && m_perLayerIsDepth + && m_perLayerScratchDepth && m_perLayerOutputDepthArray) + { + auto* copyBatch = rhi.nextResourceUpdateBatch(); + QRhiTextureCopyDescription cdesc; + cdesc.setPixelSize(viewportSize); + cdesc.setSourceLayer(0); + cdesc.setSourceLevel(0); + cdesc.setSourceTopLeft(QPoint(0, 0)); + cdesc.setDestinationLayer(i); + cdesc.setDestinationLevel(0); + cdesc.setDestinationTopLeft(QPoint(0, 0)); + copyBatch->copyTexture( + m_perLayerOutputDepthArray, m_perLayerScratchDepth, cdesc); + cb.resourceUpdate(copyBatch); + } + } + + // Transparent CUBEMAP + MULTIVIEW finaliser. After all render passes + // have ended, copy each layer of the shadow TextureArray into the + // matching face of the public CubeMap. QRhi cube face layer order + // is +X, -X, +Y, -Y, +Z, -Z — same ordering as our IBL shaders' + // gl_ViewIndex, so layer i maps to face i 1:1. + // + // When PER_MIP is also active, both array and cube are MipMapped + // and we loop across the full mip chain: N * 6 copyTexture calls + // for N mips. Still basically free (pure GPU blit) — a 512² cube + // with 10 mips is 60 ops taking microseconds. 
+ if(m_cubeCopyShadowArray && m_cubeCopyCube) + { + auto* copyBatch = rhi.nextResourceUpdateBatch(); + const QSize faceSize = m_cubeCopyCube->pixelSize(); + const int mipLevels + = (m_executionMode == ExecutionMode::PerMip && m_mipCount > 0) + ? m_mipCount + : 1; + for(int mip = 0; mip < mipLevels; ++mip) + { + const QSize mipSize( + std::max(1, faceSize.width() >> mip), + std::max(1, faceSize.height() >> mip)); + for(int face = 0; face < 6; ++face) + { + QRhiTextureCopyDescription desc; + desc.setPixelSize(mipSize); + desc.setSourceLayer(face); + desc.setSourceLevel(mip); + desc.setSourceTopLeft(QPoint(0, 0)); + desc.setDestinationLayer(face); + desc.setDestinationLevel(mip); + desc.setDestinationTopLeft(QPoint(0, 0)); + copyBatch->copyTexture( + m_cubeCopyCube, m_cubeCopyShadowArray, desc); + } + } + cb.resourceUpdate(copyBatch); + } + + // GENERATE_MIPS: walk OUTPUTS and call generateMips() on every + // declared target. For cube-copy outputs the generated-on texture + // is the public cube (not the shadow array — downstream samples + // the cube, and the shadow array may not even have the MipMapped + // flag in non-PER_MIP cases). For all other outputs it's the + // colour attachment we allocated in colorTextures[]. + // + // Skip when PER_MIP is active on the SAME output: the render loop + // has already authored distinct content per mip, and generateMips + // would overwrite those sub-mips with averaged base-level data. + { + auto* mipBatch = rhi.nextResourceUpdateBatch(); + bool any = false; + int colorIdx = 0; + for(const auto& out : n.descriptor().outputs) + { + if(out.type == "depth") + continue; + if(out.generate_mips) + { + const bool perMipOwnsThis + = m_executionMode == ExecutionMode::PerMip + && colorIdx == m_perMipOutputIndex; + if(!perMipOwnsThis) + { + QRhiTexture* tgt + = (colorIdx == m_cubeCopyOutputIdx && m_cubeCopyCube) + ? m_cubeCopyCube + : (colorIdx == 0 + ? 
pass.renderTarget.texture + : (colorIdx - 1 + < (int)pass.renderTarget + .additionalColorTextures.size() + ? pass.renderTarget + .additionalColorTextures[colorIdx - 1] + : nullptr)); + if(tgt) + { + mipBatch->generateMips(tgt); + any = true; + } + } + } + ++colorIdx; + } + if(any) + cb.resourceUpdate(mipBatch); + else + mipBatch->release(); + } } void RenderedRawRasterPipelineNode::runRenderPass( RenderList& renderer, QRhiCommandBuffer& cb, Edge& edge) { + // Plan 09 S6: debug marker for capture-tool readability (RenderDoc / + // Nsight show the scope boundary + node name). No GPU timing + // attribution here — QRhi's lastCompletedGpuTime is CB-scope, not + // pass-scope. RAII via QByteArray lifetime keeps the end-marker + // paired even on early returns. + cb.debugMarkBegin(QByteArrayLiteral("RawRasterPipeline")); + struct MarkEnd + { + QRhiCommandBuffer* c; + ~MarkEnd() { c->debugMarkEnd(); } + } _me{&cb}; + + // MRT nodes render to their internal target in runInitialPasses, + // then blit the appropriate texture here. 
+ if(m_hasMRT) + { + // Find the blit pass for this edge + auto it = ossia::find_if(this->m_passes, [&](auto& p) { return p.first == &edge; }); + if(it == this->m_passes.end()) + return; + + auto& pass = it->second; + SCORE_ASSERT(pass.renderTarget.renderTarget); + SCORE_ASSERT(pass.p.pipeline); + SCORE_ASSERT(pass.p.srb); + + cb.setGraphicsPipeline(pass.p.pipeline); + cb.setShaderResources(pass.p.srb); + + auto* tex = pass.renderTarget.texture; + cb.setViewport(QRhiViewport( + 0, 0, tex->pixelSize().width(), tex->pixelSize().height())); + + m_blitMesh->draw(this->m_blitMeshbufs, cb); + return; + } + auto it = ossia::find_if(this->m_passes, [&](auto& p) { return p.first == &edge; }); // Maybe the shader could not be created if(it == this->m_passes.end()) return; - if(!m_mesh) - return; - if(this->m_meshbufs.buffers.empty()) + // Procedural draws (VERTEX_INPUTS: [] + VERTEX_COUNT) have no mesh + // and no vertex bindings — the draw issues cb.draw(vcount, icount) + // directly via drawWithPerMeshAuxRebind's VERTEX_COUNT branch. + const bool procedural = isProceduralDraw(); + if(!procedural && (!m_mesh || this->m_meshbufs.buffers.empty())) return; auto& pass = it->second; @@ -638,20 +3353,20 @@ void RenderedRawRasterPipelineNode::runRenderPass( SCORE_ASSERT(pass.renderTarget.renderTarget); SCORE_ASSERT(pass.p.pipeline); SCORE_ASSERT(pass.p.srb); - // TODO : combine all the uniforms.. 
auto pipeline = pass.p.pipeline; auto srb = pass.p.srb; auto texture = pass.renderTarget.texture; - // TODO need to free stuff { cb.setGraphicsPipeline(pipeline); - cb.setShaderResources(srb); cb.setViewport(QRhiViewport( 0, 0, texture->pixelSize().width(), texture->pixelSize().height())); - m_mesh->draw(this->m_meshbufs, cb); + drawWithPerMeshAuxRebind( + *srb, cb, + std::span{ + pass.fallback_bindings.slots}); } } } @@ -661,6 +3376,330 @@ void RenderedRawRasterPipelineNode::process(int32_t port, const ossia::transform m_modelTransform = v; } +void RenderedRawRasterPipelineNode::drawWithPerMeshAuxRebind( + QRhiShaderResourceBindings& srb, QRhiCommandBuffer& cb, + std::span fallback_slots) +{ + // Phase 2 unified MDI: ScenePreprocessor's output geometry is now + // ALWAYS a single sub-mesh (regular meshes + instance groups all + // ride through one drawIndexedIndirect / one cpu_draw_commands + // iteration). There is no per-sub-mesh SRB rebind to do — the SRB + // is bound once and the draw fans out via the indirect cmd list. + // The legacy name is preserved for now to avoid churning every + // call-site; rename pass deferred. + cb.setShaderResources(&srb); + + // PIPELINE_STATE: { "VERTEX_COUNT": N, "INSTANCE_COUNT": M, + // "TOPOLOGY": "..." } — procedural/VSA-style draw override. Issue a + // single cb.draw(N, M, 0, 0) and ignore the incoming geometry's + // index/indirect buffers entirely; the vertex shader drives positions + // from gl_VertexIndex + gl_InstanceIndex. Used for fullscreen passes + // (skybox: VERTEX_COUNT=3), procedural geometry (VSA plasma: + // VERTEX_COUNT=10000, TOPOLOGY=line_strip), etc. Without this, a + // fullscreen pass wired to a complex scene rasterizes N/3 fullscreen + // triangles — devastating even with early-Z (SciFiHelmet → ~46k + // fullscreen tris → ~100ms/frame on a GTX 1080). + // + // Safety: if the shader declares non-empty VERTEX_INPUTS (i.e. 
reads + // vertex attributes), clamp the draw count to the incoming geometry's + // vertex_count so the VS can't fetch past the bound buffer. Shaders + // that live purely on gl_VertexIndex should declare `VERTEX_INPUTS: + // []` — the pipeline is then built with no vertex bindings and + // VERTEX_COUNT is used verbatim. + { + const auto& ds = n.descriptor().default_state; + if(ds.vertex_count.has_value()) + { + uint32_t vcount = *ds.vertex_count; + const uint32_t icount = ds.instance_count.value_or(1u); + + const bool hasVertexInputs = !n.descriptor().vertex_inputs.empty(); + if(hasVertexInputs && this->geometry.meshes + && !this->geometry.meshes->meshes.empty()) + { + const uint32_t incoming + = (uint32_t)this->geometry.meshes->meshes[0].vertices; + if(incoming > 0 && vcount > incoming) + vcount = incoming; + } + + // Bind vertex buffers driven by the geometry's `input` list — NOT + // every entry in m_meshbufs.buffers. Since the scene preprocessor + // started appending the index buffer + scene-wide SSBOs (lights / + // materials / per-draws / …) to g.buffers for the auxiliary + // mapping, blindly binding the buffers array pushes STORAGE / INDEX + // buffers into vertex binding slots and Vulkan validation fires + // `VUID-vkCmdBindVertexBuffers-pBuffers-00627`. g.input is the + // authoritative vertex-binding list. 
+ std::array inputs; + std::size_t nb = 0; + if(this->geometry.meshes && !this->geometry.meshes->meshes.empty()) + { + const auto& g0 = this->geometry.meshes->meshes[0]; + const std::size_t cap = inputs.size(); + for(const auto& in : g0.input) + { + if(nb >= cap) + break; + const std::size_t idx = (std::size_t)in.buffer; + if(idx >= m_meshbufs.buffers.size()) + continue; + auto* h = m_meshbufs.buffers[idx].handle; + if(!h) + continue; + inputs[nb++] = {h, (quint32)in.byte_offset}; + } + } + if(nb > 0) + cb.setVertexInput(0, (int)nb, inputs.data()); + + if(vcount > 0 && icount > 0) + cb.draw(vcount, icount, 0, 0); + return; + } + } + + // Single-mesh draw. ScenePreprocessor unified-MDI emits one sub-mesh + // covering every regular cmd + every instance group; the indirect cmd + // list fans out across them. Per-pass pipeline swapping (alpha-blend + // etc.) is NOT handled here — that's the job of a dedicated + // downstream node configured by the user as a separate render pass. + if(m_mesh) + { + // Fallback-aware draw when the shader declared "REQUIRED: false" + // VERTEX_INPUTS whose semantics are missing from upstream geometry. + // Plain pass-through otherwise (zero overhead when the plan is empty). + if(!fallback_slots.empty()) + { + if(auto* cm2 = dynamic_cast(m_mesh)) + cm2->drawWithFallbackBindings(m_meshbufs, cb, fallback_slots); + else + m_mesh->draw(m_meshbufs, cb); + } + else + { + m_mesh->draw(m_meshbufs, cb); + } + } +} + RenderedRawRasterPipelineNode::~RenderedRawRasterPipelineNode() { } +bool RenderedRawRasterPipelineNode::isProceduralDraw() const noexcept +{ + const auto& desc = n.descriptor(); + return desc.vertex_inputs.empty() + && desc.default_state.vertex_count.has_value() + && *desc.default_state.vertex_count > 0; +} + +// Generic integer-expression evaluator. Shared by EXECUTION_MODEL=MANUAL +// (COUNT) and OUTPUTS.WIDTH / HEIGHT. Pure-integer fast path avoids the +// expression parser for the overwhelmingly common literal case. 
+// Variable surface matches CSF dispatch expressions so all three sites +// share a mental model: $WIDTH / $HEIGHT / $DEPTH / $LAYERS of the first +// input image (unsuffixed + per-name variants), plus scalar input values +// as $. '$' → 'var_' rewrite follows the CSF convention. +int RenderedRawRasterPipelineNode::resolveIntExpression( + const std::string& expr, int fallback) const +{ + if(expr.empty()) + return fallback; + + // Pure-integer fast path — std::stoi would otherwise silently accept + // "6 * $x" as 6 (ignoring the variable reference entirely). + { + std::size_t i = 0; + while(i < expr.size() && std::isspace((unsigned char)expr[i])) + ++i; + const std::size_t first_digit = i; + while(i < expr.size() && std::isdigit((unsigned char)expr[i])) + ++i; + const std::size_t last_digit = i; + while(i < expr.size() && std::isspace((unsigned char)expr[i])) + ++i; + if(first_digit < last_digit && i == expr.size()) + { + try + { + return std::max(1, std::stoi(expr)); + } + catch(...) + { + } + } + } + + ossia::math_expression e; + ossia::small_pod_vector data; + data.reserve(16); + + auto register_size = [&](const std::string& name, QRhiTexture* tex, + bool& first) { + QSize px = tex ? 
tex->pixelSize() : QSize{1280, 720}; + int depth = 1, layers = 1; + if(tex) + { + if((int)(tex->flags() & QRhiTexture::ThreeDimensional)) + depth = std::max(1, tex->depth()); + if((int)(tex->flags() & QRhiTexture::TextureArray)) + layers = std::max(1, tex->arraySize()); + } + if(px.width() <= 0) + px.setWidth(1280); + if(px.height() <= 0) + px.setHeight(720); + e.add_constant("var_WIDTH_" + name, data.emplace_back(px.width())); + e.add_constant("var_HEIGHT_" + name, data.emplace_back(px.height())); + e.add_constant("var_DEPTH_" + name, data.emplace_back(depth)); + e.add_constant("var_LAYERS_" + name, data.emplace_back(layers)); + if(first) + { + e.add_constant("var_WIDTH", data.emplace_back(px.width())); + e.add_constant("var_HEIGHT", data.emplace_back(px.height())); + e.add_constant("var_DEPTH", data.emplace_back(depth)); + e.add_constant("var_LAYERS", data.emplace_back(layers)); + first = false; + } + }; + + // Walk the descriptor's image-style inputs in declared order so the + // first one supplies the unsuffixed $WIDTH / $HEIGHT family, matching + // CSF's `registerCommonExpressionVariables` semantics. + bool first_image = true; + int sampler_idx = 0; + for(const auto& inp : n.descriptor().inputs) + { + if(ossia::get_if(&inp.data) + || ossia::get_if(&inp.data)) + { + QRhiTexture* t = nullptr; + if(sampler_idx < (int)m_inputSamplers.size()) + t = m_inputSamplers[sampler_idx].texture; + register_size(inp.name, t, first_image); + ++sampler_idx; + } + } + + // Scalar ports — mirror the $ surface. Walking node.input in + // parallel with descriptor.inputs lets us pull live values without + // reimplementing the port-dispatch plumbing. + int port_idx = 0; + for(const auto& inp : n.descriptor().inputs) + { + auto port = (port_idx < (int)n.input.size()) ? 
n.input[port_idx] + : nullptr; + if(ossia::get_if(&inp.data)) + { + if(port && port->value) + e.add_constant( + "var_" + inp.name, data.emplace_back(*(float*)port->value)); + } + else if(ossia::get_if(&inp.data)) + { + if(port && port->value) + e.add_constant( + "var_" + inp.name, data.emplace_back(*(int*)port->value)); + } + ++port_idx; + } + + // Register $COUNT_ / $BYTESIZE_ for every + // SSBO / UBO the raster pipeline binds (INPUTS storage_input / + // uniform_input, plus top-level AUXILIARY entries). Same semantics as + // CSF: COUNT = element count of the flexible array (or 1 for UBOs / + // fixed-layout SSBOs), BYTESIZE = raw byte size of the binding. Lets + // OUTPUTS.WIDTH / HEIGHT / MANUAL-count expressions size themselves + // against upstream buffer extents by name, matching the convention + // used by CSF compute passes. + // + // Live sizes come from m_auxiliarySSBOs (populated at init time from + // actual buffer allocations / upstream adoptions); layout comes from + // the descriptor. Cross-reference by name. + { + ossia::hash_set registered; + const auto& desc = n.descriptor(); + + // Find the live byte size for a given aux name. Falls back to 0 if + // the binding isn't yet live (first frame, unbound edge, etc.) — + // count then resolves to 1, which is the zero-copy-safe default. + auto find_aux_size = [&](const std::string& name) -> int64_t { + for(const auto& aux : m_auxiliarySSBOs) + if(aux.name == name) + return aux.size; + return 0; + }; + + // Register a buffer whose storage-side layout is available. SSBOs + // use the layout to derive element stride (fixed part + flexible- + // array element), UBOs skip the layout lookup since they're always + // one struct instance with $COUNT = 1. 
+ auto register_ssbo + = [&](const std::string& name, int64_t byte_size, + std::span layout) { + if(name.empty() || registered.contains(name)) + return; + int64_t element_count = 1; + const int64_t fixed_part + = score::gfx::calculateStorageBufferSize(layout, 0, desc); + const int64_t with_one + = score::gfx::calculateStorageBufferSize(layout, 1, desc); + const int64_t stride = with_one - fixed_part; + if(stride > 0 && byte_size > fixed_part) + element_count = (byte_size - fixed_part) / stride; + if(element_count < 1) + element_count = 1; + e.add_constant( + "var_COUNT_" + name, data.emplace_back((double)element_count)); + e.add_constant( + "var_BYTESIZE_" + name, data.emplace_back((double)byte_size)); + registered.insert(name); + }; + + auto register_ubo + = [&](const std::string& name, int64_t byte_size) { + if(name.empty() || registered.contains(name)) + return; + e.add_constant("var_COUNT_" + name, data.emplace_back(1.0)); + e.add_constant( + "var_BYTESIZE_" + name, data.emplace_back((double)byte_size)); + registered.insert(name); + }; + + // INPUTS storage_input / uniform_input + for(const auto& inp : desc.inputs) + { + if(auto* s = ossia::get_if(&inp.data)) + register_ssbo(inp.name, find_aux_size(inp.name), s->layout); + else if(ossia::get_if(&inp.data)) + register_ubo(inp.name, find_aux_size(inp.name)); + } + + // Top-level AUXILIARY entries (declared at descriptor root). 
+ for(const auto& aux : desc.auxiliary) + { + if(aux.is_uniform) + register_ubo(aux.name, find_aux_size(aux.name)); + else + register_ssbo(aux.name, find_aux_size(aux.name), aux.layout); + } + } + + std::string eval_expr = expr; + boost::algorithm::replace_all(eval_expr, "$", "var_"); + e.register_symbol_table(); + if(e.set_expression(eval_expr)) + return std::max(1, (int)e.value()); + + qWarning() << "RawRaster: integer expression failed:" + << e.error().c_str() << eval_expr.c_str(); + return fallback; +} + +int RenderedRawRasterPipelineNode::resolveManualInvocationCount() const +{ + return resolveIntExpression( + n.descriptor().execution_model.count_expression, 1); +} + } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp index 296f384553..09cdcf585a 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedRawRasterPipelineNode.hpp @@ -1,8 +1,14 @@ #pragma once #include +#include #include #include +#include + +#include + +#include namespace score::gfx { @@ -14,13 +20,22 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer virtual ~RenderedRawRasterPipelineNode(); - void updateInputTexture(const Port& input, QRhiTexture* tex) override; + void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override; + QRhiTexture* textureForOutput(const Port& output) override; void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; bool updateMaterials(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge); void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass(RenderList& renderer, Edge& edge, 
QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + void addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeInputEdge(RenderList& renderer, Edge& edge) override; + void runInitialPasses( RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, Edge& edge) override; @@ -30,7 +45,61 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer void process(int32_t port, const ossia::transform3d& v) override; private: - void initPass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge); + // Resolves every image-style INPUT against the incoming geometry's + // auxiliary_textures list and overrides the initial texture pointer in + // m_inputSamplers for matches. Also builds m_auxTextureBindings so + // update() can cheaply re-run the lookup when the geometry changes. + // Must be called AFTER initInputSamplers. + void bindAuxTexturesInit(RenderList& renderer); + + // Per-frame update hook: walks m_auxTextureBindings, re-resolves each + // binding's texture pointer from the current geometry's aux textures, + // and returns true if at least one sampler's texture pointer changed + // (caller will flag mustRecreatePasses). + bool rebindAuxTextures(); + + void initPass( + const TextureRenderTarget& rt, RenderList& renderer, + QRhiResourceUpdateBatch& res, Edge& edge); + void initMRTPass(RenderList& renderer, QRhiResourceUpdateBatch& res); + void initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res); + void initMRTBlitPass(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge); + + // EXECUTION_MODEL=MANUAL: evaluate the COUNT expression against the + // live input state (first input image's $WIDTH / $HEIGHT / $DEPTH / + // $LAYERS, scalar input values as $). 
Pure-integer literal + // fast path; otherwise delegate to ossia::math_expression with '$' → + // 'var_' rewrite — same convention as CSF STRIDE / image-size + // expressions. Returns >= 1; unparseable expressions degrade to 1. + int resolveManualInvocationCount() const; + + // True when the shader renders procedurally: no VERTEX_INPUTS + // (gl_VertexIndex-driven) and PIPELINE_STATE.VERTEX_COUNT specified (> 0). + // In that mode the node needs no upstream geometry — m_mesh stays + // null and the draw call skips vertex-buffer bindings entirely. + // Used to relax the "no mesh, bail out" guards that otherwise block + // fullscreen passes, test shaders, VSA-style procedural draws, and + // IBL precompute shaders from rendering when wired without a + // geometry input. + bool isProceduralDraw() const noexcept; + + // Evaluate an integer-valued expression against the same variable + // surface as resolveManualInvocationCount ($WIDTH_<name> / $HEIGHT / + // scalar inputs). Used for OUTPUTS.WIDTH / HEIGHT at init time. + // Returns `fallback` when the expression is empty or fails to evaluate, >=1 otherwise. + int resolveIntExpression(const std::string& expr, int fallback) const; + + // Bind `srb` once and issue the draw for the currently bound pipeline. + // When PIPELINE_STATE declares VERTEX_COUNT, a single procedural + // cb.draw(VERTEX_COUNT, INSTANCE_COUNT) is issued (the count is clamped to + // the incoming vertex count if the shader declares VERTEX_INPUTS). + // Otherwise the mesh's draw() is used, or drawWithFallbackBindings() when + // `fallback_slots` is non-empty and the mesh supports it. Despite the + // legacy name there is no per-sub-mesh SRB rebind any more: the upstream + // geometry is treated as a single sub-mesh.
+ void drawWithPerMeshAuxRebind( + QRhiShaderResourceBindings& srb, QRhiCommandBuffer& cb, + std::span fallback_slots = {}); std::vector allSamplers() const noexcept; @@ -40,11 +109,16 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer std::vector m_inputSamplers; std::vector m_audioSamplers; + ossia::small_flat_map m_blitSamplersByEdge; int64_t meshChangedIndex{-1}; const Mesh* m_mesh{}; MeshBuffers m_meshbufs; + // Quad mesh used for MRT blit passes (separate from the geometry mesh) + const Mesh* m_blitMesh{}; + MeshBuffers m_blitMeshbufs; + QRhiBuffer* m_materialUBO{}; int m_materialSize{}; @@ -53,19 +127,223 @@ struct RenderedRawRasterPipelineNode : score::gfx::NodeRenderer struct AuxiliarySSBO { QRhiBuffer* buffer{}; + QRhiBuffer* prev_buffer{}; //!< Only set when persistent == true: the other half of the ping-pong pair. int64_t size{}; - bool owned{true}; // false when adopted from upstream geometry + bool owned{true}; // false when adopted from upstream geometry / upstream port + bool is_uniform{false}; // true for uniform_input, false for storage_input + bool persistent{false}; //!< Ping-pong pair swapped each frame (raw raster AUXILIARY only) std::string name; std::string access; + // Index into n.input[] for the score port that may carry an upstream- + // supplied QRhiBuffer*. -1 when the buffer can only come from the + // input geometry's auxiliary list (e.g. desc.auxiliary entries without + // a matching INPUTS port). + int input_port_index{-1}; + // SRB binding slot assigned at pipeline build time. Needed so the per- + // sub-mesh draw loop can patch `per_draw` (and any other per-mesh aux) + // to point at mesh[i]'s buffer before drawing sub-mesh i. -1 when the + // aux was filtered out of the SRB (e.g. visibility==none). + int binding{-1}; + // For persistent aux only: binding slot of the _prev (read-only) + // half of the ping-pong pair. prev_binding + 1 == binding. 
+ int prev_binding{-1}; }; std::vector m_auxiliarySSBOs; + // Storage images (and the rest of the INPUTS storage trio: storage_input + // for SSBOs / csf_image_input for image2D/3D / uniform_input for UBOs) + // declared in the top-level INPUTS array. Wired via the shared + // IsfBindingsBuilder helpers so the SRB binding type matches the + // GLSL emission from `isf_emit_graphics_storage` (see + // `isf.cpp:3349-3395`). RenderedISFNode and SimpleRenderedISFNode use + // the same pattern. m_auxiliarySSBOs carries only the AUXILIARY-block + // entries for RawRaster — the dual-population kept here is intentional + // for the Q1 transition while the AUXILIARY path still has its own + // dispatch (line 1885+); a follow-up could fold that into m_storage too. + GraphicsStorageResources m_storage; + int m_firstStorageBinding{-1}; + + // Texture auxes carried on the input geometry (see + // ossia::geometry::auxiliary_textures). Each entry records a sampler + // slot in m_inputSamplers that auto-resolves its texture pointer from + // the incoming geometry's aux-texture list by name at init() time and + // again every time the geometry changes. Eliminates the need for a + // dedicated texture cable (base_color_array / skybox / ...). + struct AuxTextureBinding + { + int sampler_idx{-1}; // index into m_inputSamplers + std::string name; // INPUT name, matched against auxiliary_texture::name + }; + std::vector m_auxTextureBindings; + + // Non-owning per-port sampler overrides published by upstream + // geometry's `auxiliary_texture::sampler_handle`. Parallel to + // m_inputSamplers — index N's override (or null) applies to + // m_inputSamplers[N]'s effective sampler at SRB-build time. Stored + // separately from `Sampler` because the entries in m_inputSamplers + // are owned and `delete sampler.sampler` runs on every entry at + // release; overwriting `Sampler::sampler` with a registry-owned + // sampler would double-free at teardown. 
+ std::vector m_inputSamplerOverrides; + + // Textures declared in the top-level AUXILIARY array (TYPE: image / + // texture / cubemap / image_cube). Do NOT create a score input port — + // resolved only from ossia::geometry::auxiliary_textures by name, with + // a placeholder bound until the first matching handle arrives. + struct AuxTextureAuxSampler + { + QRhiSampler* sampler{}; // Null for storage-image entries. + QRhiTexture* texture{}; + // Shape-matched empty fallback (one of the RenderList-owned empty + // textures). Set at init from is_cubemap / dimensions / is_array and + // never changes. When rebindAuxTextures stops finding a matching + // aux_texture upstream (producer stopped publishing the name, got + // disconnected, etc.) we revert `texture` to this placeholder rather + // than leaving the previous (possibly-freed) upstream handle in + // place. Never owned by us. + QRhiTexture* placeholder{}; + std::string name; + int binding{-1}; + // Storage-image variant: bound with imageLoad / imageStore / + // imageLoadStore instead of sampledTexture. `access` distinguishes + // which of the three — "read_only" / "write_only" / "read_write". + bool is_storage{false}; + std::string access; + }; + std::vector m_auxTextureSamplers; + std::optional m_audioTex; + // MRT: internally-owned render target with multiple attachments + TextureRenderTarget m_mrtRenderTarget; + bool m_hasMRT{false}; + bool m_mrtRenderedThisFrame{false}; + + // EXECUTION_MODEL (top-level, RAW_RASTER only). + // Single — classic single-invocation pass (default; no extra loop). + // PerMip — N invocations, one per mip level of the TARGET output. + // Each invocation binds a per-mip render target so the + // single draw writes only that mip; ProcessUBO.passIndex + // carries the mip index. Needed for prefiltered-GGX + // roughness sweep. + // PerLayer — N invocations, one per array layer of the TARGET output. + // Each invocation binds the matching layer; ProcessUBO. 
+ // passIndex carries the layer index. Color targets bind + // setLayer(i) directly. Depth targets render to a shared + // scratch and copyTexture into layer i after the pass + // (Qt RHI 6.11 has no per-layer depth attachment API). + // Drives shadow_cascades.frag (one cascade per layer). + // Manual — N invocations decided every frame by evaluating a + // COUNT expression via the math_expression parser (same + // variable surface as CSF STRIDE / image-size expressions: + // $WIDTH, $HEIGHT, $, ...). All invocations + // share the single MRT render target; the shader reads + // ProcessUBO.passIndex to branch. + enum class ExecutionMode : std::uint8_t + { + Single, + PerMip, + PerCubeFace, // Iterate 6 cube faces; target = CubeMap + setLayer(i) + PerLayer, // Iterate N array layers; target = TextureArray + setLayer(i) + Manual + }; + ExecutionMode m_executionMode{ExecutionMode::Single}; + + // PerCubeFace state. The target OUTPUT is allocated with + // QRhiTexture::CubeMap (6 implicit layers) and six per-face render + // targets are built at init; runInitialPasses iterates them in order, + // stamping the face index into ProcessUBO.passIndex. Shares the + // m_perMipOutputIndex resolution path (same "which colour output is + // the target" question) and reuses the m_mipRTs vector for storage + // — interpretation is mode-dependent (mip level vs face index). + int m_perCubeFaceOutputIndex{-1}; + + // PerMip state. When PerMip is active the MRT target texture is + // allocated with QRhiTexture::MipMapped and m_mipCount / m_mipRTs + // point at per-level render-pass views of it. m_perMipOutputIndex is + // the index into m_mrtRenderTarget{.texture, .additionalColorTextures} + // that we iterate. -1 in other modes. + int m_perMipOutputIndex{-1}; + int m_mipCount{0}; + struct MipRT + { + QRhiTextureRenderTarget* renderTarget{}; + QRhiRenderPassDescriptor* renderPass{}; + QRhiTexture* depth{}; // per-level depth — owned here. 
+ }; + std::vector m_mipRTs; + + // PerLayer state. m_perLayerOutputIndex is the RAW index into + // descriptor().outputs[] (depth-inclusive — unlike the color-only + // m_perMipOutputIndex / m_perCubeFaceOutputIndex). m_perLayerIsDepth + // discriminates the two implementation paths: + // + // - Color target (m_perLayerIsDepth == false): m_mipRTs holds N + // entries (one per layer), each with a setLayer(i) attachment. + // Mirrors PER_CUBE_FACE structurally with a variable layer count. + // + // - Depth target (m_perLayerIsDepth == true): Qt RHI 6.11 doesn't + // expose per-layer depth attachment, so m_perLayerScratchDepth is + // a single 2D D32F render-target texture shared across iterations + // (m_perLayerSharedRT/RP). After each iteration's endPass, + // runInitialPasses emits copyTexture(scratch -> depthTex layer i). + // m_perLayerOutputDepthArray aliases depthTex (the OUTPUT array), + // used as the copy destination. + int m_perLayerOutputIndex{-1}; + bool m_perLayerIsDepth{false}; + QRhiTexture* m_perLayerScratchDepth{nullptr}; + QRhiTexture* m_perLayerDummyColor{nullptr}; + QRhiTextureRenderTarget* m_perLayerSharedRT{nullptr}; + QRhiRenderPassDescriptor* m_perLayerSharedRP{nullptr}; + QRhiTexture* m_perLayerOutputDepthArray{nullptr}; + + // Manual state. Re-evaluated every frame in runInitialPasses. + int m_manualCount{1}; + + // Per-invocation UBO + SRB pool for PER_MIP / PER_CUBE_FACE / MANUAL. + // + // Dynamic UBOs in QRhi have a SINGLE slot per frame-in-flight: + // multiple updateDynamicBuffer calls to the same buffer within one + // frame overwrite each other on the host, and every draw submitted + // that frame ends up reading the LAST uploaded value. Stamping + // distinct PASSINDEX values per invocation into one shared UBO + // therefore collapses — all mips / faces render with the same + // (last) index, producing uniformly-blurred output at every mip. 
+ // + // Fix: one UBO + one SRB per invocation, all pre-built at init so + // the render loop just swaps which SRB it binds per pass. Index 0 + // corresponds to the main pass UBO/SRB (pass.processUBO / + // pass.p.srb) — the vectors below hold indices 1..N-1 only, which + // are allocated lazily when invocation count exceeds the current + // pool size (handles MANUAL whose count is per-frame-dynamic). + std::vector m_perInvocationUBOs; + std::vector m_perInvocationSRBs; + + // Transparent CUBEMAP + MULTIVIEW compatibility shim. QRhi forbids + // setMultiViewCount on a cube texture (qrhi.cpp:2561). When a shader + // declares both `CUBEMAP: true` and `MULTIVIEW: N`, we render into a + // hidden 2D TextureArray (the only shape multiview accepts) and then + // blit each array layer onto the corresponding cube face at the end + // of runInitialPasses. Downstream consumers see a real samplerCube + // via textureForOutput() → the cube; the shadow array never leaves + // this class. + // + // m_cubeCopyShadowArray = TextureArray used as the multiview render + // target (6 layers, `UsedAsTransferSource`). + // m_cubeCopyCube = public CubeMap handed to downstream. + // m_cubeCopyOutputIdx = colour-attachment index (0-based among + // non-depth outputs) whose target is handled + // via the array-then-copy path; -1 otherwise. + // Only one output per shader gets this + // treatment in this first cut. + QRhiTexture* m_cubeCopyShadowArray{}; + QRhiTexture* m_cubeCopyCube{}; + int m_cubeCopyOutputIdx{-1}; + // The part of the m_materialUBO for which changes // trigger a pipeline recreation (blend status etc.) 
static constexpr int size_of_pipeline_material = 32; - char m_prevPipelineChangingMaterial[size_of_pipeline_material]{0}; + alignas(4) char m_prevPipelineChangingMaterial[size_of_pipeline_material]{0}; struct PipelineChangingMaterial { int32_t mode; // tri, point, line diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp index 8fd1037b5a..288586a76f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.cpp @@ -33,7 +33,7 @@ SimpleRenderedVSANode::SimpleRenderedVSANode(const ISFNode& node) noexcept { } -void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* tex) +void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { int sampler_idx = 0; for(auto* p : node.input) @@ -41,7 +41,11 @@ void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* t if(p == &input) break; if(p->type == Types::Image) + { sampler_idx++; + if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) + sampler_idx++; + } } if(sampler_idx < (int)m_inputSamplers.size()) @@ -54,6 +58,20 @@ void SimpleRenderedVSANode::updateInputTexture(const Port& input, QRhiTexture* t if(pd.main_pass.p.srb) score::gfx::replaceTexture(*pd.main_pass.p.srb, sampl.sampler, tex); } + + if(depthTex + && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth + && sampler_idx + 1 < (int)m_inputSamplers.size()) + { + auto& depthSampl = m_inputSamplers[sampler_idx + 1]; + if(depthSampl.texture != depthTex) + { + depthSampl.texture = depthTex; + for(auto& pd : m_passes) + if(pd.main_pass.p.srb) + score::gfx::replaceTexture(*pd.main_pass.p.srb, depthSampl.sampler, depthTex); + } + } } } @@ -118,35 +136,90 @@ void SimpleRenderedVSANode::initPass( pubo->setName("SimpleRenderedVSANode::initPass::pubo"); pubo->create(); - // Create the main pass + // Create the main 
pass. + // Apply cull-mode, front-face, and blend state BEFORE the first create() + // call so we only compile the PSO once instead of the previous two-compile + // pattern (buildPipeline::create + destroy + mutate + create). try { auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS); - auto pip = score::gfx::buildPipeline( - renderer, *m_mesh, v, s, renderTarget, pubo, m_materialUBO, allSamplers()); - if(pip.pipeline) + auto* srb = score::gfx::createDefaultBindings( + renderer, renderTarget, pubo, m_materialUBO, allSamplers()); + + // Inline the essential steps of buildPipeline(srb) so we can insert the + // VSA-specific cull/front-face/blend state before create(). + auto* ps = rhi.newGraphicsPipeline(); + SCORE_ASSERT(ps); + ps->setName("SimpleRenderedVSANode::initPass::ps"); + + // VSA blend: simple alpha blend (no premul factors needed here). + QRhiGraphicsPipeline::TargetBlend t{}; + t.enable = true; + ps->setTargetBlends({t}); + + // API-specific cull mode for 3-D VSA meshes. + // + // Note: this is NOT a Y-up vs Y-down NDC issue. QRhi exposes + // QRhi::isYUpInNDC() and QRhi::clipSpaceCorrMatrix() (qrhi.h:2056, + // :2059) so a shader applying clipSpaceCorrMatrix uniformly across + // backends does not need a per-backend cull-flip. Other rendered- + // pipeline nodes (RenderedISFNode, RenderedRawRasterPipelineNode, + // CustomMesh) just use unconditional CullMode::Back. + // + // VSA emits its mesh procedurally (no clipSpaceCorrMatrix applied) + // and its triangle winding ends up CCW under GL's framebuffer-Y + // convention; flipping to CullMode::Front under GL is the workaround + // until VSA's procedural emit applies the corr matrix itself. 
+ switch(renderer.state.api) { - QRhiGraphicsPipeline::TargetBlend t{}; - t.enable = true; - pip.pipeline->destroy(); - switch(renderer.state.api) - { - default: - case GraphicsApi::Vulkan: - pip.pipeline->setCullMode(QRhiGraphicsPipeline::CullMode::Back); - break; - case GraphicsApi::OpenGL: - pip.pipeline->setCullMode(QRhiGraphicsPipeline::CullMode::Front); - break; - } - pip.pipeline->setFrontFace(QRhiGraphicsPipeline::FrontFace::CW); - pip.pipeline->setTargetBlends({t}); - pip.pipeline->create(); + case GraphicsApi::Vulkan: + case GraphicsApi::D3D11: + case GraphicsApi::D3D12: + case GraphicsApi::Metal: + case GraphicsApi::Null: + ps->setCullMode(QRhiGraphicsPipeline::CullMode::Back); + break; + case GraphicsApi::OpenGL: + ps->setCullMode(QRhiGraphicsPipeline::CullMode::Front); + break; + default: + qWarning() << "RenderedVSANode: unhandled graphics API for cull mode; defaulting to Back"; + ps->setCullMode(QRhiGraphicsPipeline::CullMode::Back); + break; + } + ps->setFrontFace(QRhiGraphicsPipeline::FrontFace::CW); + + const int rtS = renderTarget.sampleCount(); + ps->setSampleCount(rtS > 0 ? rtS : renderer.samples()); + + m_mesh->preparePipeline(*ps); + + if(!renderer.anyNodeRequiresDepth()) + { + ps->setDepthTest(false); + ps->setDepthWrite(false); + } + + ps->setShaderStages( + {{QRhiShaderStage::Vertex, v}, {QRhiShaderStage::Fragment, s}}); + ps->setShaderResourceBindings(srb); + SCORE_ASSERT(renderTarget.renderPass); + ps->setRenderPassDescriptor(renderTarget.renderPass); + + Pipeline pip{}; + if(ps->create()) + { + pip = {ps, srb}; m_passes.emplace_back( &edge, Pass{renderTarget, pip, pubo}, bg_pip, bg_srb, bg_ubo, bg_tri); } else + { + qDebug() << "Warning! VSA pipeline not created"; + delete ps; + delete srb; delete pubo; + } } catch(...) 
{ @@ -154,6 +227,14 @@ void SimpleRenderedVSANode::initPass( } void SimpleRenderedVSANode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + for(Edge* edge : n.output[0]->edges) + addOutputPass(renderer, *edge, res); +} + +void SimpleRenderedVSANode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { QRhi& rhi = *renderer.state.rhi; @@ -195,6 +276,8 @@ void SimpleRenderedVSANode::init(RenderList& renderer, QRhiResourceUpdateBatch& = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); m_materialUBO->setName("SimpleRenderedVSANode::init::m_materialUBO"); SCORE_ASSERT(m_materialUBO->create()); + if(n.m_material_data) + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get()); } // Create the samplers @@ -202,22 +285,53 @@ void SimpleRenderedVSANode::init(RenderList& renderer, QRhiResourceUpdateBatch& SCORE_ASSERT(m_inputSamplers.empty()); SCORE_ASSERT(m_audioSamplers.empty()); - m_inputSamplers = initInputSamplers(this->n, renderer, n.input); + m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor()); m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures); - // Create the passes + m_initialized = true; +} - for(Edge* edge : n.output[0]->edges) +void SimpleRenderedVSANode::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) - { - initPass(rt, renderer, *edge, res); - } + initPass(rt, renderer, edge, res); + } +} + +void SimpleRenderedVSANode::removeOutputPass(RenderList& renderer, Edge& edge) +{ + auto it + = ossia::find_if(m_passes, [&](const auto& p) { return p.edge == &edge; }); + if(it != m_passes.end()) + { + it->main_pass.p.release(); + + if(it->main_pass.processUBO) + it->main_pass.processUBO->deleteLater(); + + 
it->background_pipeline->destroy(); + it->background_pipeline->deleteLater(); + + it->background_srb->destroy(); + it->background_srb->deleteLater(); + + it->background_ubo->destroy(); + it->background_ubo->deleteLater(); + + m_passes.erase(it); } } +bool SimpleRenderedVSANode::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_passes, [&](const auto& p) { return p.edge == &edge; }) + != m_passes.end(); +} + void SimpleRenderedVSANode::update( RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) { @@ -247,6 +361,7 @@ void SimpleRenderedVSANode::update( } bool audioChanged = false; + std::size_t audio_idx = 0; for(auto& audio : n.m_audio_textures) { if(std::optional sampl @@ -255,13 +370,30 @@ void SimpleRenderedVSANode::update( // Texture changed -> material changed audioChanged = true; - auto& [rhiSampler, tex] = *sampl; + auto& [rhiSampler, tex, fb_] = *sampl; + QRhiTexture* boundTex = tex ? tex : &renderer.emptyTexture(); + + // Keep m_audioSamplers[i].texture in sync with the live GPU texture. + // If a pass is later torn down and rebuilt (e.g. rt_changed path in + // RenderList::render calling removeOutputPass + addOutputPass), + // allSamplers() must hand buildPipeline the current texture so the + // fresh SRB is bound correctly. Without this sync the rebuilt SRB + // would bind &renderer.emptyTexture() (because m_audioSamplers had + // texture=nullptr from initAudioTextures) and no subsequent + // updateAudioTexture would ever re-trigger replaceTexture — the + // post-no-change path returns {} — so the shader would read zero + // for the rest of the session. Observed as 1×1 empty texture in + // RenderDoc after a viewport resize. + if(audio_idx < m_audioSamplers.size()) + m_audioSamplers[audio_idx].texture = tex; + for(auto& pass : m_passes) { score::gfx::replaceTexture( - *pass.main_pass.p.srb, rhiSampler, tex ? 
tex : &renderer.emptyTexture()); + *pass.main_pass.p.srb, rhiSampler, boundTex); } } + ++audio_idx; } // Update material @@ -270,6 +402,7 @@ void SimpleRenderedVSANode::update( char* data = n.m_material_data.get(); res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); } + materialChanged = false; // Update all the process UBOs for(auto& pass : m_passes) @@ -288,7 +421,15 @@ void SimpleRenderedVSANode::update( void SimpleRenderedVSANode::release(RenderList& r) { - // customRelease + releaseState(r); +} + +void SimpleRenderedVSANode::releaseState(RenderList& r) +{ + if(!m_initialized) + return; + + // Release all remaining passes { for(auto& texture : n.m_audio_textures) { @@ -300,6 +441,8 @@ void SimpleRenderedVSANode::release(RenderList& r) if(tex != &r.emptyTexture()) tex->deleteLater(); } + it->second.texture = nullptr; + it->second = {}; } } @@ -326,13 +469,11 @@ void SimpleRenderedVSANode::release(RenderList& r) for(auto sampler : m_inputSamplers) { delete sampler.sampler; - // texture isdeleted elsewxheree } m_inputSamplers.clear(); for(auto sampler : m_audioSamplers) { delete sampler.sampler; - // texture isdeleted elsewxheree } m_audioSamplers.clear(); @@ -341,6 +482,8 @@ void SimpleRenderedVSANode::release(RenderList& r) delete m_mesh; m_mesh = nullptr; + + m_initialized = false; } void SimpleRenderedVSANode::runInitialPasses( diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp index 64607503fd..09c4dfc9ca 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RenderedVSANode.hpp @@ -12,12 +12,19 @@ struct SimpleRenderedVSANode : score::gfx::NodeRenderer virtual ~SimpleRenderedVSANode(); - void updateInputTexture(const Port& input, QRhiTexture* tex) override; + void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override; void init(RenderList& renderer, 
QRhiResourceUpdateBatch& res) override; void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + void runInitialPasses( RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, Edge& edge) override; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm index 61e288d7cd..587089806d 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiBufferCopyMetal.mm @@ -13,6 +13,20 @@ namespace score::gfx { +// Pre-condition: cb must NOT have an active render or compute pass. +// Metal allows only one encoder open on a command buffer at a time; calling +// [MTLCommandBuffer blitCommandEncoder] while a render or compute encoder is +// still open will trigger a Metal internal assertion or silent misbehaviour. +// Call this between cb.endPass() and the next cb.beginPass(). +// +// Hazard tracking: Metal's default MTLHazardTrackingModeTracked automatically +// inserts a dependency between this blit encoder and any subsequent encoder on +// the same command buffer that accesses the same buffer. No explicit MTLFence +// or MTLBarrier is required for tracked resources. +// +// Note: QRhi's own QRhiResourceUpdateBatch::copyBuffer enforces the +// no-active-pass contract internally. This native-handle path bypasses that +// check, so the caller is responsible for ensuring no encoder is open. 
void copyBufferMetal( QRhi& rhi, QRhiCommandBuffer& cb, QRhiBuffer* src, QRhiBuffer* dst, int size, @@ -52,6 +66,54 @@ void copyBufferMetal( [blit endEncoding]; } +// Pre-condition: cb must NOT have an active render or compute pass. +// Same contract as copyBufferMetal above: only one encoder may be open on a +// MTLCommandBuffer at a time. Caller is responsible for ensuring no render or +// compute encoder is currently open before calling this function. +// +// Metal's default hazard tracking inserts the required memory dependency +// between this blit and subsequent encoders on the same command buffer that +// read the destination buffer; no explicit fence is needed. +void copyBufferRegionsMetal( + QRhi& rhi, QRhiCommandBuffer& cb, + QRhiBuffer* src, QRhiBuffer* dst, + const BufferCopyRegion* regions, int count) +{ + if(!src || !dst || !regions || count <= 0) + return; + + const auto* handles + = static_cast(cb.nativeHandles()); + if(!handles || !handles->commandBuffer) + return; + + auto srcNative = src->nativeBuffer(); + auto dstNative = dst->nativeBuffer(); + if(!srcNative.objects[0] || !dstNative.objects[0]) + return; + + id cmdBuf = (id)handles->commandBuffer; + void* const* srcSlot = static_cast(srcNative.objects[0]); + void* const* dstSlot = static_cast(dstNative.objects[0]); + id srcBuf = (__bridge id) (*srcSlot); + id dstBuf = (__bridge id) (*dstSlot); + if(!srcBuf || !dstBuf) + return; + + // One blit encoder, N copyFromBuffer calls. Amortizes encoder + // creation/teardown and any implicit GPU state transitions. 
+ id blit = [cmdBuf blitCommandEncoder]; + for(int i = 0; i < count; ++i) + { + [blit copyFromBuffer:srcBuf + sourceOffset:(NSUInteger)regions[i].src_offset + toBuffer:dstBuf + destinationOffset:(NSUInteger)regions[i].dst_offset + size:(NSUInteger)regions[i].size]; + } + [blit endEncoding]; +} + } #else @@ -64,6 +126,12 @@ void copyBufferMetal( QRhiBuffer*, QRhiBuffer*, int, int, int) { } +void copyBufferRegionsMetal( + QRhi&, QRhiCommandBuffer&, + QRhiBuffer*, QRhiBuffer*, + const BufferCopyRegion*, int) +{ +} } #endif diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.cpp new file mode 100644 index 0000000000..dc9bb99129 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.cpp @@ -0,0 +1,261 @@ +#include + +#include + +#include + +// Vulkan +#if QT_HAS_VULKAN || (QT_CONFIG(vulkan) && __has_include()) +#include +#if __has_include() +#include +#else +#include +#endif +#include +#define SCORE_HAS_VULKAN 1 +#endif + +#include +#include +#include + +// On non-Apple, provide a no-op stub for clearBufferMetal +// (the real implementation lives in RhiClearBufferMetal.mm) +#if !defined(Q_OS_MACOS) && !defined(Q_OS_IOS) +namespace score::gfx +{ +bool clearBufferMetal( + QRhi&, QRhiCommandBuffer&, QRhiBuffer*, quint32, quint32, quint32) +{ + return false; +} +} +#endif + +namespace score::gfx +{ +namespace +{ + +// Thread-local zero-buffer pool. Amortises the std::vector(N, 0) +// allocation across every clearBuffer call site — at steady state the +// vector grows once to the max requested size and is reused for every +// subsequent call, so the per-call cost is just returning a pointer +// into the pool: it is zero-filled (assign) only when it has to grow +// and is only ever handed out as const char*. +// +// Pattern != 0 hits a side path that materialises the requested +// 4-byte pattern into a separate vector. 
The default-pattern (0) path +// is the one every current call site uses. +const char* getZeroBuffer(quint32 size) +{ + thread_local std::vector zero_pool; + if(zero_pool.size() < size) + zero_pool.assign(size, 0); + return zero_pool.data(); +} + +// Pattern path — used when pattern != 0. Replicates the 4-byte pattern +// across the requested size. The buffer is sticky per-thread so a hot +// pattern (e.g. 0xFFFFFFFF for "invalid slot" sentinels) reuses the +// same memory. Switching patterns rewrites the buffer. +const char* getPatternBuffer(quint32 size, quint32 pattern) +{ + thread_local std::vector pattern_pool; + thread_local quint32 last_pattern = 0u; + thread_local quint32 last_filled = 0u; + const bool grow = pattern_pool.size() < size; + if(grow) + pattern_pool.resize(size); + if(grow || last_pattern != pattern || last_filled < size) + { + auto* p = pattern_pool.data(); + const quint32 n = size / 4u; + for(quint32 i = 0; i < n; ++i) + std::memcpy(p + i * 4u, &pattern, 4u); + // Tail bytes (size not 4-aligned). vkCmdFillBuffer requires + // 4-aligned size so this only matters for the batch fallback. + const quint32 tail = size - n * 4u; + if(tail) + std::memcpy(p + n * 4u, &pattern, tail); + last_pattern = pattern; + last_filled = size; + } + return pattern_pool.data(); +} + +const char* getSourceBytes(quint32 size, quint32 pattern) +{ + return pattern == 0u ? getZeroBuffer(size) : getPatternBuffer(size, pattern); +} + +// Route a clear into a QRhiResourceUpdateBatch the way QRhi expects: +// uploadStaticBuffer for Static, updateDynamicBuffer for Dynamic UBOs +// (chunked at 65535 bytes — QRhi's documented maximum per call for +// the host-coherent path). 
+void clearViaBatch( + QRhiResourceUpdateBatch& batch, QRhiBuffer* buf, + quint32 offset, quint32 size, quint32 pattern) +{ + if(!buf || size == 0) + return; + const char* src = getSourceBytes(size, pattern); + if(buf->type() == QRhiBuffer::Dynamic) + { + quint32 off = 0; + while(off < size) + { + const quint32 chunk = std::min(size - off, 65535u); + batch.updateDynamicBuffer(buf, offset + off, chunk, src + off); + off += chunk; + } + } + else + { + batch.uploadStaticBuffer(buf, offset, size, src); + } +} + +} // namespace + +// Returns true on success (native path took it), false to request the +// shared fallback. Backend-specific helper to keep clearBuffer() free +// of forward-flow control hazards. +static bool clearBufferNative( + QRhi& rhi, + QRhiCommandBuffer& cb, + QRhiBuffer* buf, + quint32 offset, + quint32 size, + quint32 pattern) +{ + switch(rhi.backend()) + { +#if SCORE_HAS_VULKAN + case QRhi::Vulkan: { + // vkCmdFillBuffer is only legal on buffers with + // VK_BUFFER_USAGE_TRANSFER_DST_BIT. QRhi's QVkBuffer::create adds + // that bit only for non-Dynamic buffers (see qrhivulkan.cpp ~line + // 7212). Dynamic UBOs would trip the validation layer if we + // called vkCmdFillBuffer on them — fall back to the deferred + // path. (In practice none of the current call sites pass a + // Dynamic buffer through the CB variant; this is defence in + // depth.) + if(buf->type() == QRhiBuffer::Dynamic) + return false; + + auto* inst = score::gfx::staticVulkanInstance(); + if(!inst) + return false; + + auto fn = reinterpret_cast( + inst->getInstanceProcAddr("vkCmdFillBuffer")); + if(!fn) + return false; + + auto* native + = static_cast(cb.nativeHandles()); + if(!native || !native->commandBuffer) + return false; + + auto bufNative = buf->nativeBuffer(); + if(!bufNative.objects[0]) + return false; + + // QRhi NativeBuffer convention (Vulkan): objects[i] is `VkBuffer *`, + // i.e. a POINTER TO the handle. Dereference to obtain the actual + // VkBuffer. 
See the long comment in RhiComputeBarrier.cpp's copyBuffer + // for the per-backend convention table. + VkBuffer vkbuf = *static_cast(bufNative.objects[0]); + if(vkbuf == VK_NULL_HANDLE) + return false; + + cb.beginExternal(); + // vkCmdFillBuffer bypasses QRhi's resource tracking, so we must emit the + // same compute→transfer→compute/vertex/indirect barriers the copyBuffer + // path uses. Without the pre-barrier a prior compute write may not be + // visible to the fill; without the post-barrier a subsequent draw/compute + // read may race the fill. beginBufferCopyBarrier/endBufferCopyBarrier are + // designed to run inside an existing beginExternal/endExternal bracket + // (they record vkCmdPipelineBarrier directly), which is exactly here. + beginBufferCopyBarrier(rhi, cb); + // vkCmdFillBuffer signature: (cb, buffer, offset, size, data). + // - offset and size MUST be multiples of 4. Caller is required to + // honour this; we don't silently round here because doing so + // would clear bytes the caller didn't request. + // - data is a uint32_t replicated across the range (exactly the + // contract the abstraction exposes via @p pattern). + // - The buffer must NOT be in a render pass; this path is + // intended for resource setup / runInitialPasses-style sites + // that have a CB but no active pass. + fn(native->commandBuffer, vkbuf, + static_cast(offset), + static_cast(size), + pattern); + endBufferCopyBarrier(rhi, cb); + cb.endExternal(); + return true; + } +#endif + + case QRhi::Metal: + return clearBufferMetal(rhi, cb, buf, offset, size, pattern); + +#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) + case QRhi::D3D12: +#endif + case QRhi::D3D11: + case QRhi::OpenGLES2: + default: + // No native fast path wired yet. 
+ return false; + } +} + +void RhiClearBuffer::clearBuffer( + QRhi& rhi, + QRhiCommandBuffer& cb, + QRhiBuffer* buf, + quint32 offset, + quint32 size, + quint32 pattern) +{ + if(!buf || size == 0) + return; + + if(clearBufferNative(rhi, cb, buf, offset, size, pattern)) + return; + + // No native path available. Allocate a one-shot QRhiResourceUpdateBatch + // and submit it to the rhi via the standard route. We deliberately do + // NOT borrow the caller's batch here (the caller doesn't have one in + // scope by definition — they passed us a CB). The cost: one batch + // allocation + queue insertion. Still much cheaper than a per-call + // std::vector(size, 0) allocation thanks to the zero pool. + if(auto* batch = rhi.nextResourceUpdateBatch()) + { + clearViaBatch(*batch, buf, offset, size, pattern); + cb.resourceUpdate(batch); + } +} + +void RhiClearBuffer::clearBuffer( + QRhi& rhi, + QRhiResourceUpdateBatch& batch, + QRhiBuffer* buf, + quint32 offset, + quint32 size, + quint32 pattern) +{ + // Backend is not relevant here — every backend's update batch is a + // straight CPU→GPU upload, so the only thing the abstraction buys us + // is the zero pool (eliminating the per-call vector allocation that + // motivated this whole exercise). A future revision could record a + // pending native fill and apply it in the next CB-recording op, but + // that's a deeper refactor than the current bug warrants. 
+ (void)rhi; + clearViaBatch(batch, buf, offset, size, pattern); +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.hpp new file mode 100644 index 0000000000..a3a56d6bf9 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBuffer.hpp @@ -0,0 +1,103 @@ +#pragma once +#include + +#include + +class QRhi; +class QRhiBuffer; +class QRhiCommandBuffer; +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ + +/** + * @brief Fill (a sub-range of) a QRhiBuffer with a 4-byte pattern. + * + * Replaces the wasteful `std::vector zeros(size, 0); res.uploadStaticBuffer(buf, 0, size, zeros.data());` + * idiom that pays a per-call zero-vector allocation + a CPU→GPU upload of + * zero bytes. The new entry points either issue a native GPU-side fill + * (vkCmdFillBuffer / MTLBlitCommandEncoder fillBuffer:range:value:) or + * route to QRhi's update batch with a thread-local zero-buffer pool so + * the zero source bytes are amortised across calls. + * + * The motivating bug: Vulkan does NOT initialise VkBuffer memory — the + * underlying device-memory page contains whatever was there before. For + * sparse-uploaded SSBOs (RawLight arena, world_transforms, per_draws past + * drawCount, …), the un-touched bytes get read by shaders and feed + * garbage into the pipeline. Manifests as "wildly different lighting per + * resize" because each fresh VkBuffer lands on a different page. The + * defensive zero-fill via uploadStaticBuffer ships zeros from CPU to GPU + * — correct but slow; this abstraction picks the right native path. + * + * Per-backend behaviour: + * - Vulkan : vkCmdFillBuffer (CB variant) — Static buffers only, since + * QRhi's setupBuffer adds VK_BUFFER_USAGE_TRANSFER_DST_BIT + * only when m_type != Dynamic. Dynamic UBOs fall back to the + * update batch path. (See qrhivulkan.cpp QVkBuffer::create.) 
+ * - Metal : id fillBuffer:range:value: (CB variant) + * - D3D12 : currently falls back to the update batch (a future + * optimisation can use ClearUnorderedAccessViewUint or a + * thread-local zero-resource + CopyBufferRegion). + * - D3D11 : fall back to the update batch. + * - GL/GLES: fall back to the update batch (drivers commonly zero + * initialised buffer memory anyway, and GL exposes + * glClearBufferSubData on 4.3+ which we don't currently wire). + * + * Both variants accept an arbitrary 4-byte @p pattern (replicated across + * the requested range). Default is 0 — the only pattern any current call + * site uses. @p offset and @p size MUST be 4-byte aligned (Vulkan + * vkCmdFillBuffer requires it; the batch fallback is permissive but the + * abstraction enforces the strict contract for portability). + */ +namespace RhiClearBuffer +{ + +/// CB-recording variant. Uses native fast paths inside +/// beginExternal()/endExternal() per QRhi convention. Falls back to +/// recording a host-side memset uploaded via a temporary update batch +/// when no native path is available — but the batch variant is the +/// preferred entry point for sites that aren't already inside a render +/// pass and have only a QRhiResourceUpdateBatch in scope. +SCORE_PLUGIN_GFX_EXPORT +void clearBuffer( + QRhi& rhi, + QRhiCommandBuffer& cb, + QRhiBuffer* buf, + quint32 offset, + quint32 size, + quint32 pattern = 0u); + +/// Update-batch variant. Routes to QRhi's uploadStaticBuffer (Static +/// buffers) or updateDynamicBuffer (Dynamic UBOs) using a thread-local +/// zero-buffer pool — no per-call zero-vector allocation. This is the +/// drop-in replacement for the existing +/// `std::vector zeros(size, 0); batch.uploadStaticBuffer(...)` +/// pattern. +/// +/// @p pattern other than 0 will allocate a small thread-local pattern +/// buffer for the call (uncommon path); 0 hits the fast pool. 
+SCORE_PLUGIN_GFX_EXPORT +void clearBuffer( + QRhi& rhi, + QRhiResourceUpdateBatch& batch, + QRhiBuffer* buf, + quint32 offset, + quint32 size, + quint32 pattern = 0u); + +} // namespace RhiClearBuffer + +// Metal-specific implementation hook (lives in RhiClearBufferMetal.mm). +// On non-Apple platforms a no-op stub is provided in RhiClearBuffer.cpp. +// Returns true on success, false if the native path is unavailable +// (caller should fall back to the batch variant). +bool clearBufferMetal( + QRhi& rhi, + QRhiCommandBuffer& cb, + QRhiBuffer* buf, + quint32 offset, + quint32 size, + quint32 pattern); + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBufferMetal.mm b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBufferMetal.mm new file mode 100644 index 0000000000..05c44b5eb9 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiClearBufferMetal.mm @@ -0,0 +1,87 @@ +#include + +#include + +#if __has_include() +#include +#if __has_include() +#include +#else +#include +#endif + +namespace score::gfx +{ + +// Pre-condition: cb must NOT have an active render or compute pass — +// same contract as copyBufferMetal in RhiBufferCopyMetal.mm. Metal allows +// only one encoder open on a command buffer at a time; opening a blit +// encoder while a render/compute encoder is live triggers an internal +// assertion or silent misbehaviour. +// +// Hazard tracking: the default MTLHazardTrackingModeTracked inserts a +// dependency between this blit encoder and any subsequent encoder on +// the same command buffer that touches the same buffer, so no explicit +// MTLFence / MTLBarrier is needed. +// +// fillBuffer:range:value: takes a single byte value (uint8_t), not a +// 4-byte word. We map 4-byte patterns to a Metal fill ONLY when all +// four bytes are equal — the common case (pattern == 0 or pattern == +// 0xFFFFFFFF). 
// Patterns whose four bytes differ are not handled natively: the function
// returns false and the caller is expected to fall back to QRhi's update
// batch.
//
// Returns true only when a blit fill was actually recorded on `cb`.
bool clearBufferMetal(
    QRhi& rhi,
    QRhiCommandBuffer& cb,
    QRhiBuffer* buf,
    quint32 offset,
    quint32 size,
    quint32 pattern)
{
  (void)rhi;
  if(buf == nullptr || size == 0u)
    return false;

  // The blit fill takes one byte. Accept the pattern only if it is that
  // byte replicated four times (e.g. 0x00000000 or 0xFFFFFFFF).
  const uint8_t fillByte = static_cast<uint8_t>(pattern & 0xFFu);
  const quint32 replicated = static_cast<quint32>(fillByte) * 0x01010101u;
  if(pattern != replicated)
    return false;

  const auto* cbHandles
      = static_cast<const QRhiMetalCommandBufferNativeHandles*>(cb.nativeHandles());
  if(!cbHandles || !cbHandles->commandBuffer)
    return false;

  // objects[0] points at the slot holding the MTLBuffer handle, not at the
  // handle itself, so it is dereferenced once. Only slot 0 is used.
  const auto nativeBuf = buf->nativeBuffer();
  const void* slotAddress = nativeBuf.objects[0];
  if(!slotAddress)
    return false;

  id<MTLCommandBuffer> commands = (id<MTLCommandBuffer>)cbHandles->commandBuffer;
  id<MTLBuffer> target
      = (__bridge id<MTLBuffer>)(*static_cast<void* const*>(slotAddress));
  if(!target)
    return false;

  const NSRange fillRange = NSMakeRange((NSUInteger)offset, (NSUInteger)size);

  // Native encoding is bracketed by beginExternal()/endExternal().
  cb.beginExternal();
  id<MTLBlitCommandEncoder> encoder = [commands blitCommandEncoder];
  [encoder fillBuffer:target range:fillRange value:fillByte];
  [encoder endEncoding];
  cb.endExternal();
  return true;
}
+#if SCORE_HAS_D3D && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) case QRhi::D3D12: { auto* native = static_cast(cb.nativeHandles()); @@ -142,14 +154,97 @@ void insertComputeBarrier(QRhi& rhi, QRhiCommandBuffer& cb) } } +void beginBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb) +{ + switch(rhi.backend()) + { +#if SCORE_HAS_VULKAN + case QRhi::Vulkan: { + auto* inst = score::gfx::staticVulkanInstance(); + if(!inst) + break; + auto barrierFn = reinterpret_cast( + inst->getInstanceProcAddr("vkCmdPipelineBarrier")); + if(!barrierFn) + break; + auto* native + = static_cast(cb.nativeHandles()); + if(!native || !native->commandBuffer) + break; + VkMemoryBarrier pre{}; + pre.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + pre.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + pre.dstAccessMask + = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &pre, 0, nullptr, 0, nullptr); + break; + } +#endif + default: + // D3D11, D3D12, OpenGL, Metal: no explicit pre-barrier needed or + // handled by the backend when the encoder transitions. 
+ break; + } +} + +void endBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb) +{ + switch(rhi.backend()) + { +#if SCORE_HAS_VULKAN + case QRhi::Vulkan: { + auto* inst = score::gfx::staticVulkanInstance(); + if(!inst) + break; + auto barrierFn = reinterpret_cast( + inst->getInstanceProcAddr("vkCmdPipelineBarrier")); + if(!barrierFn) + break; + auto* native + = static_cast(cb.nativeHandles()); + if(!native || !native->commandBuffer) + break; + VkMemoryBarrier post{}; + post.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + post.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + post.dstAccessMask + = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT + | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT + | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT + | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT + | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + 0, 1, &post, 0, nullptr, 0, nullptr); + break; + } +#endif + default: + break; + } +} + void copyBuffer( QRhi& rhi, QRhiCommandBuffer& cb, QRhiBuffer* src, QRhiBuffer* dst, int size, - int srcOffset, int dstOffset) + int srcOffset, int dstOffset, + BufferCopyBarrier barrier) { if(!src || !dst || size <= 0 || srcOffset < 0 || dstOffset < 0) return; + // Dynamic buffers rotate over 2-3 backing slots per frame, but every + // backend's nativeBuffer().objects[0] only exposes slot 0 — copying that + // slot would hit a stale/wrong frame's data. The compute/MDI callers of + // these helpers all use Static/Immutable storage buffers; bail on Dynamic + // as defence-in-depth, matching clearBufferNative()'s Dynamic bail. 
+ if(src->type() == QRhiBuffer::Dynamic || dst->type() == QRhiBuffer::Dynamic) + return; + + const bool emit_barriers = (barrier == BufferCopyBarrier::Auto); + switch(rhi.backend()) { #if SCORE_HAS_VULKAN @@ -185,10 +280,11 @@ void copyBuffer( if(srcBuf == VK_NULL_HANDLE || dstBuf == VK_NULL_HANDLE) break; - // Barrier: compute write → transfer read/write + // Barrier: compute write → transfer read/write. Skipped when the + // caller batches multiple copies inside explicit begin/endBufferCopyBarrier. auto barrierFn = reinterpret_cast( inst->getInstanceProcAddr("vkCmdPipelineBarrier")); - if(barrierFn) + if(emit_barriers && barrierFn) { VkMemoryBarrier pre{}; pre.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; @@ -205,15 +301,22 @@ void copyBuffer( fn(native->commandBuffer, srcBuf, dstBuf, 1, ®ion); - // Barrier: transfer write → compute read - if(barrierFn) + // Barrier: transfer write → compute/vertex read + if(emit_barriers && barrierFn) { VkMemoryBarrier post{}; post.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; post.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - post.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + post.dstAccessMask + = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT + | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT + | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &post, 0, nullptr, 0, nullptr); + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT + | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT + | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + 0, 1, &post, 0, nullptr, 0, nullptr); } break; } @@ -255,7 +358,7 @@ void copyBuffer( } #endif -#if SCORE_HAS_D3D +#if SCORE_HAS_D3D && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) case QRhi::D3D12: { auto* native = static_cast(cb.nativeHandles()); @@ -269,22 +372,77 @@ void copyBuffer( if(!srcNative.objects[0] || !dstNative.objects[0]) break; - // objects[0] is an `ID3D12Resource * *`, i.e. 
a pointer to the - // resource pointer slot. Same convention as Vulkan -- see the long - // comment in the Vulkan branch above. - auto* srcRes - = *static_cast(srcNative.objects[0]); - auto* dstRes - = *static_cast(dstNative.objects[0]); + // D3D12 is the ODD ONE OUT in QRhi: unlike Vulkan/Metal/D3D11/GL + // which store `&native_handle` (one extra indirection), the D3D12 + // backend stores `res->resource` directly — i.e. + // `objects[0]` IS the `ID3D12Resource *`, NOT a pointer to it. See + // QD3D12Buffer::nativeBuffer in qrhid3d12.cpp: + // b.objects[0] = res->resource; // ID3D12Resource * + // vs. Vulkan/Metal: + // b.objects[i] = &buffers[i]; // VkBuffer * / id * + // vs. D3D11: + // return { { &buffer }, 1 }; // ID3D11Buffer * * + // Dereferencing here as `**` would treat the COM vtable pointer as + // an `ID3D12Resource *` and hand garbage to CopyBufferRegion, which + // the D3D12 debug layer flags as + // "CORRUPTION: First parameter is corrupt — CORRUPTED_PARAMETER1". + // const_cast: NativeBuffer::objects is `const void *` (Qt's const- + // correct getter signal that the *array* is const for inspection), + // but CopyBufferRegion needs a non-const ID3D12Resource* — and the + // underlying resource is genuinely mutable (it is the GPU buffer + // we are about to write to). + auto* srcRes = static_cast( + const_cast(srcNative.objects[0])); + auto* dstRes = static_cast( + const_cast(dstNative.objects[0])); if(!srcRes || !dstRes) break; + // D3D12 has explicit resource states (unlike Vulkan's access masks the + // backend handles for tracked resources). The buffers are written by a + // compute pass as UAVs, so transition src→COPY_SOURCE and dst→COPY_DEST + // before CopyBufferRegion, then back to UNORDERED_ACCESS so subsequent + // compute/draw reads see the data. Mirrors the Vulkan compute→transfer→ + // compute barrier intent and is gated on emit_barriers the same way. 
+ const auto transition + = [cmdList]( + ID3D12Resource* res, D3D12_RESOURCE_STATES before, + D3D12_RESOURCE_STATES after) { + D3D12_RESOURCE_BARRIER b{}; + b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + b.Transition.pResource = res; + b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + b.Transition.StateBefore = before; + b.Transition.StateAfter = after; + cmdList->ResourceBarrier(1, &b); + }; + if(emit_barriers) + { + transition( + srcRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + transition( + dstRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_DEST); + } + cmdList->CopyBufferRegion( dstRes, static_cast(dstOffset), srcRes, static_cast(srcOffset), static_cast(size)); + + if(emit_barriers) + { + transition( + srcRes, D3D12_RESOURCE_STATE_COPY_SOURCE, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + transition( + dstRes, D3D12_RESOURCE_STATE_COPY_DEST, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } break; } #endif @@ -334,4 +492,243 @@ void copyBuffer( } } +void copyBufferRegions( + QRhi& rhi, QRhiCommandBuffer& cb, + QRhiBuffer* src, QRhiBuffer* dst, + const BufferCopyRegion* regions, int count, + BufferCopyBarrier barrier) +{ + if(!src || !dst || !regions || count <= 0) + return; + + // See copyBuffer(): Dynamic buffers expose only slot 0 via objects[0], so a + // native copy would read/write the wrong frame slot. Bail like + // clearBufferNative() does. 
+ if(src->type() == QRhiBuffer::Dynamic || dst->type() == QRhiBuffer::Dynamic) + return; + + const bool emit_barriers = (barrier == BufferCopyBarrier::Auto); + + switch(rhi.backend()) + { +#if SCORE_HAS_VULKAN + case QRhi::Vulkan: { + auto* inst = score::gfx::staticVulkanInstance(); + if(!inst) + break; + auto fn = reinterpret_cast( + inst->getInstanceProcAddr("vkCmdCopyBuffer")); + if(!fn) + break; + auto* native + = static_cast(cb.nativeHandles()); + if(!native || !native->commandBuffer) + break; + + auto srcNative = src->nativeBuffer(); + auto dstNative = dst->nativeBuffer(); + if(!srcNative.objects[0] || !dstNative.objects[0]) + break; + VkBuffer srcBuf = *static_cast(srcNative.objects[0]); + VkBuffer dstBuf = *static_cast(dstNative.objects[0]); + if(srcBuf == VK_NULL_HANDLE || dstBuf == VK_NULL_HANDLE) + break; + + auto barrierFn = reinterpret_cast( + inst->getInstanceProcAddr("vkCmdPipelineBarrier")); + if(emit_barriers && barrierFn) + { + VkMemoryBarrier pre{}; + pre.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + pre.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + pre.dstAccessMask + = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &pre, 0, nullptr, 0, nullptr); + } + + // Build region array once and issue a single vkCmdCopyBuffer. + // Small-stack path for the common ≤1024 vertex case; heap fallback + // for larger point clouds. 
+ constexpr int kStackMax = 1024; + VkBufferCopy stack_regions[kStackMax]; + std::vector heap_regions; + VkBufferCopy* vk_regions; + if(count <= kStackMax) + { + vk_regions = stack_regions; + } + else + { + heap_regions.resize(count); + vk_regions = heap_regions.data(); + } + for(int i = 0; i < count; ++i) + { + vk_regions[i].srcOffset = static_cast(regions[i].src_offset); + vk_regions[i].dstOffset = static_cast(regions[i].dst_offset); + vk_regions[i].size = static_cast(regions[i].size); + } + fn(native->commandBuffer, srcBuf, dstBuf, (uint32_t)count, vk_regions); + + if(emit_barriers && barrierFn) + { + VkMemoryBarrier post{}; + post.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + post.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + post.dstAccessMask + = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT + | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT + | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + barrierFn(native->commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT + | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT + | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + 0, 1, &post, 0, nullptr, 0, nullptr); + } + break; + } +#endif + +#if SCORE_HAS_GL + case QRhi::OpenGLES2: { + auto* native = static_cast(rhi.nativeHandles()); + if(!native || !native->context) + break; + auto* f = native->context->extraFunctions(); + if(!f) + break; + auto srcNative = src->nativeBuffer(); + auto dstNative = dst->nativeBuffer(); + if(!srcNative.objects[0] || !dstNative.objects[0]) + break; + GLuint srcId = *static_cast(srcNative.objects[0]); + GLuint dstId = *static_cast(dstNative.objects[0]); + if(srcId == 0 || dstId == 0) + break; + auto* gl = native->context->functions(); + gl->glBindBuffer(GL_COPY_READ_BUFFER, srcId); + gl->glBindBuffer(GL_COPY_WRITE_BUFFER, dstId); + for(int i = 0; i < count; ++i) + { + f->glCopyBufferSubData( + GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, + static_cast(regions[i].src_offset), + static_cast(regions[i].dst_offset), + 
static_cast(regions[i].size)); + } + gl->glBindBuffer(GL_COPY_READ_BUFFER, 0); + gl->glBindBuffer(GL_COPY_WRITE_BUFFER, 0); + break; + } +#endif + +#if SCORE_HAS_D3D && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) + case QRhi::D3D12: { + auto* native + = static_cast(cb.nativeHandles()); + if(!native || !native->commandList) + break; + auto* cmdList = static_cast(native->commandList); + auto srcNative = src->nativeBuffer(); + auto dstNative = dst->nativeBuffer(); + if(!srcNative.objects[0] || !dstNative.objects[0]) + break; + // D3D12 stores the raw ID3D12Resource* directly (no extra + // indirection). See the long comment in copyBuffer's D3D12 branch + // above for the Qt-source-level details. + auto* srcRes = static_cast( + const_cast(srcNative.objects[0])); + auto* dstRes = static_cast( + const_cast(dstNative.objects[0])); + if(!srcRes || !dstRes) + break; + + // UAV(compute-write) → COPY_SOURCE/COPY_DEST around the copies, then + // back to UAV. One transition pair brackets all regions (same src/dst). + // See the matching comment in copyBuffer's D3D12 branch. 
+ const auto transition + = [cmdList]( + ID3D12Resource* res, D3D12_RESOURCE_STATES before, + D3D12_RESOURCE_STATES after) { + D3D12_RESOURCE_BARRIER b{}; + b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + b.Transition.pResource = res; + b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + b.Transition.StateBefore = before; + b.Transition.StateAfter = after; + cmdList->ResourceBarrier(1, &b); + }; + if(emit_barriers) + { + transition( + srcRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + transition( + dstRes, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_DEST); + } + + for(int i = 0; i < count; ++i) + { + cmdList->CopyBufferRegion( + dstRes, static_cast(regions[i].dst_offset), + srcRes, static_cast(regions[i].src_offset), + static_cast(regions[i].size)); + } + + if(emit_barriers) + { + transition( + srcRes, D3D12_RESOURCE_STATE_COPY_SOURCE, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + transition( + dstRes, D3D12_RESOURCE_STATE_COPY_DEST, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + break; + } +#endif + + case QRhi::D3D11: { +#if SCORE_HAS_D3D + auto* native = static_cast(rhi.nativeHandles()); + if(!native || !native->context) + break; + auto srcNative = src->nativeBuffer(); + auto dstNative = dst->nativeBuffer(); + if(!srcNative.objects[0] || !dstNative.objects[0]) + break; + auto* ctx = static_cast(native->context); + auto* srcBuf + = *static_cast(srcNative.objects[0]); + auto* dstBuf + = *static_cast(dstNative.objects[0]); + if(!srcBuf || !dstBuf) + break; + for(int i = 0; i < count; ++i) + { + D3D11_BOX box{}; + box.left = static_cast(regions[i].src_offset); + box.right = static_cast(regions[i].src_offset + regions[i].size); + box.top = 0; box.bottom = 1; box.front = 0; box.back = 1; + ctx->CopySubresourceRegion( + dstBuf, 0, static_cast(regions[i].dst_offset), 0, 0, + srcBuf, 0, &box); + } +#endif + break; + } + + case QRhi::Metal: + 
copyBufferRegionsMetal(rhi, cb, src, dst, regions, count); + break; + + default: + break; + } +} + } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp index f7e4b41a96..02cb4ac16d 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/RhiComputeBarrier.hpp @@ -42,15 +42,76 @@ void insertComputeBarrier(QRhi& rhi, QRhiCommandBuffer& cb); * - D3D11 : CopySubresourceRegion (offsets supported via D3D11_BOX) * - Metal : MTLBlitCommandEncoder copyFromBuffer */ +// Controls whether the copy helpers emit their own pre/post pipeline +// barriers. Default: Auto (each call emits a compute→transfer + +// transfer→compute pair). Use `None` when you are batching N calls +// inside explicit beginBufferCopyBarrier / endBufferCopyBarrier brackets +// to avoid N−1 redundant pipeline stalls. +enum class BufferCopyBarrier +{ + Auto, + None +}; + +/// Emit the compute→transfer barrier that must precede a buffer copy +/// consuming data written by a compute shader. Pair with +/// endBufferCopyBarrier(). No-op on backends that handle the transition +/// implicitly (D3D11, Metal). +SCORE_PLUGIN_GFX_EXPORT +void beginBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb); + +/// Emit the transfer→compute barrier after a batch of buffer copies so +/// downstream compute/graphics reads observe the writes. 
+SCORE_PLUGIN_GFX_EXPORT +void endBufferCopyBarrier(QRhi& rhi, QRhiCommandBuffer& cb); + SCORE_PLUGIN_GFX_EXPORT void copyBuffer( QRhi& rhi, QRhiCommandBuffer& cb, QRhiBuffer* src, QRhiBuffer* dst, int size, - int srcOffset = 0, int dstOffset = 0); + int srcOffset = 0, int dstOffset = 0, + BufferCopyBarrier barrier = BufferCopyBarrier::Auto); // Metal-specific implementation (defined in RhiBufferCopyMetal.mm) void copyBufferMetal( QRhi& rhi, QRhiCommandBuffer& cb, QRhiBuffer* src, QRhiBuffer* dst, int size, int srcOffset = 0, int dstOffset = 0); + +/** + * @brief Region-based GPU buffer copy for strided / gather patterns. + * + * One src buffer → one dst buffer, with @p count distinct {srcOffset, + * dstOffset, size} regions. Emits ONE pre-barrier and ONE post-barrier + * for the whole batch on backends that need them (Vulkan), then issues + * the minimum native work: + * - Vulkan : single vkCmdCopyBuffer call with `count` regions + * - OpenGL : N glCopyBufferSubData (bindings reused) + * - D3D12 : N CopyBufferRegion (no per-call barriers needed) + * - D3D11 : N CopySubresourceRegion + * - Metal : N copyFromBuffer within one MTLBlitCommandEncoder + * + * Replaces what would otherwise be N copyBuffer() calls (each with its + * own barrier pair) for strided source layouts — the + * std430-vec3-padded-to-vec4 case in particular. Must be called inside + * beginExternal()/endExternal() like copyBuffer(). 
+ */ +struct BufferCopyRegion +{ + int src_offset{}; + int dst_offset{}; + int size{}; +}; +SCORE_PLUGIN_GFX_EXPORT +void copyBufferRegions( + QRhi& rhi, QRhiCommandBuffer& cb, + QRhiBuffer* src, QRhiBuffer* dst, + const BufferCopyRegion* regions, int count, + BufferCopyBarrier barrier = BufferCopyBarrier::Auto); + +// Metal-specific implementation +void copyBufferRegionsMetal( + QRhi& rhi, QRhiCommandBuffer& cb, + QRhiBuffer* src, QRhiBuffer* dst, + const BufferCopyRegion* regions, int count); } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.cpp new file mode 100644 index 0000000000..f31c806137 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.cpp @@ -0,0 +1,250 @@ +#include +#include +#include + +#include + +#include + +namespace score::gfx +{ + +namespace +{ + +struct SceneFilterVisitor +{ + int mode{}; + + // Returns true if this payload should be kept in the output tree. When + // returning true, `out_children` may be populated with rewritten children + // (for scene_node subtrees that have been partially filtered). + bool filter_payload( + const ossia::scene_payload& in, ossia::scene_payload& out) const + { + if(auto* n = ossia::get_if(&in)) + { + ossia::scene_node_ptr rewritten = rewrite_node(*n); + if(!rewritten) + return false; + out = rewritten; + return true; + } + // Non-node payloads: pass-through (lights, cameras, materials, meshes, + // transforms). Hierarchy filtering only drops scene_nodes; payloads + // carried as direct siblings of a kept node follow their parent. + out = in; + return true; + } + + ossia::scene_node_ptr rewrite_node(const ossia::scene_node_ptr& src) const + { + if(!src) + return nullptr; + + // Mode 1: drop invisible subtrees outright. + if(mode == 1 && !src->visible) + return nullptr; + + // Recurse into children. + if(!src->has_children()) + { + // Leaf node — keep as-is if it passed the visibility check above. 
+ return src; + } + + auto newChildren = std::make_shared>(); + newChildren->reserve(src->children->size()); + for(const auto& child : *src->children) + { + ossia::scene_payload out; + if(filter_payload(child, out)) + newChildren->push_back(std::move(out)); + } + + // If nothing survived under this node, drop the node itself. + if(newChildren->empty()) + return nullptr; + + // Share-copy: if children were unchanged identity-wise, reuse src. + if(newChildren->size() == src->children->size()) + { + bool identical = true; + for(std::size_t i = 0; i < newChildren->size(); ++i) + { + const auto& a = (*newChildren)[i]; + const auto& b = (*src->children)[i]; + if(a.index() != b.index()) + { + identical = false; + break; + } + // scene_payload is a variant of shared_ptr-to-component types + // (plus scene_transform). For shared_ptr alternatives, identity + // is the correct check: a freshly-rewritten subtree returns a + // different shared_ptr than the original, while pass-through + // payloads keep the same pointer. scene_transform is always + // pass-through in filter_payload so equality of the variant + // index is sufficient — no transform value is mutated here. + const bool same = ossia::visit( + [&](const T& av) -> bool { + const auto* bv = ossia::get_if(&b); + if(!bv) + return false; + if constexpr(requires { av.get() == bv->get(); }) + return av.get() == bv->get(); + else + return true; // scene_transform: pass-through, treat as same + }, + a); + if(!same) + { + identical = false; + break; + } + } + if(identical) + return src; + } + + auto copy = std::make_shared(*src); + copy->children = std::move(newChildren); + return copy; + } + + ossia::scene_spec rewrite(const ossia::scene_spec& in) const + { + ossia::scene_spec out; + if(!in.state) + return out; + + // Mode 0: pass-through, no copy needed. 
+ if(mode == 0) + return in; + + auto newState = std::make_shared(*in.state); + auto newRoots + = std::make_shared>(); + if(in.state->roots) + { + newRoots->reserve(in.state->roots->size()); + for(const auto& r : *in.state->roots) + { + if(auto rw = rewrite_node(r)) + newRoots->push_back(std::move(rw)); + } + } + newState->roots = std::move(newRoots); + newState->version++; + newState->dirty_index++; + + out.state = std::move(newState); + out.delta = in.delta; + return out; + } +}; + +} + +struct RenderedSceneFilterNode final : NodeRenderer +{ + const SceneFilterNode& m_node; + ossia::scene_spec m_outputScene; + const ossia::scene_state* m_cachedInputState{}; + int64_t m_cachedInputVersion{-1}; + int m_cachedMode{-1}; + + RenderedSceneFilterNode(const SceneFilterNode& n) + : NodeRenderer{n} + , m_node{n} + { + } + + void init(RenderList&, QRhiResourceUpdateBatch&) override { m_initialized = true; } + void release(RenderList&) override + { + m_outputScene = {}; + m_cachedInputState = nullptr; + m_cachedInputVersion = -1; + m_cachedMode = -1; + m_initialized = false; + } + + void update(RenderList&, QRhiResourceUpdateBatch&, Edge*) override + { + const auto* inState = this->scene.state.get(); + const int64_t inVersion = this->scene.state ? 
this->scene.state->version : -1; + + bool rebuild = !m_outputScene.state + || inState != m_cachedInputState + || inVersion != m_cachedInputVersion + || m_node.m_mode != m_cachedMode + || this->sceneChanged; + if(!rebuild) + return; + + SceneFilterVisitor vis{m_node.m_mode}; + m_outputScene = vis.rewrite(this->scene); + m_cachedInputState = inState; + m_cachedInputVersion = inVersion; + m_cachedMode = m_node.m_mode; + this->sceneChanged = false; + } + + void runInitialPasses( + RenderList& renderer, QRhiCommandBuffer&, QRhiResourceUpdateBatch*&, + Edge& edge) override + { + if(!m_outputScene.state) + return; + auto* sink = edge.sink; + if(!sink || !sink->node) + return; + auto rn_it = sink->node->renderedNodes.find(&renderer); + if(rn_it == sink->node->renderedNodes.end()) + return; + auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink); + if(it == sink->node->input.end()) + return; + int port_idx = (int)(it - sink->node->input.begin()); + rn_it->second->process(port_idx, m_outputScene, edge.source); + } + + void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { } + + // Data-only renderer — no per-edge GPU pass state to release. 
+ void removeOutputPass(RenderList&, Edge&) override { } +}; + +SceneFilterNode::SceneFilterNode() +{ + input.push_back(new Port{this, {}, Types::Scene, {}}); + { + auto* data = new int{0}; + input.push_back(new Port{this, data, Types::Int, {}}); + } + output.push_back(new Port{this, {}, Types::Scene, {}}); +} + +SceneFilterNode::~SceneFilterNode() = default; + +void SceneFilterNode::process(int32_t port, const ossia::value& v) +{ + switch(port) + { + case 1: + m_mode = ossia::convert(v); + materialChange(); + break; + default: + ProcessNode::process(port, v); + break; + } +} + +NodeRenderer* SceneFilterNode::createRenderer(RenderList&) const noexcept +{ + return new RenderedSceneFilterNode{*this}; +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.hpp new file mode 100644 index 0000000000..c1402e0e4a --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneFilterNode.hpp @@ -0,0 +1,40 @@ +#pragma once +#include + +namespace score::gfx +{ + +/** + * @brief Tree-level filter on a scene_spec. + * + * Walks the incoming scene hierarchy and rebuilds it with only the + * subtrees matching the predicate. Runs on the render thread but does + * exclusively CPU work — no GPU allocation; shared_ptr reuse keeps cost + * minimal when the scene is unchanged. 
+ *
+ * Inputs:
+ *   - Port 0: Scene (Types::Scene)
+ *   - Port 1: Mode (Types::Int):
+ *       0 = pass-through (no filtering)
+ *       1 = keep only scene_nodes with visible == true
+ *       2 = keep only subtrees whose node name contains the substring set
+ *           in the "Name" control (future-wired; string port missing in the
+ *           renderer for now, so behaves like mode 1 until wired)
+ *
+ * Outputs:
+ *   - Port 0: Scene (Types::Scene)
+ */
+class SCORE_PLUGIN_GFX_EXPORT SceneFilterNode : public ProcessNode
+{
+public:
+  SceneFilterNode();
+  ~SceneFilterNode() override;
+
+  score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override;
+
+  void process(int32_t port, const ossia::value& v) override;
+
+  int m_mode{0}; // filter mode; values are listed under "Inputs" above
+};
+
+}
diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.cpp
new file mode 100644
index 0000000000..ce68981455
--- /dev/null
+++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.cpp
@@ -0,0 +1,1012 @@
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace score::gfx
+{
+
+// Wraps the 16 floats of an ossia::transform3d in a QMatrix4x4.
+static QMatrix4x4 toQMatrix(const ossia::transform3d& t)
+{
+  // ossia::transform3d::matrix stores column-major data.
+  // QMatrix4x4(values, cols, rows) with cols=4, rows=4 reads column-major.
+  return QMatrix4x4(t.matrix, 4, 4);
+}
+
+// Builds a matrix from a scene_transform by applying translate, rotate and
+// scale, in that order, to an identity matrix. The rotation array is read as
+// (x, y, z, w): index 3 is passed first because the QQuaternion constructor
+// takes the scalar component before the vector components.
+static QMatrix4x4 toQMatrix(const ossia::scene_transform& t)
+{
+  QMatrix4x4 mat;
+  mat.translate(t.translation[0], t.translation[1], t.translation[2]);
+  mat.rotate(QQuaternion(t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2]));
+  mat.scale(t.scale[0], t.scale[1], t.scale[2]);
+  return mat;
+}
+
+// packLight removed in task 28c. The Light producer owns a RawLight
+// arena slot and writes RawLightData directly in its own update() hook
+// (see Threedim/Light.cpp); the preprocessor no longer CPU-composes
+// world-space light bytes.
Consumer shaders compose direction / position
+// on the fly from world_transforms[RawLight.transform_slot].
+
+// ---- mesh_primitive → ossia::geometry ------------------------------------
+//
+// Builds a transient `ossia::geometry` on the heap that wraps a
+// `mesh_primitive`'s buffers and attribute layout. The downstream
+// preprocessor copies those handles into its own output, so the converted
+// geometry only needs to survive the current flatten pass. CPU-backed
+// `buffer_data` flows
+// through as `cpu_buffer` (the rendering layer handles the upload); GPU
+// handles flow through as `gpu_buffer`.
+
+// Maps an ossia::vertex_format value to the matching
+// ossia::geometry::attribute format enumerator. Values without a case
+// below fall back to float3.
+// NOTE(review): the alias `A` is declared but not used in the body.
+static decltype(ossia::geometry::attribute::format)
+toGeomAttrFormat(ossia::vertex_format f) noexcept
+{
+  using V = ossia::vertex_format;
+  using A = decltype(ossia::geometry::attribute::format);
+  switch(f)
+  {
+    case V::float1: return ossia::geometry::attribute::float1;
+    case V::float2: return ossia::geometry::attribute::float2;
+    case V::float3: return ossia::geometry::attribute::float3;
+    case V::float4: return ossia::geometry::attribute::float4;
+    case V::half1: return ossia::geometry::attribute::half1;
+    case V::half2: return ossia::geometry::attribute::half2;
+    case V::half3: return ossia::geometry::attribute::half3;
+    case V::half4: return ossia::geometry::attribute::half4;
+    case V::unorm8x1: return ossia::geometry::attribute::unormbyte1;
+    case V::unorm8x2: return ossia::geometry::attribute::unormbyte2;
+    case V::unorm8x4: return ossia::geometry::attribute::unormbyte4;
+    case V::uint16x1: return ossia::geometry::attribute::ushort1;
+    case V::uint16x2: return ossia::geometry::attribute::ushort2;
+    case V::uint16x4: return ossia::geometry::attribute::ushort4;
+    case V::sint16x1: return ossia::geometry::attribute::sshort1;
+    case V::sint16x2: return ossia::geometry::attribute::sshort2;
+    case V::sint16x4: return ossia::geometry::attribute::sshort4;
+    case V::uint32x1: return ossia::geometry::attribute::uint1;
+    case V::uint32x2: return ossia::geometry::attribute::uint2;
+    case V::uint32x3: return ossia::geometry::attribute::uint3;
+    case V::uint32x4: return ossia::geometry::attribute::uint4;
+    case V::sint32x1: return ossia::geometry::attribute::sint1;
+    case V::sint32x2: return ossia::geometry::attribute::sint2;
+    case V::sint32x3: return ossia::geometry::attribute::sint3;
+    case V::sint32x4: return ossia::geometry::attribute::sint4;
+    default: return ossia::geometry::attribute::float3;
+  }
+}
+
+// Maps an ossia::primitive_topology value to the geometry topology
+// enumerator of the same name. Values without a case fall back to triangles.
+static auto toGeomTopology(ossia::primitive_topology t) noexcept
+{
+  using P = ossia::primitive_topology;
+  using G = decltype(ossia::geometry::topology);
+  switch(t)
+  {
+    case P::points: return G::points;
+    case P::lines: return G::lines;
+    case P::line_strip: return G::line_strip;
+    case P::triangles: return G::triangles;
+    case P::triangle_strip: return G::triangle_strip;
+    case P::triangle_fan: return G::triangle_fan;
+    default: return G::triangles;
+  }
+}
+
+// Appends one entry to g.buffers for `br`: a cpu_buffer when br.resource
+// holds the CPU alternative, a gpu_buffer when it holds the GPU alternative.
+// Both kinds are pushed with dirty = true. If neither get_if succeeds,
+// nothing is appended.
+static void appendBufferResource(
+    ossia::geometry& g, const ossia::buffer_resource& br)
+{
+  if(auto* cpu = ossia::get_if(&br.resource))
+  {
+    ossia::geometry::cpu_buffer cb;
+    // buffer_data::data is shared_ptr; geometry::cpu_buffer::raw_data
+    // is shared_ptr. The contents are immutable in practice, but the types
+    // differ — const_pointer_cast reuses the control block without a copy.
+    cb.raw_data = std::const_pointer_cast(cpu->data);
+    cb.byte_size = cpu->byte_size;
+    g.buffers.push_back(ossia::geometry::buffer{.data = cb, .dirty = true});
+  }
+  else if(auto* gpu = ossia::get_if(&br.resource))
+  {
+    ossia::geometry::gpu_buffer gb;
+    gb.handle = gpu->native_handle;
+    gb.byte_size = gpu->byte_size;
+    g.buffers.push_back(ossia::geometry::buffer{.data = gb, .dirty = true});
+  }
+}
+
+// Converts one mesh_primitive into a freshly allocated ossia::geometry and
+// returns the shared pointer that owns it. The numbered steps below fill
+// buffers, bindings, inputs, attributes, counts, the index reference and
+// the bounds, in that order.
+std::shared_ptr
+primitiveToGeometry(const ossia::mesh_primitive& prim)
+{
+  auto out = std::make_shared();
+
+  // 1) Buffers: one entry per vertex_buffer, optionally plus the index buffer.
+  out->buffers.reserve(prim.vertex_buffers.size() + (prim.index_buffer ? 1 : 0));
+  for(const auto& vb : prim.vertex_buffers)
+  {
+    // A null vertex buffer still gets an (empty, non-dirty) gpu_buffer entry,
+    // presumably so attribute buffer_index values keep lining up with
+    // out->buffers — confirm.
+    // NOTE(review): appendBufferResource appends nothing when the resource
+    // holds neither alternative, which would shift later indices — verify
+    // that case cannot occur.
+    if(vb)
+      appendBufferResource(*out, *vb);
+    else
+      out->buffers.push_back(ossia::geometry::buffer{
+          .data = ossia::geometry::gpu_buffer{}, .dirty = false});
+  }
+  // The index buffer, when present, goes after all vertex buffers; its
+  // position is recorded before it is appended.
+  const int index_buffer_idx = prim.index_buffer ? (int)out->buffers.size() : -1;
+  if(prim.index_buffer)
+    appendBufferResource(*out, *prim.index_buffer);
+
+  // 2) Bindings: one per unique (buffer_index, byte_stride, rate) tuple.
+  //    Deduping by buffer_index alone is wrong for SceneFromMeshes-style
+  //    primitives, which pack planar pos(12)/uv(8)/color(16) blocks all into
+  //    buffer 0 with distinct strides: collapsing them to a single binding
+  //    would force every attribute through the first stride (12) and produce
+  //    garbage UVs/colors/tangents. The glTF path uses one buffer per
+  //    attribute, so this keying leaves it unchanged.
+  struct BindingInfo
+  {
+    uint32_t buffer_index{};
+    uint32_t stride{};
+    bool per_instance{};
+  };
+  std::vector bindings;
+  // Linear search over the bindings collected so far; returns the index of
+  // the matching tuple or -1 when there is none.
+  auto findBinding = [&](uint32_t bi, uint32_t stride, bool per_instance) -> int {
+    for(std::size_t k = 0; k < bindings.size(); ++k)
+      if(bindings[k].buffer_index == bi && bindings[k].stride == stride
+         && bindings[k].per_instance == per_instance)
+        return (int)k;
+    return -1;
+  };
+  // Binding index for an attribute, derived from its own tuple.
+  auto attrBinding = [&](const ossia::vertex_attribute& a) -> int {
+    return findBinding(
+        a.buffer_index, a.byte_stride,
+        a.rate == ossia::vertex_attribute::input_rate::per_instance);
+  };
+  for(const auto& a : prim.attributes)
+  {
+    const bool per_instance
+        = (a.rate == ossia::vertex_attribute::input_rate::per_instance);
+    if(findBinding(a.buffer_index, a.byte_stride, per_instance) < 0)
+    {
+      BindingInfo b;
+      b.buffer_index = a.buffer_index;
+      b.stride = a.byte_stride;
+      b.per_instance = per_instance;
+      bindings.push_back(b);
+    }
+  }
+  out->bindings.reserve(bindings.size());
+  for(const auto& b : bindings)
+  {
+    ossia::geometry::binding gb{};
+    gb.byte_stride = b.stride;
+    gb.classification = b.per_instance
+                            ? ossia::geometry::binding::per_instance
+                            : ossia::geometry::binding::per_vertex;
+    gb.step_rate = 1;
+    out->bindings.push_back(gb);
+  }
+
+  // 3) Input: one entry per binding, pointing to the corresponding buffer.
+  out->input.reserve(bindings.size());
+  for(const auto& b : bindings)
+  {
+    // `input` resolves to an ossia-level type in this scope, so reference
+    // the member type explicitly via a `struct` elaborated tag.
+    struct ossia::geometry::input entry{};
+    entry.buffer = (int)b.buffer_index;
+    entry.byte_offset = 0;
+    out->input.push_back(entry);
+  }
+
+  // 4) Attributes: remap buffer_index → binding index.
+  out->attributes.reserve(prim.attributes.size());
+  for(const auto& a : prim.attributes)
+  {
+    ossia::geometry::attribute ga{};
+    // Never -1 here: step 2 registered a binding for every attribute tuple.
+    ga.binding = attrBinding(a);
+    ga.location = 0; // resolved by the renderer's semantic remap
+    ga.format = toGeomAttrFormat(a.format);
+    ga.byte_offset = a.byte_offset;
+    ga.semantic = a.semantic;
+    out->attributes.push_back(ga);
+  }
+
+  // 5) Counts and topology.
+  // Counts are narrowed to int; the instance count, cull mode and front
+  // face are fixed values rather than read from the primitive.
+  out->vertices = (int)prim.vertex_count;
+  out->indices = (int)prim.index_count;
+  out->instances = 1;
+  out->topology = toGeomTopology(prim.topology);
+  out->cull_mode = ossia::geometry::none;
+  out->front_face = ossia::geometry::counter_clockwise;
+
+  // 6) Index buffer reference.
+  if(index_buffer_idx >= 0)
+  {
+    out->index.buffer = index_buffer_idx;
+    out->index.byte_offset = 0;
+    // Any index_type other than uint16 is treated as uint32.
+    out->index.format = (prim.index_type == ossia::index_format::uint16)
+                            ? decltype(out->index)::uint16
+                            : decltype(out->index)::uint32;
+  }
+  else
+  {
+    out->index.buffer = -1;
+  }
+
+  // 7) Bounds.
+  std::memcpy(out->bounds.min, prim.bounds.min, sizeof(float) * 3);
+  std::memcpy(out->bounds.max, prim.bounds.max, sizeof(float) * 3);
+
+  return out;
+}
+
+// Pack the CPU-side material_component into the 64-byte GPU-layout struct.
+// Only factor fields are packed here; `textureRefs[]` are deliberately left
+// at their default tex_ref_none() sentinel. ScenePreprocessorNode runs
+// `rebuildChannel(ch)` for each of the four channels (BaseColor /
+// MetalRough / Normal / Emissive) after the scene walk, which in turn
+// calls `patchMaterialRefsFromCache(ch, fs)` (ScenePreprocessorNode.cpp:1944)
+// to fill `fs.materials[i].textureRefs[ch]` with the assigned texture-array
+// layer index per material per channel. Consumer shaders sample the
+// per-channel arrays via `mat.textureRefs.x / .y / .z / .w` against
+// `baseColorArray` / `metalRoughArray` / `normalArray` / `emissiveArray`.
+//
+// Takes the source material `mc` and returns a MaterialGPU holding its
+// factors, derived feature_mask, hit_group_id and alpha_cutoff.
+// NOTE(review): this header speaks of four channels while the occlusion
+// note near the end of the body mentions a 5th-channel pass — confirm which
+// description is current.
+MaterialGPU packMaterial(const ossia::material_component& mc)
+{
+  MaterialGPU gpu;
+  std::memcpy(gpu.baseColor, mc.base_color_factor, sizeof(float) * 4);
+  gpu.metallicRoughnessOcclusionUnlit[0] = mc.metallic_factor;
+  gpu.metallicRoughnessOcclusionUnlit[1] = mc.roughness_factor;
+  gpu.metallicRoughnessOcclusionUnlit[2] = mc.occlusion_strength;
+  gpu.metallicRoughnessOcclusionUnlit[3] = mc.unlit ? 1.f : 0.f;
+  // xyz = emissive colour factor, w = emissive strength.
+  gpu.emissive_strength[0] = mc.emissive_factor[0];
+  gpu.emissive_strength[1] = mc.emissive_factor[1];
+  gpu.emissive_strength[2] = mc.emissive_factor[2];
+  gpu.emissive_strength[3] = mc.emissive_strength;
+
+  // Feature mask — OR in a bit for each active BRDF lobe / texture.
+  // Producers can override this at authoring time; when writing from
+  // a scene_state.materials entry we derive from the CPU-side fields.
+  // Used as SER reorder key + shader-side specialization branch.
+  uint32_t fm = 0;
+  using namespace material_feature;
+  if(mc.base_color_texture.valid()) fm |= has_base_color_texture;
+  if(mc.metallic_roughness_texture.valid()) fm |= has_metal_rough_texture;
+  if(mc.normal_texture.valid()) fm |= has_normal_texture;
+  if(mc.emissive_texture.valid()) fm |= has_emissive_texture;
+  if(mc.unlit) fm |= unlit;
+  // mask and blend each set their own bit in addition to alpha_non_opaque.
+  if(mc.alpha != ossia::alpha_mode::opaque_) fm |= alpha_non_opaque;
+  if(mc.alpha == ossia::alpha_mode::mask) fm |= alpha_mask;
+  if(mc.alpha == ossia::alpha_mode::blend) fm |= alpha_blend;
+  if(mc.double_sided) fm |= double_sided;
+  // Scene-filter opt-outs — "disabled" semantics keep the common case
+  // (caster = true) at 0. CSF filter shaders test these bits.
+  if(!mc.shadow_caster) fm |= shadow_caster_disabled;
+  if(!mc.reflection_caster) fm |= reflection_caster_disabled;
+  // Occlusion: set the flag whenever the material has an occlusionTexture
+  // at all — the shader samples through `mat.occlusion_textureRef`
+  // unconditionally in the "separate" branch, which works for both
+  // distinct-source and shared-with-MR (ORM) packings. Routing through
+  // mr.r as a fallback when no occlusion_texture is present is unsafe:
+  // the glTF spec leaves pbrMetallicRoughness.R undefined and most
+  // authoring tools leave it at 0, which silently zeroes the ambient
+  // floor / IBL occlusion multiplier and turns dark metals pitch-black.
+  if(mc.occlusion_texture.valid())
+    fm |= has_separate_occlusion;
+
+  // Per-channel texcoord_set bits (20-29). Clamp to 1 — glTF allows
+  // up to TEXCOORD_7 but our MDI layout carries TEXCOORD_0/1 only.
+  // Each channel's shift is 2 apart, but the clamped value only ever
+  // occupies the lower bit of its pair.
+  auto pack_tcset = [](uint32_t set_idx, uint32_t shift) -> uint32_t {
+    return (set_idx > 1u ? 1u : set_idx) << shift;
+  };
+  fm |= pack_tcset(mc.base_color_texture.texcoord_set, 20);
+  fm |= pack_tcset(mc.metallic_roughness_texture.texcoord_set, 22);
+  fm |= pack_tcset(mc.normal_texture.texcoord_set, 24);
+  fm |= pack_tcset(mc.emissive_texture.texcoord_set, 26);
+  fm |= pack_tcset(mc.occlusion_texture.texcoord_set, 28);
+  // Extension lobes: most are enabled by a strictly positive factor.
+  // Specular is enabled when any of its four values differs from 1, and
+  // anisotropy by any non-zero strength (negative included).
+  if(mc.clearcoat.factor > 0.f) fm |= has_clearcoat;
+  if(mc.sheen.color_factor[0] > 0.f
+     || mc.sheen.color_factor[1] > 0.f
+     || mc.sheen.color_factor[2] > 0.f) fm |= has_sheen;
+  if(mc.transmission.factor > 0.f) fm |= has_transmission;
+  if(mc.volume.thickness_factor > 0.f) fm |= has_volume;
+  if(mc.specular.factor != 1.f
+     || mc.specular.color_factor[0] != 1.f
+     || mc.specular.color_factor[1] != 1.f
+     || mc.specular.color_factor[2] != 1.f) fm |= has_specular;
+  if(mc.iridescence.factor > 0.f) fm |= has_iridescence;
+  if(mc.anisotropy.strength != 0.f) fm |= has_anisotropy;
+  if(mc.diffuse_transmission.factor > 0.f) fm |= has_diffuse_transmission;
+  // Subsurface: OpenPBR; no equivalent in ossia material today.
+  // thin_walled: OpenPBR; not in ossia today either.
+  gpu.feature_mask = fm;
+
+  // hit_group_id stays at default (0 = standard lit). A future
+  // pipeline-build step can map feature_mask to a dedicated hit-group
+  // index when RT lands; producers with a pre-computed mapping can
+  // set this directly.
+  gpu.hit_group_id = 0u;
+
+  // alpha_cutoff: glTF spec default is 0.5; only consulted by the
+  // shader when feature_mask carries `alpha_mask`.
+  gpu.alpha_cutoff = mc.alpha_cutoff;
+
+  // occlusion_textureRef stays at tex_ref_none() here — the texture
+  // ref needs the resolved (bucket, layer) from
+  // patchMaterialRefsFromCache. ScenePreprocessor patches it in the
+  // 5th-channel pass.
+
+  return gpu;
+}
+
+// Pack the OpenPBR / KHR extension fields from `material_component` into
+// MaterialExtensionsGPU (272 B). Field order matches the struct's
+// declaration — if you reorder there, reorder here.
+//
+// `textureRefs[]` is left at the default tex_ref_none() sentinels here.
+// The encoded refs are written by
+// ScenePreprocessor::patchMaterialRefsFromCache in lockstep with the
+// base-channel refs: the
+// `kExtTextureSlots` table in ScenePreprocessorNode.cpp routes each
+// MaterialExtensionsGPU::textureRefs[slot] through one of the existing
+// 5 channel pools (BaseColor / MetalRough / Normal) based on format
+// expectation. No separate ext-channel pool / sampler set — the same
+// bucket samplers serve both the main 5 channels and every glTF
+// KHR_materials_* extension texture.
+//
+// Takes the source material `mc` and returns the packed extension block.
+// Fields that are not assigned below keep their value-initialised state.
+MaterialExtensionsGPU packMaterialExtensions(const ossia::material_component& mc)
+{
+  MaterialExtensionsGPU gpu{}; // default-init = OpenPBR spec defaults
+
+  // Coat — maps to KHR_materials_clearcoat; coat_darkening is an
+  // OpenPBR extension not in glTF today (defaults to 0 → no darkening).
+  gpu.coat[0] = mc.clearcoat.factor;
+  gpu.coat[1] = mc.clearcoat.roughness_factor;
+  gpu.coat[2] = 1.5f; // coat_ior default (glTF doesn't expose a per-coat IOR)
+  gpu.coat[3] = 0.f; // coat_darkening
+  // Base-layer IOR — glTF's KHR_materials_ior applies here.
+  // No OpenPBR field for base IOR directly; we use it in the specular lobe.
+
+  // Fuzz / sheen
+  // xyz = sheen colour, w = sheen roughness.
+  gpu.fuzz_color[0] = mc.sheen.color_factor[0];
+  gpu.fuzz_color[1] = mc.sheen.color_factor[1];
+  gpu.fuzz_color[2] = mc.sheen.color_factor[2];
+  gpu.fuzz_color[3] = mc.sheen.roughness_factor;
+
+  // Transmission + volume. glTF separates thin-walled (transmission) from
+  // volumetric (volume); OpenPBR folds them: transmission_weight is the
+  // scalar knob, transmission_depth makes it volumetric. An infinite
+  // attenuation_distance effectively means "no absorption" → depth = 0.
+  gpu.transmission[0] = mc.transmission.factor;
+  // std::isfinite is also false for NaN, so NaN maps to 0 as well.
+  gpu.transmission[1] = std::isfinite(mc.volume.attenuation_distance)
+                            ? mc.volume.attenuation_distance : 0.f;
+  gpu.transmission[2] = 0.f; // dispersion_scale — not in glTF
+  // NOTE(review): the OpenPBR default Abbe number is believed to be 20, while
+  // typical crown glass is around 60 — confirm the wording of the next
+  // comment.
+  gpu.transmission[3] = 20.f; // dispersion Abbe number — crown-glass default
+  gpu.transmission_color[0] = mc.volume.attenuation_color[0];
+  gpu.transmission_color[1] = mc.volume.attenuation_color[1];
+  gpu.transmission_color[2] = mc.volume.attenuation_color[2];
+  gpu.transmission_color[3] = 0.f; // scatter_anisotropy — not in glTF
+  // transmission_scatter stays at zero (no volumetric scattering in glTF).
+
+  // Specular (KHR_materials_specular)
+  // [0] = weight, [1..3] = colour factor.
+  gpu.specular_weight_color[0] = mc.specular.factor;
+  gpu.specular_weight_color[1] = mc.specular.color_factor[0];
+  gpu.specular_weight_color[2] = mc.specular.color_factor[1];
+  gpu.specular_weight_color[3] = mc.specular.color_factor[2];
+  gpu.specular_ior_anisotropy[0] = mc.ior;
+  gpu.specular_ior_anisotropy[1] = mc.anisotropy.strength;
+  // Anisotropy rotation comes from material_component as a scalar angle
+  // in radians; OpenPBR wants it split into cos/sin to skip per-fragment
+  // trig. Bake it here.
+  gpu.specular_ior_anisotropy[2] = std::cos(mc.anisotropy.rotation);
+  gpu.specular_ior_anisotropy[3] = std::sin(mc.anisotropy.rotation);
+
+  // Thin-film iridescence. glTF carries min/max thickness; OpenPBR
+  // reference impl uses a single thickness (the film is nominally
+  // uniform; spatial variation would need a texture). Average the two.
+  gpu.thin_film[0] = mc.iridescence.factor;
+  gpu.thin_film[1]
+      = (mc.iridescence.thickness_min + mc.iridescence.thickness_max) * 0.5f;
+  gpu.thin_film[2] = mc.iridescence.ior;
+
+  // Diffuse transmission (KHR_materials_diffuse_transmission)
+  // [0] = factor, [1..3] = colour factor.
+  gpu.diffuse_transmission[0] = mc.diffuse_transmission.factor;
+  gpu.diffuse_transmission[1] = mc.diffuse_transmission.color_factor[0];
+  gpu.diffuse_transmission[2] = mc.diffuse_transmission.color_factor[1];
+  gpu.diffuse_transmission[3] = mc.diffuse_transmission.color_factor[2];
+
+  // Subsurface — stock glTF has no SSS. FbxParser maps FBX
+  // subsurface_factor / subsurface_color into
+  // mc.diffuse_transmission as the nearest equivalent slot
+  // (see FbxParser.cpp's KHR-extension mapping). We leave
+  // subsurface_* at OpenPBR spec defaults (weight = 0) for the pure-
+  // glTF case; when a loader grows a dedicated subsurface channel on
+  // material_component we'll fill it here.
+
+  // Flags: base diffuse roughness + thin-walled.
+  // `thin_walled` lives in scene_property_map["thin_walled"] when
+  // FbxParser sees an Arnold thin-walled feature. Presence of the key
+  // alone means true — the loader inserts the entry only when the flag
+  // is enabled. Application-level properties outside this hardcoded
+  // list aren't consumed here.
+  // Only flags[1] is written; the stored property value is never read.
+  if(mc.properties.find("thin_walled") != mc.properties.end())
+    gpu.flags[1] = 1.f;
+
+  return gpu;
+}
+
+// Dedup key combining a payload identity pointer with the accumulated
+// world transform on the walk path that reached it. Plain pointer dedup
+// (threedim#1) collapses every instance of a shared prototype into one:
+// when an upstream SceneDuplicator references a single prototype
+// scene_node_ptr under N distinct transforms, the pointer-only `seenNodes`
+// set lets only the first through and silently drops the other N-1
+// instances. Keying by (pointer, world-matrix) instead keeps genuinely
+// distinct instances (same prototype, different transform) apart while
+// still deduping true DAG re-references reached through an identical
+// transform path (bit-identical accumulated matrix → same key). Mesh GPU
+// vertex uploads are deduped separately downstream by DrawCall::stable_id,
+// so emitting N draws here still uploads the prototype's bytes once.
+struct InstanceKey +{ + const void* ptr{}; + std::array world{}; + + bool operator==(const InstanceKey& o) const noexcept + { + return ptr == o.ptr && world == o.world; + } +}; + +struct InstanceKeyHash +{ + // No is_avalanching marker: the combined pointer+matrix mix below is not + // guaranteed well-distributed (std::hash is often identity), so we + // let unordered_dense apply its own final avalanche step. + std::size_t operator()(const InstanceKey& k) const noexcept + { + std::size_t h = std::hash{}(k.ptr); + for(float f : k.world) + { + // Normalize -0.0f to +0.0f so the two compare/hash identically; the + // exact float compare in operator== handles the rest. + std::uint32_t bits; + const float v = (f == 0.f) ? 0.f : f; + std::memcpy(&bits, &v, sizeof(bits)); + h ^= std::size_t(bits) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + } + return h; + } +}; + +static InstanceKey makeInstanceKey(const void* p, const QMatrix4x4& m) +{ + InstanceKey k; + k.ptr = p; + // QMatrix4x4::constData() is column-major, 16 contiguous floats. + std::memcpy(k.world.data(), m.constData(), sizeof(float) * 16); + return k; +} + +// Visitor that walks the scene_payload tree and collects draw calls, lights, cameras. +struct FlattenVisitor +{ + FlatScene& out; + QMatrix4x4 parentWorld; + ossia::scene_node_id currentNodeId{}; + // KHR_materials_variants: set from scene_state::active_variant_index + // at flatten-start. -1 = use each primitive's default material. + int32_t activeVariant{-1}; + + // Most recently encountered producer-authored scene_transform slot on + // the current walk path. 0xFFFFFFFF = none yet. Stamped on each + // DrawCall so PerDrawGPU.transform_slot can point at the corresponding + // world_transforms / world_transforms_prev entry for motion vectors. + std::uint32_t currentTransformSlot{0xFFFFFFFFu}; + + // Identity-based dedup for shared payload pointers reachable through + // multiple tree paths. 
The visitor's contract is "one entry per unique + // payload object" — repeating the same shared_ptr (e.g. a single + // primitive_cloud_component_ptr referenced by four distinct scene_node + // children, or a mesh_component shared across LOD levels) should + // contribute one bucket / draw call, not N. merge_scenes / SceneGroup + // already dedup roots, so this only triggers on actually-shared + // sub-tree references (the cases the upstream layers can't see). + // Nodes / meshes / clouds dedup by (pointer, accumulated world transform) + // so distinct instances of a shared prototype (SceneDuplicator) survive + // — see InstanceKey above. Lights / cameras / scene_data / instances keep + // plain pointer dedup: they aren't multiplied by the duplicator path here. + ossia::hash_set seenNodes; + ossia::hash_set seenClouds; + // Secondary dedup key for clouds: the raw_data pointer. FormatOverride + // clones the primitive_cloud_component to rewrite format_id but keeps + // the underlying raw_data (~1 GB for a 4M-splat scan) shared via + // shared_ptr — two distinct components pointing at the same raw_data + // are still one upload's worth of GPU bytes. Dedup by raw_data when + // present, fall back to component pointer when raw_data is null. + // Still combined with the world transform so a cloud reused under two + // duplicator transforms renders twice. + ossia::hash_set seenCloudRawData; + ossia::hash_set seenMeshes; + ossia::ptr_set seenLights; + ossia::ptr_set seenCameras; + ossia::ptr_set seenSceneData; + ossia::ptr_set seenInstances; + + void visitPayload(const ossia::scene_payload& payload) + { + if(auto* subnode = ossia::get_if(&payload)) + { + // Key on (node, parentWorld): the same prototype node reached under a + // different accumulated transform (duplicator) is a distinct instance. 
+ if(*subnode + && seenNodes.insert(makeInstanceKey(subnode->get(), parentWorld)).second) + visitNode(**subnode); + } + else if(auto* mesh = ossia::get_if(&payload)) + { + if(*mesh + && seenMeshes.insert(makeInstanceKey(mesh->get(), parentWorld)).second) + visitMesh(**mesh); + } + else if(auto* light = ossia::get_if(&payload)) + { + if(*light && seenLights.insert(light->get()).second) + { + // Arena slot index for shader-side arena-direct light reads + // (task 28b/c — packLight path removed). 0xFFFFFFFF sentinel + // for producer-less lights (e.g. FBX/glTF-embedded lights that + // don't own a RawLight slot yet). Such lights are filtered out + // when building scene_light_indices. + out.lightArenaSlots.push_back( + (*light)->raw_slot.size != 0 + ? (*light)->raw_slot.internal_index + : 0xFFFFFFFFu); + } + } + else if(auto* camera = ossia::get_if(&payload)) + { + if(*camera && seenCameras.insert(camera->get()).second) + { + FlatScene::CameraEntry e; + e.component = *camera; + e.worldTransform = parentWorld; + e.node_id = currentNodeId; + out.cameras.push_back(std::move(e)); + } + } + else if(auto* xform = ossia::get_if(&payload)) + { + // A bare transform applies to subsequent siblings — update parentWorld + parentWorld = parentWorld * toQMatrix(*xform); + // Emit the composed world matrix in walk order so the preprocessor + // can upload it into its private world-transforms SSBO. Only + // producer-authored transforms (stamped raw_slot) get an entry — + // loader-interior transforms participate in hierarchy accumulation + // but aren't individually addressable on GPU. + if(xform->raw_slot.size != 0) + { + out.worldTransforms.push_back( + WorldTransformEmit{parentWorld, xform->raw_slot.internal_index}); + // Remember this slot as the "nearest producer transform" so + // subsequent sibling / child draws can reference it for + // motion-vector / TAA lookups via world_transforms_prev[slot]. 
+ currentTransformSlot = xform->raw_slot.internal_index; + } + } + else if(auto* sd = ossia::get_if(&payload)) + { + // Generic escape hatch: stash it; the ScenePreprocessor forwards every entry + // as an auxiliary_buffer on the output geometry. + if(*sd && seenSceneData.insert(sd->get()).second) + out.scene_data.push_back(*sd); + } + else if(auto* inst = ossia::get_if(&payload)) + { + // GPU-instanced mesh: collect — the ScenePreprocessor emits one DrawCall with + // instances=instance_count and forwards the instance SSBOs. + if(*inst && seenInstances.insert(inst->get()).second) + out.instances.push_back({*inst, parentWorld}); + } + else if(auto* pc + = ossia::get_if(&payload)) + { + // Format-agnostic point cloud / splat: collect — the + // ScenePreprocessor's primitive-cloud branch buckets these by + // format_id and emits one indirect-draw geometry per bucket + // alongside the existing mesh MDI. The cloud's data lives in + // raw_data + format_params; the bucket geometry's auxiliary + // ("raw_splats") forwards it to the format's CSF chain. + // + // Dedup by raw_data pointer rather than the component pointer: + // FormatOverride deliberately clones the component (fresh + // primitive_cloud_component shared_ptr) but keeps the heavy + // raw_data shared, and we don't want format-override to defeat + // dedup. Two distinct components with distinct raw_data are + // independent uploads and are kept; same raw_data through + // multiple paths counts once. + if(*pc) + { + const ossia::buffer_resource* raw = (*pc)->raw_data.get(); + const bool unique + = raw ? 
seenCloudRawData.insert(makeInstanceKey(raw, parentWorld)) + .second + : seenClouds.insert(makeInstanceKey(pc->get(), parentWorld)) + .second; + if(unique) + { + FlatScene::PrimitiveCloudDraw d; + d.cloud = *pc; + d.worldTransform = parentWorld; + d.transform_slot = currentTransformSlot; + out.primitive_clouds.push_back(std::move(d)); + } + } + } + // gaussian_splat, voxel_field, point_cloud, volume — not rendered yet, + // but the types are transported. Renderers will handle them later. + } + + void visitNode(const ossia::scene_node& node) + { + // Inactive nodes are skipped entirely — no transforms, no children, + // no payload contributions. USD-style non-destructive prune: the + // data stays in the scene tree so downstream toggles can + // re-activate without re-uploading geometry. + if(!node.active) + return; + + // scene_node has no transform of its own in the new design. + // Transforms are scene_payload children (scene_transform). + // We process children in order; transform payloads affect subsequent siblings. + if(!node.has_children()) + return; + + // Save current world so sibling transforms don't leak. Also remember the + // parent node id so camera payloads can be attributed to it for + // active_camera_id resolution. currentTransformSlot is save/restored + // alongside parentWorld — a scene_transform encountered inside this + // node's children scope shouldn't leak to unrelated siblings. + QMatrix4x4 savedWorld = parentWorld; + auto savedNodeId = currentNodeId; + auto savedTransformSlot = currentTransformSlot; + currentNodeId = node.id; + + for(auto& child : *node.children) + { + visitPayload(child); + } + + parentWorld = savedWorld; + currentNodeId = savedNodeId; + currentTransformSlot = savedTransformSlot; + } + + void visitMesh(const ossia::mesh_component& mc) + { + // Modern path: mesh_primitive[]. Build a transient ossia::geometry per + // primitive so the ScenePreprocessor can treat it uniformly with legacy geometry. 
+ for(const auto& prim : mc.primitives) + { + if(prim.vertex_buffers.empty() || prim.vertex_count == 0) + continue; + DrawCall dc; + dc.owned_mesh = primitiveToGeometry(prim); + dc.mesh = dc.owned_mesh.get(); + // Prefer the producer-stamped stable_id (identity survives merge + // reshuffles AND source-primitive pointer churn on rebuilds). + // Fall back to the pointer bits when the producer hasn't stamped + // one yet — legacy behaviour. + dc.stable_id + = prim.stable_id != 0 + ? prim.stable_id + : reinterpret_cast(&prim); + dc.worldTransform = parentWorld; + // Direct pointers — identity survives merge_scenes without a bias + // table. flattenScene dedups these into FlatScene::materials / + // ::skins after the walk and stamps the corresponding indices. + dc.material = prim.material; + // KHR_materials_variants override: when the active variant has + // a non-null mapping for this primitive, swap in the variant's + // material. Out-of-range / null entries fall through to default. + if(activeVariant >= 0 + && (std::size_t)activeVariant < prim.material_variants.size() + && prim.material_variants[activeVariant]) + { + dc.material = prim.material_variants[activeVariant]; + } + dc.skin = mc.skin; + dc.local_bounds = prim.bounds; + dc.transform_slot = currentTransformSlot; + out.draws.push_back(std::move(dc)); + } + + // Legacy geometry_spec path (backward compat for loaders that still use + // mesh_component::legacy_geometry). + auto& geom_spec = mc.legacy_geometry; + if(geom_spec.meshes && !geom_spec.meshes->meshes.empty()) + { + for(auto& geom : geom_spec.meshes->meshes) + { + DrawCall dc; + dc.mesh = &geom; + // Legacy geometry has no producer-stamped stable_id field; + // fall back to its address. + dc.stable_id = reinterpret_cast(&geom); + dc.geometry_ref = geom_spec; + dc.worldTransform = parentWorld; + // Material comes from the first primitive if any, else null. 
+ if(!mc.primitives.empty()) + dc.material = mc.primitives[0].material; + dc.skin = mc.skin; + // Legacy path: fall back to mesh_component bounds (primitive + // bounds may be absent on the old path). The preprocessor + // treats empty bounds as "never cull". + dc.local_bounds = mc.bounds; + dc.transform_slot = currentTransformSlot; + out.draws.push_back(std::move(dc)); + } + } + } + +}; + +void flattenScene(const ossia::scene_spec& scene, FlatScene& out, float aspectRatio) +{ + out.clear(); + + if(!scene.state || scene.state->empty()) + return; + + // Pack materials — base + extensions in lockstep. Both vectors grow + // together so `material_extensions[i]` always corresponds to + // `materials[i]`. Missing extension data (no KHR_* extension on a + // given glTF material) lands as the default-constructed struct, + // which is the OpenPBR spec default (all lobe weights = 0, IORs at + // 1.5, etc.) — consumer shaders can blindly read it and get + // identity behaviour where the file didn't opt in. + if(scene.state->materials) + { + for(auto& mat : *scene.state->materials) + { + if(mat) + { + out.materials.push_back(packMaterial(*mat)); + out.material_extensions.push_back(packMaterialExtensions(*mat)); + } + else + { + out.materials.push_back(MaterialGPU{}); + out.material_extensions.push_back(MaterialExtensionsGPU{}); + } + } + } + + // Pack skeletons: forward kinematics through joint hierarchy, then + // joint_matrix[i] = world_joint[i] × inverse_bind_matrix[i]. Matches the + // glTF skinning convention; consumer shaders multiply vertex position by + // Σ(w_j × joint_matrix[j]). 
+ if(scene.state->skeletons) + { + auto jointLocal = [](const ossia::skeleton_joint& j) { + QMatrix4x4 m; + m.translate(j.translation[0], j.translation[1], j.translation[2]); + m.rotate(QQuaternion(j.rotation[3], j.rotation[0], j.rotation[1], j.rotation[2])); + m.scale(j.scale[0], j.scale[1], j.scale[2]); + return m; + }; + + out.skins.reserve(scene.state->skeletons->size()); + for(const auto& sk : *scene.state->skeletons) + { + SkeletonGPU sg; + if(!sk) + { + out.skins.push_back(std::move(sg)); + continue; + } + + // Multi-pass forward kinematics: resolve any joint whose parent has + // already been resolved, looping until all are done. The glTF 2.0 + // spec does NOT guarantee topological ordering of skin.joints, so + // we cannot assume parent_index < i. For DFS-ordered skins (the + // common case) this converges in a single pass. + const std::size_t N = sk->joints.size(); + std::vector world(N); + std::vector resolved(N, false); + sg.joint_matrices.resize(N); + std::size_t resolvedCount = 0; + int passes = 0; + constexpr int maxPasses = 64; // covers any real skeleton depth + while(resolvedCount < N && passes < maxPasses) + { + bool changed = false; + for(std::size_t i = 0; i < N; ++i) + { + if(resolved[i]) + continue; + const auto& j = sk->joints[i]; + // Root joint or invalid parent index: resolve immediately. + if(j.parent_index < 0 || j.parent_index >= (int32_t)N) + { + world[i] = jointLocal(j); + resolved[i] = true; + ++resolvedCount; + changed = true; + continue; + } + // Otherwise, parent must be resolved first. + if(!resolved[(std::size_t)j.parent_index]) + continue; + world[i] = world[j.parent_index] * jointLocal(j); + resolved[i] = true; + ++resolvedCount; + changed = true; + } + ++passes; + if(!changed) + break; // cycle or orphan: bail out instead of spinning + } + if(resolvedCount < N) + { + qWarning() << "SceneGPUState: skeleton FK did not converge —" + << (N - resolvedCount) << "joint(s) unresolved (cycle or" + << "orphan parent). 
Falling back to local matrices."; + for(std::size_t i = 0; i < N; ++i) + { + if(!resolved[i]) + world[i] = jointLocal(sk->joints[i]); + } + } + // Stamp joint_matrices = world × inverse_bind_matrix once FK is done. + for(std::size_t i = 0; i < N; ++i) + { + const QMatrix4x4 ibm + = QMatrix4x4(sk->joints[i].inverse_bind_matrix, 4, 4); + sg.joint_matrices[i] = world[i] * ibm; + } + out.skins.push_back(std::move(sg)); + } + } + + // Walk the node tree. mesh_primitive / mesh_component now carry + // direct shared_ptr references to their material and skin, so no + // per-root index-bias bookkeeping is required. + QMatrix4x4 identity; + FlattenVisitor vis{out, identity}; + // KHR_materials_variants: seed the visitor from scene_state. When + // no variants are declared (typical) this stays at -1 and the + // per-draw override branch compiles to a cheap null-check. + vis.activeVariant = scene.state->active_variant_index; + const auto& roots = *scene.state->roots; + for(std::size_t ri = 0; ri < roots.size(); ++ri) + { + // Same dedup contract as visitPayload's scene_node_ptr branch: + // skip roots whose (pointer, world transform) was already walked. + // merge_scenes / SceneGroup are expected to dedup before this point, + // but a scene_state assembled by hand could still place the same root + // in `roots[]` more than once. Roots are walked at the visitor's + // current world (identity here), matching the key visitPayload uses. + if(!roots[ri] + || !vis.seenNodes.insert(makeInstanceKey(roots[ri].get(), vis.parentWorld)) + .second) + continue; + vis.visitNode(*roots[ri]); + } + + // Resolve DrawCall::materialIndex / ::skinIndex from the direct + // shared_ptr references stamped on each draw. materialIndex is the + // position of dc.material inside scene.state->materials (packed + // above into out.materials in the same order), so the shaders can + // continue to SSBO-index into scene_materials[draw.material_index]. 
+ if(scene.state->materials && !scene.state->materials->empty()) + { + ossia::hash_map mat_index; + mat_index.reserve(scene.state->materials->size()); + for(std::size_t i = 0; i < scene.state->materials->size(); ++i) + { + const auto& m = (*scene.state->materials)[i]; + if(m) + mat_index[m.get()] = (int)i; + } + for(auto& dc : out.draws) + { + if(!dc.material) + continue; + auto it = mat_index.find(dc.material.get()); + dc.materialIndex = (it != mat_index.end()) ? it->second : -1; + } + } + if(scene.state->skeletons && !scene.state->skeletons->empty()) + { + ossia::hash_map skin_index; + skin_index.reserve(scene.state->skeletons->size()); + for(std::size_t i = 0; i < scene.state->skeletons->size(); ++i) + { + const auto& s = (*scene.state->skeletons)[i]; + if(s) + skin_index[s.get()] = (int)i; + } + for(auto& dc : out.draws) + { + if(!dc.skin) + continue; + auto it = skin_index.find(dc.skin.get()); + dc.skinIndex = (it != skin_index.end()) ? it->second : -1; + } + } + + // Also surface any cameras registered at scene_state level (producers + // that don't want to embed a camera node can publish via `cameras` only). + // Dedup against the set the tree walk already collected: a camera that + // appears both as a tree payload (with worldTransform) AND in + // scene_state.cameras would otherwise be entered twice — once with + // its real placement, once at identity — and the active-camera resolver + // would pick the wrong one half the time. + if(scene.state->cameras) + { + for(const auto& cam : *scene.state->cameras) + { + if(!cam || !vis.seenCameras.insert(cam.get()).second) + continue; + FlatScene::CameraEntry e; + e.component = cam; + // No world transform context at this level — identity placement. + e.worldTransform = QMatrix4x4{}; + out.cameras.push_back(std::move(e)); + } + } + + // Resolve active camera: match scene_state.active_camera_id against the + // collected camera entries; fall back to the first camera if the id is + // unset or not found. 
+ if(!out.cameras.empty()) + { + out.activeCameraIndex = 0; + if(scene.state->active_camera_id.value != 0) + { + for(std::size_t i = 0; i < out.cameras.size(); ++i) + { + if(out.cameras[i].node_id == scene.state->active_camera_id) + { + out.activeCameraIndex = (int)i; + break; + } + } + } + } + + // Populate legacy single-camera mirror fields so consumers that haven't + // migrated to `cameras[activeCameraIndex]` keep working. + if(out.activeCameraIndex >= 0) + { + const auto& e = out.cameras[(std::size_t)out.activeCameraIndex]; + const auto& cam = *e.component; + out.cameraPosition = e.worldTransform.column(3).toVector3D(); + out.viewMatrix = e.worldTransform.inverted(); + out.cameraFov = cam.yfov * (180.f / float(M_PI)); + out.cameraNear = cam.znear; + out.cameraFar = cam.zfar; + out.projectionMatrix.setToIdentity(); + out.projectionMatrix.perspective( + out.cameraFov, aspectRatio, out.cameraNear, out.cameraFar); + out.hasCamera = true; + } + else + { + out.cameraPosition = QVector3D(0.f, 0.f, 3.f); + out.viewMatrix.setToIdentity(); + out.viewMatrix.lookAt( + out.cameraPosition, QVector3D(0.f, 0.f, 0.f), QVector3D(0.f, 1.f, 0.f)); + out.projectionMatrix.setToIdentity(); + out.projectionMatrix.perspective(60.f, aspectRatio, 0.1f, 1000.f); + out.cameraFov = 60.f; + out.cameraNear = 0.1f; + out.cameraFar = 1000.f; + out.hasCamera = false; + } +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.hpp new file mode 100644 index 0000000000..792df38622 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SceneGPUState.hpp @@ -0,0 +1,656 @@ +#pragma once +#include + +#include + +#include +#include + +namespace score::gfx +{ + +// GPU-friendly structures for packing scene data into UBOs/SSBOs. +// All matrices are column-major (OpenGL/Vulkan convention). 
+// +// The structs split into two families: +// +// Raw* — written by source nodes (Camera, Light, Transform3D, +// EnvironmentLoader) into their own GpuResourceRegistry arena +// slot at their operator()() time. View-independent — no +// aspect-ratio math, no scene-graph composition applied. +// +// (CameraUBOData / MaterialGPU / PerDrawGPU / +// WorldTransformMat4) — produced by ScenePreprocessor from +// Raw* arenas + render-target state + scene-topology chain. +// These are what consumer shaders bind. +// +// Materials and env are scene-composition-independent so Raw == Cooked +// for them — no separate RawMaterial / RawEnv structs below, MaterialGPU +// and EnvParamsUBO are used directly from source nodes. + +#pragma pack(push, 1) + +// LightGPU removed in task 28c. Consumer shaders read RawLightData +// directly from the RawLight arena and compose world-space direction +// via world_transforms[transform_slot]. + +// Scene-level UBO: camera + global scene data. +struct SceneUBO +{ + float view[16]{}; + float projection[16]{}; + float viewProjection[16]{}; + float cameraPosition[4]{}; // xyz = position, w = padding + float time{}; + int32_t lightCount{}; + int32_t materialCount{}; + float padding0{}; + float ambientColor[4]{0.03f, 0.03f, 0.03f, 1.f}; +}; + +// Per-mesh UBO: model transform for the current draw call. +struct MeshUBO +{ + float model[16]{}; + float modelViewProjection[16]{}; + float normalMatrix[12]{}; // mat3 in std140 = 3 × vec4 (48 bytes) + int32_t materialIndex{}; + float padding[3]{}; +}; + +// Packed 32-bit texture reference stored in MaterialGPU::textureRefs[]. +// Layout (MSB → LSB), as written by tex_ref_static(): +// bits 31..30 : source (0 = NONE, 1 = STATIC pool, 2 = DYNAMIC pool) +// bits 29..23 : bucket index (0..127) within the STATIC pool +// bits 22.. 0 : layer index (0..8M) within the bucket's texture array +// (DYNAMIC refs carry no bucket: tex_ref_dynamic() stores the slot index in bits 23..0.) +// 0xFFFFFFFF is the "no texture" sentinel — shader should fall back to +// the constant baseColor factor, metallic_factor, etc.
+// +// Step 1 of the texture rollout uses only source=STATIC, bucket=0, so the +// low 23 bits hold the layer index directly. Bucketing + dynamic pools will +// slot into this same encoding without a material layout change. +inline constexpr uint32_t tex_ref_none() { return 0xFFFFFFFFu; } +inline constexpr uint32_t tex_ref_static(uint32_t bucket, uint32_t layer) +{ + // Packed layout: source:2 | bucket:7 | layer:23 + // + // The 7-bit bucket field (0..127) gives encoding headroom for up to + // 128 buckets; the runtime cap is kMaxBuckets = 16 in + // GpuResourceRegistry.hpp. Growing the cap requires enlarging the + // shader sampler arrays but needs no change to this encoding. Layer + // field at 23 bits holds 8M layers — 8000× kTextureLayerSize of 1024. + // + // Shader-side decode mirror: `(ref >> 23) & 0x7Fu` for the bucket, + // `ref & 0x007FFFFFu` for the layer. See classic_pbr_full.frag et al. + return (1u << 30) | ((bucket & 0x7Fu) << 23) | (layer & 0x007FFFFFu); +} +// Dynamic texture slot encoding: source=2, bucket unused (0), low 24 bits +// hold the per-channel slot index (0..kMaxDynamicSlots-1). Consumer shaders +// branch on the source bits and sample one of a small fixed set of direct +// sampler2D uniforms named `Dyn0`, `Dyn1`, etc. — no +// CPU decode, no array layer, upstream texture handle is forwarded as-is. +// Used for large runtime textures (8K video, HDR shader outputs) that +// don't fit the 1024² scaled-and-uploaded array path. +inline constexpr uint32_t tex_ref_dynamic(uint32_t slot) +{ + return (2u << 30) | (slot & 0x00FFFFFFu); +} + +// Per-material data for the material SSBO. 80 bytes (5 × vec4). +// +// VJ context → few materials, each potentially heavy (full OpenPBR +// extension set + feature-mask-driven SER sorting). 16 B of runtime +// metadata is a rounding error on a few-dozen materials and leaves +// headroom for future fields (animation ID, LOD hint, shader +// permutation hash) without another ABI break.
+struct MaterialGPU +{ + float baseColor[4]{1.f, 1.f, 1.f, 1.f}; + // x = metallic, y = roughness, z = occlusion, w = unlit flag + float metallicRoughnessOcclusionUnlit[4]{0.f, 0.5f, 1.f, 0.f}; + // xyz = emissive, w = emissive strength + float emissive_strength[4]{0.f, 0.f, 0.f, 1.f}; + // Packed texture refs: [0] = base color, [1..3] reserved for MR, normal, + // emissive. See tex_ref_* helpers for encoding. + uint32_t textureRefs[4]{ + 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu}; + + // --- Runtime metadata (16 B) ---------------------------------------- + // Producer-derived bitmask of "which BRDF lobes / features are active" + // for this material. Used as: + // - Coherence key for NVIDIA Shader Execution Reordering + // (`reorderThread(feature_mask)` before closest-hit shading) so + // threads in the same warp converge on the same shading path. + // - Shader-side specialization in the main closest-hit / fragment + // body: `if(fm & HAS_TRANSMISSION) { ... }`. + // Bit layout: + // bit 0 : has_base_color_texture + // bit 1 : has_metal_rough_texture + // bit 2 : has_normal_texture + // bit 3 : has_emissive_texture + // bit 4 : unlit + // bit 5 : alpha_non_opaque (mask OR blend) + // bit 6 : has_clearcoat (KHR_materials_clearcoat) + // bit 7 : has_sheen (KHR_materials_sheen) + // bit 8 : has_transmission (KHR_materials_transmission) + // bit 9 : has_volume (KHR_materials_volume) + // bit 10 : has_specular (KHR_materials_specular) + // bit 11 : has_iridescence (KHR_materials_iridescence) + // bit 12 : has_anisotropy + // bit 13 : has_diffuse_transmission + // bit 14 : has_subsurface + // bit 15 : thin_walled + // bit 16 : alpha_mask (glTF alphaMode = MASK) + // bit 17 : alpha_blend (glTF alphaMode = BLEND) + // bit 18 : double_sided (glTF doubleSided) + // bit 19 : has_separate_occlusion (occlusion ≠ MR source) + // bits 20-21 : BC texcoord_set (0 or 1, glTF TEXCOORD_0/1) + // bits 22-23 : MR texcoord_set + // bits 24-25 : Normal texcoord_set + // bits 
26-27 : Emissive texcoord_set + // bits 28-29 : Occlusion texcoord_set + // bit 30 : shadow_caster_disabled (material.shadow_caster == false) + // bit 31 : reflection_caster_disabled (material.reflection_caster == false) + uint32_t feature_mask{0u}; + + // Shader binding table hit-group index for ray tracing pipelines. + // Producers with a pre-computed hit-group mapping stamp this at + // material-authoring time; 0 means "default lit material" and is the + // safe fallback for renderers that haven't computed the mapping yet. + uint32_t hit_group_id{0u}; + + // 5th texture channel (occlusion). glTF separates occlusionTexture + // from metallicRoughnessTexture; conventionally both are sometimes + // packed into the same image (occlusion in R, roughness in G, + // metallic in B). When they're distinct sources, this slot points + // at the occlusion array layer; when they're the same, this stays + // at tex_ref_none() and the shader uses MR.r * occlusion_factor. + uint32_t occlusion_textureRef{0xFFFFFFFFu}; + + // glTF alphaMode = MASK cutoff. Shader does `if(alpha < cutoff) + // discard;` when the `alpha_mask` feature_mask bit is set. + // Default 0.5 matches the glTF spec default. + float alpha_cutoff{0.5f}; +}; +static_assert(sizeof(MaterialGPU) == 80, "MaterialGPU layout must match shader"); + +// Feature-mask bit flags. Producers OR these together to derive the +// per-material feature_mask; hit-group shaders branch on them to +// select the relevant BRDF lobe code path. 
+namespace material_feature +{ +inline constexpr uint32_t has_base_color_texture = 1u << 0; +inline constexpr uint32_t has_metal_rough_texture = 1u << 1; +inline constexpr uint32_t has_normal_texture = 1u << 2; +inline constexpr uint32_t has_emissive_texture = 1u << 3; +inline constexpr uint32_t unlit = 1u << 4; +inline constexpr uint32_t alpha_non_opaque = 1u << 5; +inline constexpr uint32_t has_clearcoat = 1u << 6; +inline constexpr uint32_t has_sheen = 1u << 7; +inline constexpr uint32_t has_transmission = 1u << 8; +inline constexpr uint32_t has_volume = 1u << 9; +inline constexpr uint32_t has_specular = 1u << 10; +inline constexpr uint32_t has_iridescence = 1u << 11; +inline constexpr uint32_t has_anisotropy = 1u << 12; +inline constexpr uint32_t has_diffuse_transmission = 1u << 13; +inline constexpr uint32_t has_subsurface = 1u << 14; +inline constexpr uint32_t thin_walled = 1u << 15; +// glTF alpha mode (parsed from material.alphaMode). MASK → shader +// discards fragments with alpha < alpha_cutoff. BLEND → shader emits +// translucent alpha (caller handles depth/sort separately). +inline constexpr uint32_t alpha_mask = 1u << 16; +inline constexpr uint32_t alpha_blend = 1u << 17; +// glTF doubleSided. When set, shader flips the surface normal for +// back-facing fragments (so lighting works on both sides). When unset +// AND the pipeline cull mode is `none` (MDI default), shader discards +// back-facing fragments to mimic single-sided culling. +inline constexpr uint32_t double_sided = 1u << 18; +// Separate occlusion texture present (independent from MR texture). +// Shader samples mat.occlusion_textureRef instead of using mr.r. +inline constexpr uint32_t has_separate_occlusion = 1u << 19; +// Scene-filter opt-outs. "Disabled" semantics (default 0 = participates +// in the pass) so the common case stays bit-clear. Packed at bits +// 30/31 — CSF filter shaders test these to drop draws from auxiliary +// passes (shadow-map, reflection capture).
+inline constexpr uint32_t shadow_caster_disabled = 1u << 30; +inline constexpr uint32_t reflection_caster_disabled = 1u << 31; +} + +// Per-material EXTENSION data — parallel SSBO, indexed by the same +// `material_index` as MaterialGPU. Shaders that only need the 80-byte +// base material (classic_pbr / classic_pbr_textured / …) ignore this. +// OpenPBR-grade shaders declare `scene_materials_ext` and read the +// full lobe set. +// +// Layout is std430-friendly: every member starts on a 16-byte boundary +// (vec4 / uvec4 alignment rule). Field names track OpenPBR_ +// ResolvedInputs / glTF KHR extension names so translation on the shader +// side is a 1:1 copy. +// +// Texture refs (`textureRefs[16]`) are encoded with the same +// `tex_ref_static / tex_ref_dynamic / tex_ref_none` helpers as +// `MaterialGPU.textureRefs` — shaders branch on the top bits and either +// sample the corresponding per-channel texture array (static) or a +// direct sampler2D slot (dynamic). Slot ordering is documented below; +// the indices MUST match what `packMaterialExtensions` writes and what +// the consumer shader's Material_Ext struct reads.
+struct MaterialExtensionsGPU +{ + // --- Coat / clearcoat (KHR_materials_clearcoat) --------------------- + // x = coat_weight, y = coat_roughness, z = coat_ior, w = coat_darkening + float coat[4]{0.f, 0.f, 1.5f, 0.f}; + // x = roughness_anisotropy, y = rotation_cos, z = rotation_sin, w = _pad + float coat_anisotropy[4]{0.f, 1.f, 0.f, 0.f}; + + // --- Fuzz / sheen (KHR_materials_sheen) ----------------------------- + // xyz = color, w = roughness + float fuzz_color[4]{0.f, 0.f, 0.f, 0.f}; + + // --- Transmission + volume (KHR_materials_transmission + _volume) --- + // x = transmission_weight, y = transmission_depth, + // z = dispersion_scale, w = dispersion_abbe_number + float transmission[4]{0.f, 0.f, 0.f, 20.f}; + // xyz = transmission_color, w = scatter_anisotropy + float transmission_color[4]{1.f, 1.f, 1.f, 0.f}; + // xyz = transmission_scatter (vec3), w = _pad + float transmission_scatter[4]{0.f, 0.f, 0.f, 0.f}; + + // --- Specular (KHR_materials_specular) + base specular anisotropy --- + // x = specular_weight, yzw = specular_color + float specular_weight_color[4]{1.f, 1.f, 1.f, 1.f}; + // x = specular_ior, y = roughness_anisotropy, + // z = rotation_cos, w = rotation_sin + float specular_ior_anisotropy[4]{1.5f, 0.f, 1.f, 0.f}; + + // --- Thin-film iridescence (KHR_materials_iridescence) -------------- + // x = thin_film_weight (iridescence factor), + // y = thin_film_thickness (glTF average of min/max), + // z = thin_film_ior, w = _pad + float thin_film[4]{0.f, 400.f, 1.3f, 0.f}; + + // --- Diffuse transmission (KHR_materials_diffuse_transmission) ------ + // x = factor, yzw = color + float diffuse_transmission[4]{0.f, 1.f, 1.f, 1.f}; + + // --- Subsurface (OpenPBR subsurface; not present in stock glTF) ----- + // x = weight, yzw = color + float subsurface_weight_color[4]{0.f, 0.8f, 0.8f, 0.8f}; + // x = radius, yzw = radius_scale + float subsurface_radius_scale[4]{1.f, 1.f, 0.5f, 0.25f}; + + // --- Misc scalars + flags 
------------------------------------------- + // x = base_diffuse_roughness (OpenPBR Oren-Nayar knob), + // y = thin_walled (bool-as-float 0/1), + // z = _pad, w = _pad + float flags[4]{0.f, 0.f, 0.f, 0.f}; + + // --- Texture refs --------------------------------------------------- + // Slot layout: + // 0 = coat factor + // 1 = coat roughness + // 2 = coat normal + // 3 = fuzz color (sheen) + // 4 = fuzz roughness + // 5 = transmission + // 6 = specular factor + // 7 = specular color + // 8 = iridescence (thin-film) + // 9 = iridescence thickness + // 10 = anisotropy + // 11 = diffuse transmission + // 12 = diffuse transmission color + // 13 = subsurface factor + // 14 = subsurface color + // 15 = reserved + uint32_t textureRefs[16]{ + 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu}; +}; + +// ─── Raw layouts (source-owned arena slots) ──────────────────────────── +// +// Written by source halp nodes directly into their GpuResourceRegistry +// arena slot at their own operator()() time. ScenePreprocessor reads +// these, applies aspect-ratio / scene-graph composition, and writes the +// cooked equivalents (CameraUBOData / LightGPU / world-transform mat4 / +// …) that consumer shaders bind. + +// Camera parameters before matrix composition. No aspect ratio, no +// view / projection matrices — the preprocessor builds those per render +// target. +struct RawCameraData +{ + float eye[4]{0.f, 0.f, 3.f, 0.f}; // xyz = world-space eye, w = pad + float target[4]{0.f, 0.f, 0.f, 0.f}; // xyz = look-at target, w = pad + float up[4]{0.f, 1.f, 0.f, 0.f}; // xyz = up, w = pad + float yfov{60.f * 3.14159265f / 180.f}; // vertical FOV, radians + float znear{0.1f}; + float zfar{1000.f}; + uint32_t projection{0}; // 0 = perspective, 1 = orthographic +}; + +// Light parameters in local frame. 
The final world-space direction +// depends on the node's world transform (composed by the preprocessor +// from its scene-node parent chain); this struct stores only what the +// node itself knows. +struct RawLightData +{ + float color[4]{1.f, 1.f, 1.f, 1.f}; // xyz = color, w = intensity + float local_direction[4]{0.f, 0.f, -1.f, 0.f}; // xyz = dir (local), + // w = type enum: + // 0 = directional + // 1 = point + // 2 = spot + // (area / dome modes + // collapse to point / + // directional; dome + // lights are served by + // the scene-global env + // path, see EnvParamsUBO.) + float range_cone[4]{ // x = range (point/spot; + 0.f, 1.f, 0.7071f, 0.005f}; // 0 = infinite) + // y = inner cone cos + // z = outer cone cos + // w = shadow depth bias + // Shadow gate — consumer shadow-receiving shaders (classic_pbr_shadowed, + // etc.) MUST multiply the computed shadow term by `shadow_enabled != 0` + // so lights with shadow casting disabled fall through to unoccluded + // lighting. Per-light, per-frame opt-out; separate from the + // per-material shadow_caster_disabled bit (which controls whether a + // draw participates in the depth-only cast pass). + uint32_t shadow_enabled{0}; + uint32_t decay_mode{2}; // 0=const 1=lin 2=quad 3=cubic + // RawTransform arena slot index for this light's scene_transform. + // Consumer shader reads world_transforms.data[transform_slot] to + // get the world matrix, composes world-space direction / position + // from local_direction on the fly. Replaces the preprocessor's + // CPU-side packLight world composition (task 28b). + uint32_t transform_slot{0}; + // Receiver-plane / slope-scaled bias for shadow sampling. The UI + // already exposes this via Light::inputs.shadow_normal_bias; the + // slot was previously dead padding. PCF shaders add + // `normal_bias * (1 - max(dot(N, Ldir), 0))` to the receiver depth + // before the comparison to kill shadow acne on grazing surfaces. 
+ float normal_bias{0.01f}; +}; +static_assert(sizeof(RawLightData) == 64, "RawLightData must stay 64 B"); + +// Local TRS for a scene_transform. Stamped by the producer and uploaded +// into a RawTransform arena slot. Hierarchy resolution (parent-chain +// composition) stays on the CPU side inside ScenePreprocessor's +// FlattenVisitor — the 2026-standard pattern across UE5 / Bevy / +// Unity DOTS / Godot: scene hierarchy is too small-N for GPU-side +// wavefront evaluation to win. The composed world matrix for each +// transform ends up in the WorldTransform arena at the same offset +// that the RawTransform slot occupies. +struct RawLocalTransform +{ + float translation[4]{0.f, 0.f, 0.f, 0.f}; // xyz + pad + float rotation[4]{0.f, 0.f, 0.f, 1.f}; // quaternion xyzw + float scale[4]{1.f, 1.f, 1.f, 0.f}; // xyz + pad + float _pad[4]{}; // std430 alignment +}; + +// Environment parameters (ambient, fog, exposure, gamma). Already +// view-independent — this is both Raw (source-written) and Cooked +// (shader-bound) in one struct. Published here so EnvironmentLoader +// can write its own slot bytes matching what ScenePreprocessor expects +// on the other end. +struct EnvParamsUBO +{ + float ambient[4]{0.03f, 0.03f, 0.03f, 1.f}; // xyz = color, w = intensity + float fog_color_density[4]{0.8f, 0.8f, 0.8f, 0.f}; // xyz = color, w = density + float fog_range[4]{10.f, 100.f, 0.f, 0.f}; // x = start, y = end, + // z = mode, w = enabled (0/1) + float exposure_gamma[4]{1.f, 2.2f, 0.f, 0.f}; // x = exposure (linear), + // y = gamma, zw = pad +}; + +// World-space mat4 emitted by ScenePreprocessor's FlattenVisitor from +// the scene_node tree (CPU walk with parent-chain accumulation). One +// entry per producer-authored scene_transform, laid out at the same +// byte offset as the producer's RawTransform slot so shaders can +// address either side by `scene_transform::raw_slot.offset`. 
+struct WorldTransformMat4 +{ + float m[16]{1.f, 0.f, 0.f, 0.f, + 0.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 1.f}; +}; + +// Shadow cascades UBO — scene-wide, published by ScenePreprocessor as +// the `shadow_cascades` aux on the output geometry. Shading shaders +// (classic_pbr_shadowed) read this to pick the right cascade per +// fragment and sample the depth-array texture. The depth-only pass +// (shadow_cascades.vert / .frag) also reads light_view_proj from this +// UBO to transform vertices into cascade clip-space; its per-invocation +// `cascade_index` lives in a separate `shadow_draw_cfg` UBO so the +// two use-cases don't fight for the same binding. +// +// std140 layout, 560 B total. Fields mirror +// `ossia::shadow_cascades_info` in geometry_port.hpp: +// light_view_proj[8] — world → cascade clip-space per cascade +// cascade_split_distances[8] — view-space far-plane Z for cascades 0..7; +// entry k is the far plane of cascade k. +// Slots >= cascade_count read as 0. +// cascade_count — how many cascade entries are live (0..8) +struct ShadowCascadesUBO +{ + float light_view_proj[8][16]{}; + // 8 split distances symmetric with light_view_proj[8]. + // std140: two consecutive vec4 rows (32 B total). + float cascade_split_distances[8]{}; + uint32_t cascade_count{0}; + uint32_t _pad0{}; + uint32_t _pad1{}; + uint32_t _pad2{}; +}; +static_assert(sizeof(ShadowCascadesUBO) == 560, + "ShadowCascadesUBO size = mat4[8] (512) + float[8] (32) + 4×uint (16) = 560 B"); + +#pragma pack(pop) + +// CPU-side flattened scene representation. +struct DrawCall +{ + // Points at either a mesh from geometry_ref (legacy_geometry path) OR at + // owned_mesh (mesh_primitive path). `mesh` is always non-null for a valid + // draw; one of geometry_ref or owned_mesh keeps the target alive. + const ossia::geometry* mesh{}; + ossia::geometry_spec geometry_ref; // Legacy path: keeps source alive. + std::shared_ptr owned_mesh; // Primitive path: built from mesh_primitive. 
+ + // Stable cross-frame identity of the source mesh primitive. Unlike + // `mesh`, which for the primitive path points into a freshly-allocated + // ossia::geometry wrapper (different pointer every flatten call), this + // is the source mesh_primitive's stable_id (or the raw pointer bits as + // a fallback when the primitive was emitted by a legacy producer that + // hasn't stamped a stable_id yet). Used by ScenePreprocessor to detect + // "mesh list unchanged vs last frame" and skip vertex/index re-uploads. + uint64_t stable_id{}; + + QMatrix4x4 worldTransform; + + // Direct shared_ptr to the material — null means "no material / use + // the renderer's default factors". Carries the material's gpu_slot_ref + // for GPU-side lookup without any scene-wide index array. + ossia::material_component_ptr material; + + // Direct shared_ptr to the skin — null means "no skinning". When + // present, the ScenePreprocessor attaches a `joint_matrices` auxiliary + // buffer to this draw's output geometry; a downstream skinning compute + // pass (or user shader) deforms positions/normals using + // joints0/weights0 vertex attributes. + ossia::skeleton_component_ptr skin; + + // Index into FlatScene::materials, i.e. the position of `material` in + // scene_state.materials (packed into FlatScene::materials in that order). + // -1 means the material was null or is not listed in scene_state.materials. Set by + // flattenScene after collecting all draws. + int materialIndex{-1}; + + // Index into FlatScene::skins (parallel to scene_state.skeletons). -1 = no skin, or skin not listed there. + int skinIndex{-1}; + + // Local-space AABB of the source mesh_primitive. Copied by the + // FlattenVisitor from mesh_primitive::bounds. Empty (inverted) if the + // source didn't compute bounds — downstream per_draw_bounds emitter + // writes an infinite AABB in that case so GPU culling shaders never + // cull the draw.
+  ossia::aabb local_bounds{};
+
+  // RawTransform arena slot of the nearest producer-authored
+  // scene_transform on this draw's walk path (0xFFFFFFFF = none). Stamped
+  // into PerDrawGPU.transform_slot so shaders can look up
+  // world_transforms_prev[slot] for motion vectors / TAA / reprojection.
+  std::uint32_t transform_slot{0xFFFFFFFFu};
+};
+
+// Per-skeleton packed joint matrices: joint_matrix[i] = world_joint × inverse_bind.
+// One std::vector per skeleton index (parallel to scene_state.skeletons).
+struct SkeletonGPU
+{
+  std::vector joint_matrices; // NOTE(review): element type (template argument) is missing in this text — restore from upstream
+};
+
+// World-matrix emission: one entry per producer-authored
+// scene_transform seen during the walk. The preprocessor's private
+// world-transforms SSBO (m_worldTransformsBuffer) is laid out as a
+// packed array indexed by the scene_transform's `raw_slot.internal_index`
+// (the RawTransform arena slot index). Consumer shaders read
+// `world_transforms.data[transform_slot]` for any light / particle /
+// compute pass that needs to transform a local-space quantity into
+// world space for a specific slot-addressable transform.
+//
+// Multi-preprocessor correctness: each preprocessor owns its own
+// m_worldTransformsBuffer, so two preprocessors with different filtered
+// views of the same source scene legitimately compute different world
+// matrices for the same scene_transform without stomping each other.
+struct WorldTransformEmit
+{
+  QMatrix4x4 world;
+  uint32_t transform_slot; // RawTransform arena slot index
+};
+
+struct FlatScene // Flattened view of a scene_spec, filled in by flattenScene() for GPU consumption.
+{
+  std::vector draws;
+  // RawLight arena slot index per light the walk encountered.
+  // 0xFFFFFFFF for producer-less lights (filtered out when building
+  // scene_light_indices, the shader-facing compact indices list).
+  std::vector lightArenaSlots;
+  std::vector materials;
+  // Parallel to `materials` — same size, same indexing. Zeroed
+  // (OpenPBR spec defaults) for materials whose scene material_component
+  // doesn't set any extension fields. Consumer shaders either ignore
+  // this SSBO entirely (classic_pbr, classic_pbr_textured, …) or bind
+  // it as `scene_materials_ext` to pick up the full OpenPBR parameter
+  // set (classic_pbr_openpbr).
+  std::vector material_extensions;
+  std::vector skins; // Parallel to scene_state.skeletons.
+
+  // World matrices to upload into the WorldTransform arena, one per
+  // producer-authored scene_transform encountered in the walk whose
+  // raw_slot is valid. Sparse: the arena is indexed by offset, not
+  // by position in this vector.
+  std::vector worldTransforms;
+
+  // Loader-emitted scene_data payloads, collected during the walk.
+  // ScenePreprocessor forwards each entry as an auxiliary_buffer on every output
+  // geometry (by name). Lifetime held via shared_ptr.
+  std::vector scene_data;
+
+  // Instance components encountered during the walk. Each entry pairs an
+  // instance_component_ptr with its worldTransform; the ScenePreprocessor emits
+  // it as a dedicated instanced DrawCall with per-instance auxiliaries.
+  struct InstanceDraw
+  {
+    ossia::instance_component_ptr instance;
+    QMatrix4x4 worldTransform;
+  };
+  std::vector instances;
+
+  // Primitive cloud (splat / point-cloud) entries. Format-agnostic
+  // payloads whose schema is described by their CSF chain (one
+  // AUXILIARY with LAYOUT). ScenePreprocessor buckets these by
+  // `format_id` and emits one indirect-draw geometry per bucket;
+  // entries with empty format_id are bucketed individually keyed on
+  // their stable id.
+  struct PrimitiveCloudDraw
+  {
+    ossia::primitive_cloud_component_ptr cloud;
+    QMatrix4x4 worldTransform;
+    // RawTransform arena slot index, or 0xFFFFFFFFu if no producer
+    // transform was on the walk path. Mirrors PerDrawGPU.transform_slot.
+    uint32_t transform_slot{0xFFFFFFFFu};
+  };
+  std::vector primitive_clouds;
+
+  // Cameras collected from the scene tree. Each entry keeps its source
+  // camera_component alive, its accumulated world transform (column 3 =
+  // eye position, inverse = view matrix), and the scene_node_id of the
+  // node it was attached to so consumers can resolve `active_camera_id`.
+  struct CameraEntry
+  {
+    ossia::camera_component_ptr component;
+    QMatrix4x4 worldTransform;
+    ossia::scene_node_id node_id{};
+  };
+  std::vector cameras;
+
+  // Index into `cameras` of the currently-active camera. -1 when the scene
+  // has no cameras; in that case downstream falls back to a default eye
+  // placement (see the legacy single-camera fields below, populated from
+  // this slot if valid or from a default otherwise).
+  int activeCameraIndex{-1};
+
+  // Camera (from scene or override) — legacy mirror fields. Kept populated
+  // for consumers that haven't migrated to `cameras[activeCameraIndex]`
+  // yet. Resolved by flattenScene() after the tree walk:
+  //   - cameras empty    → sensible default (eye at (0,1,3))
+  //   - cameras nonempty → copied from cameras[activeCameraIndex]
+  QMatrix4x4 viewMatrix;
+  QMatrix4x4 projectionMatrix;
+  QVector3D cameraPosition;
+  float cameraFov{60.f}; // presumably degrees — TODO confirm against flattenScene()
+  float cameraNear{0.1f};
+  float cameraFar{1000.f};
+
+  bool hasCamera{false}; // presumably true once a scene / override camera was resolved — confirm
+
+  void clear() // Empties every collected list and resets activeCameraIndex / hasCamera; the legacy matrices, position and fov/near/far are left as they were.
+  {
+    draws.clear();
+    lightArenaSlots.clear();
+    materials.clear();
+    material_extensions.clear();
+    skins.clear();
+    scene_data.clear();
+    instances.clear();
+    primitive_clouds.clear();
+    cameras.clear();
+    worldTransforms.clear();
+    activeCameraIndex = -1;
+    hasCamera = false;
+  }
+};
+
+// Flatten a scene_spec into a FlatScene for GPU consumption.
+void flattenScene(
+    const ossia::scene_spec& scene,
+    FlatScene& out,
+    float aspectRatio); // aspectRatio: presumably width / height of the render target — confirm
+
+// Build a transient ossia::geometry that wraps a mesh_primitive's buffers
+// and attributes. The result is heap-allocated and owned by shared_ptr so
+// callers can keep it alive beyond the flatten pass (see DrawCall::owned_mesh).
+std::shared_ptr +primitiveToGeometry(const ossia::mesh_primitive& prim); + +MaterialGPU packMaterial(const ossia::material_component& mc); +MaterialExtensionsGPU packMaterialExtensions(const ossia::material_component& mc); +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.cpp new file mode 100644 index 0000000000..6217a96d82 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.cpp @@ -0,0 +1,5559 @@ +#include "Gfx/Graph/GpuResourceRegistry.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace score::gfx +{ + +namespace +{ + +// std430 layout matching the `per_draw` AUXILIARY block declared in the +// preset rasterizer shaders. Lays down model + normal matrices, the +// material index, and a 32-bit tag hash (rapidhash of material.tag, +// truncated to 32 bits — same primitive that produces filter_tag and +// content_hash everywhere else in the pipeline) for downstream +// per-pass filtering. +// +// `transform_slot` indexes into the `world_transforms` / +// `world_transforms_prev` SSBOs — motion-vector / TAA / reprojection +// shaders do `world_transforms_prev.data[pd.transform_slot]` to recover +// the previous-frame world matrix of this draw. 0xFFFFFFFF = no +// producer-authored transform on the walk path (draw anchored to the +// identity or a loader-interior transform); shaders must treat this as +// "motion = zero" / "no prev data". +// +// `skeleton_offset` is the offset (in joint-matrix units) where this +// draw's skeleton begins inside a consolidated joint_matrices buffer. +// 0xFFFFFFFF = unskinned draw. 
 Today joint_matrices is bound per-draw
+// and the offset is functionally always 0 for skinned draws, but we
+// stamp the correct concat-offset here so a future consolidation that
+// switches to a single arena-style joint_matrices SSBO does not need a
+// PerDrawGPU layout change.
+struct PerDrawGPU
+{
+  float model[16]{}; // model matrix, 16 floats (presumably column-major, as written by writeMat4 — confirm)
+  float normal[16]{}; // mat3 padded as mat4 to keep std430 alignment trivial
+  uint32_t material_index{}; // index of this draw's material in the scene materials array
+  uint32_t tag_hash{}; // rapidhash of material.tag truncated to 32 bits
+  uint32_t transform_slot{0xFFFFFFFFu}; // index into world_transforms / world_transforms_prev; 0xFFFFFFFF = none
+  uint32_t skeleton_offset{0xFFFFFFFFu}; // offset in joint-matrix units; 0xFFFFFFFF = unskinned draw
+};
+static_assert(sizeof(PerDrawGPU) == 144, "PerDrawGPU layout must match shader"); // 2 × 16 floats + 4 × uint32 = 144 bytes
+
+// Local-space AABB per draw. Emitted as the `per_draw_bounds` auxiliary
+// SSBO (sidecar to `per_draws`, same indexing by drawID / gl_BaseInstance).
+// Consumer shaders transform to world space via Arvo's algorithm against
+// PerDrawGPU.model and test against the camera's frustum planes for
+// GPU frustum / HiZ occlusion culling.
+//
+// Sentinel convention: when the source mesh didn't compute bounds, we
+// emit an "infinite" AABB (min = -FLT_MAX, max = +FLT_MAX) so culling
+// shaders leave the draw alone rather than degenerating to a point at
+// the origin.
+struct PerDrawBoundsGPU
+{
+  float aabb_min[4]{}; // xyz = local-space min, w = unused (padding)
+  float aabb_max[4]{}; // xyz = local-space max, w = unused (padding)
+};
+static_assert(sizeof(PerDrawBoundsGPU) == 32,
+              "PerDrawBoundsGPU layout must match shader (2 × vec4)");
+
+// Pack an ossia::aabb into PerDrawBoundsGPU. Empty (inverted) input means
+// the source mesh didn't compute bounds — emit a ±FLT_MAX "infinite" box
+// so culling shaders never cull the draw. This keeps sources that can't
+// easily supply bounds (GPU-resident procedural meshes like PBRMesh)
+// rendering correctly through a cull pass.
+// Packs `b` into the GPU-side bounds record.
+//   - b.empty(): the xyz lanes become -max / +max of float (an "infinite"
+//     box) so a culling shader never rejects the draw.
+//   - otherwise: the xyz lanes are copied from b.min / b.max.
+// The w lanes keep the zero they get from value-initialisation.
+inline PerDrawBoundsGPU packBounds(const ossia::aabb& b) noexcept
+{
+  PerDrawBoundsGPU g{};
+  if(b.empty())
+  {
+    // numeric_limits needs its value type spelled out; float matches the
+    // aabb_min / aabb_max lanes.
+    constexpr float kPos = std::numeric_limits<float>::max();
+    constexpr float kNeg = -std::numeric_limits<float>::max();
+    g.aabb_min[0] = kNeg; g.aabb_min[1] = kNeg; g.aabb_min[2] = kNeg;
+    g.aabb_max[0] = kPos; g.aabb_max[1] = kPos; g.aabb_max[2] = kPos;
+  }
+  else
+  {
+    g.aabb_min[0] = b.min[0]; g.aabb_min[1] = b.min[1]; g.aabb_min[2] = b.min[2];
+    g.aabb_max[0] = b.max[0]; g.aabb_max[1] = b.max[1]; g.aabb_max[2] = b.max[2];
+  }
+  return g;
+}
+
+// MaterialGPU mirrors the shader-side material record (baseColor,
+// MR-occlusion-unlit, emissive_strength, textureRefs). Layout drift here
+// silently corrupts every textured draw — keep the size check.
+// NOTE(review): this comment used to say "4 × vec4" (64 B), which does not
+// match the asserted 80 B (= 5 × vec4) — confirm the per-field breakdown
+// against the MaterialGPU declaration.
+static_assert(sizeof(MaterialGPU) == 80, "MaterialGPU layout must match shader");
+
+// Per-material per-channel UV transforms (KHR_texture_transform).
+// 5 channels × (offset.xy + scale.xy) + rotations packed in 2 vec4
+// = 7 vec4 = 112 B. Channels match MaterialChannel enum: 0=BC, 1=MR,
+// 2=Normal, 3=Em, 4=Occlusion. Identity transform: offset=(0,0),
+// scale=(1,1), rotation=0 — the default-constructed value, which
+// makes glTFs without the extension pass through `(uv) → uv` and
+// incur zero shader cost.
+struct MaterialUVTransformGPU
+{
+  float bc_offset_scale[4]{0.f, 0.f, 1.f, 1.f}; // ox, oy, sx, sy
+  float mr_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float normal_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float em_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float occ_offset_scale[4]{0.f, 0.f, 1.f, 1.f};
+  float rotations0[4]{0.f, 0.f, 0.f, 0.f}; // bc, mr, nrm, em (radians)
+  float rotations1[4]{0.f, 0.f, 0.f, 0.f}; // occ, _pad×3
+};
+static_assert(sizeof(MaterialUVTransformGPU) == 112,
+              "MaterialUVTransformGPU layout must match shader (7 × vec4)");
+
+// Material texture channels. Each channel has its own QRhiTextureArray with
+// the appropriate pixel format (sRGB vs linear) and dedup map.
Index into +// MaterialGPU::textureRefs[]. +enum MaterialChannel : int +{ + ChannelBaseColor = 0, + ChannelMetalRough = 1, + ChannelNormal = 2, + ChannelEmissive = 3, + ChannelOcclusion = 4, // Separate glTF occlusionTexture (when distinct from MR). + ChannelCount = 5 +}; + +// Whole texture_ref for a given channel, or nullptr for out-of-range. +// Used by both the static path (reads .source) and the dynamic path +// (reads .texture.native_handle). +inline const ossia::texture_ref* +channelRef(MaterialChannel ch, const ossia::material_component& m) noexcept +{ + switch(ch) + { + case ChannelBaseColor: return &m.base_color_texture; + case ChannelMetalRough: return &m.metallic_roughness_texture; + case ChannelNormal: return &m.normal_texture; + case ChannelEmissive: return &m.emissive_texture; + case ChannelOcclusion: return &m.occlusion_texture; + default: return nullptr; + } +} + +// Shader-visible name for each channel — matches the INPUT entries consuming +// shaders declare (sampler2DArray baseColorArray; etc). Names follow the +// existing classic_pbr_textured convention (camelCase) so the aux-texture +// auto-resolve path slots in without shader edits. +inline const char* channelName(MaterialChannel ch) noexcept +{ + switch(ch) + { + case ChannelBaseColor: return "baseColorArray"; + case ChannelMetalRough: return "metalRoughArray"; + case ChannelNormal: return "normalArray"; + case ChannelEmissive: return "emissiveArray"; + case ChannelOcclusion: return "occlusionArray"; + default: return ""; + } +} + +// Dynamic-slot aux-texture name base. The full name is +// `` (e.g., "baseColorDyn0"), matching the uniform +// names consumer shaders declare for the dynamic branch. 
+inline const char* channelDynBaseName(MaterialChannel ch) noexcept
+{
+  // Base names in MaterialChannel order; anything outside the five real
+  // channels maps to the empty string.
+  static constexpr const char* kDynBases[] = {
+      "baseColorDyn", "metalRoughDyn", "normalDyn", "emissiveDyn",
+      "occlusionDyn"};
+  static_assert(
+      ChannelBaseColor == 0 && ChannelOcclusion == 4,
+      "kDynBases is indexed by MaterialChannel value");
+
+  if(ch < ChannelBaseColor || ch > ChannelOcclusion)
+    return "";
+  return kDynBases[static_cast<int>(ch)];
+}
+
+// The authoritative kMaxDynamicSlots constant lives on
+// GpuResourceRegistry::kMaxDynamicSlots (header). A local duplicate used to
+// sit here and drifted out of sync; the registry value is what actually
+// gates the dynamic-slot cap (see resolveDynamicSlot at line ~386 in
+// GpuResourceRegistry.cpp).
+
+// Texture creation flags per channel. Base color and emissive are colour
+// data and get hardware sRGB→linear on sample; every other channel
+// (metallic-roughness, normal, …) is plain data and stays linear, i.e.
+// gets no flags.
+inline QRhiTexture::Flags channelFlags(MaterialChannel ch) noexcept
+{
+  const bool isColour = (ch == ChannelBaseColor) || (ch == ChannelEmissive);
+  if(isColour)
+    return QRhiTexture::sRGB;
+  return {};
+}
+
+// =============================================================================
+// Ext-texture slot routing (KHR_materials_*)
+// =============================================================================
+//
+// Each MaterialExtensionsGPU::textureRefs[slot] is fed by an ext texture from
+// material_component, registered into one of the 5 existing channel pools
+// (the table below only routes to BaseColor / MetalRough / Normal).
 Pool choice = format expectation:
+//   ChannelBaseColor  → sRGB color textures (sheen color, specular color,
+//                       diffuse-transmission color, subsurface color)
+//   ChannelMetalRough → linear scalar/factor textures (clearcoat factor +
+//                       roughness, sheen roughness, transmission, specular
+//                       factor, iridescence, diffuse-transmission factor,
+//                       subsurface factor)
+//   ChannelNormal     → tangent-space data (clearcoat normal, anisotropy
+//                       direction)
+//
+// Slot numbering matches MaterialExtensionsGPU::textureRefs[] documented in
+// SceneGPUState.hpp — they MUST stay in sync; this table is the loader-side
+// counterpart of the shader-side switch (see classic_pbr_openpbr.frag).
+//
+// Slots 13/14 (subsurface factor / color) and 15 (reserved) are intentionally
+// absent from this table: stock glTF has no SSS extension and material_
+// component carries no source texture_ref to drive them. Future loaders
+// growing `material_component::subsurface` fields can extend the table
+// here — the rebuild + patch walkers iterate kExtTextureSlots without
+// hard-coded slot count, so a single new entry is all it takes.
+struct ExtTextureSlot
+{
+  int slot;                // 0..15 in MaterialExtensionsGPU::textureRefs
+  MaterialChannel channel; // which existing pool this texture lands in
+  // Accessor returns a reference into `m`'s ext struct; the caller does
+  // its `valid()` / `source.get()` test on the resulting texture_ref.
+  // Returning by reference avoids dangling on temporary structs the
+  // accessor would have to construct otherwise.
+  const ossia::texture_ref& (*accessor)(const ossia::material_component& m);
+};
+
+inline constexpr ExtTextureSlot kExtTextureSlots[] = {
+  // KHR_materials_clearcoat — slots 0..2.
+  { 0, ChannelMetalRough, // the unary + on each lambda below converts the captureless lambda to a plain function pointer
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.clearcoat.texture; } },
+  { 1, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.clearcoat.roughness_texture; } },
+  { 2, ChannelNormal,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.clearcoat.normal_texture; } },
+
+  // KHR_materials_sheen — slots 3..4.
+  { 3, ChannelBaseColor,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.sheen.color_texture; } },
+  { 4, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.sheen.roughness_texture; } },
+
+  // KHR_materials_transmission — slot 5.
+  { 5, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.transmission.texture; } },
+
+  // KHR_materials_specular — slots 6..7.
+  { 6, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.specular.texture; } },
+  { 7, ChannelBaseColor,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.specular.color_texture; } },
+
+  // KHR_materials_iridescence — slots 8..9.
+  { 8, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.iridescence.texture; } },
+  { 9, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.iridescence.thickness_texture; } },
+
+  // KHR_materials_anisotropy — slot 10.
+  { 10, ChannelNormal,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.anisotropy.texture; } },
+
+  // KHR_materials_diffuse_transmission — slots 11..12.
+  { 11, ChannelMetalRough,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.diffuse_transmission.texture; } },
+  { 12, ChannelBaseColor,
+    +[](const ossia::material_component& m) -> const ossia::texture_ref& {
+      return m.diffuse_transmission.color_texture; } },
+};
+
+QMatrix4x4 transformToMatrix(const ossia::scene_transform& t) // Composes translate * rotate * scale from `t`; rotation[3] is passed as the quaternion's scalar part, rotation[0..2] as x/y/z.
+{
+  QMatrix4x4 mat;
+  mat.translate(t.translation[0], t.translation[1], t.translation[2]);
+  mat.rotate(QQuaternion(t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2]));
+  mat.scale(t.scale[0], t.scale[1], t.scale[2]);
+  return mat;
+}
+
+// writeMat4 comes from Gfx/Graph/CameraMath.hpp (included above) — same
+// signature, column-major memcpy. Keeping a local copy would create an
+// ambiguous overload at every call site.
+
+} // end of the anonymous namespace
+
+struct RenderedScenePreprocessorNode final : NodeRenderer
+{
+  // Texture arrays now live in GpuResourceRegistry and are destroyed
+  // by RenderList::release → registry.destroy(). Nothing to clean up
+  // here — the destructor is defaulted.
+
+  const ScenePreprocessorNode& m_node;
+
+  // Output owned GPU buffers (one set per flatten cycle). Sized to scene needs.
+  // scene_light_indices SSBO: compact list of RawLight arena slot
+  // indices for the current scene's live lights. Shader iterates
+  // 0..scene_counts.light_count and reads
+  // scene_lights.entries[scene_light_indices.data[i]] (task 28b phase 3).
+  QRhiBuffer* m_lightIndicesBuffer{};
+  int64_t m_lightIndicesCap{}; // presumably the allocated capacity of m_lightIndicesBuffer in bytes — confirm
+  std::vector m_cachedLightIndices;
+  // scene_materials is now served by the Material arena directly
+  // (registry.buffer(Arena::Material)) — no preprocessor-owned mirror.
+  // MaterialExtensions stays preprocessor-owned pending its own arena
+  // migration (larger struct, less pressure to move).
+  QRhiBuffer* m_materialsExtBuffer{}; // MaterialExtensionsGPU[]
+  // KHR_texture_transform: per-material per-channel UV offset/scale/
+  // rotation.
 Parallel to scene_materials, indexed by material_index.
+  // Identity for materials without the extension (zero shader cost).
+  QRhiBuffer* m_materialUVTransformsBuffer{};
+  int64_t m_materialUVTransformsCap{};
+  std::vector m_cachedMaterialUVTransforms;
+
+  // One QRhiBuffer per forwarded scene_data entry — allocated when the
+  // scene_data carries CPU-side `buffer_data`, borrowed from the upstream
+  // when it already holds a `gpu_buffer_handle`. Parallel to fs.scene_data.
+  struct SceneDataBinding
+  {
+    QRhiBuffer* buffer{};
+    std::string name; // presumably the auxiliary name the entry is forwarded under — confirm
+    int64_t byte_size{};
+    bool owned{false}; // presumably true when this node allocated `buffer` (vs. borrowed from upstream) — confirm
+  };
+  std::vector m_sceneDataBuffers;
+
+  // One per skeleton in scene_state.skeletons, holding the packed
+  // joint_matrices (mat4[N]). Grow-only; skinned draws attach one of these
+  // as a `joint_matrices` auxiliary.
+  struct SkinBinding
+  {
+    QRhiBuffer* buffer{};
+    int64_t capacity{}; // presumably allocated bytes (grow-only) — confirm
+    int64_t byte_size{}; // presumably bytes currently in use — confirm
+  };
+  std::vector m_skinBuffers;
+
+  // std140-packed counts UBO: shaders read `scene_counts.light_count`,
+  // `.material_count`, `.draw_count` instead of `scene_lights.entries
+  // .length()`, so the SSBOs can keep their growth-only capacity without
+  // forcing shaders to iterate ghost tail entries. Uploaded on every
+  // change (partial uploads to scene_lights etc. may leave dead tail
+  // slots when counts shrink, and we want the shader to ignore them).
+  struct SceneCountsUBO
+  {
+    uint32_t light_count{};
+    uint32_t material_count{};
+    uint32_t draw_count{};
+    uint32_t _pad0{}; // rounds the struct up to the 16 bytes asserted below
+  };
+  static_assert(sizeof(SceneCountsUBO) == 16, "scene_counts UBO layout");
+  QRhiBuffer* m_sceneCountsBuffer{};
+  SceneCountsUBO m_cachedSceneCounts{~0u, ~0u, ~0u, 0u}; // counts start at 0xFFFFFFFF — presumably so the first comparison against real counts always differs; confirm
+
+  // `shadow_cascades` aux UBO — light_view_proj[8] + split distances +
+  // cascade_count. Populated from `scene.state->shadow_cascades` (authored
+  // upstream by ShadowCascadeSetup). Diff-uploaded against the cached
+  // snapshot; unchanged frames cost zero bytes.
 Emitted to downstream as
+  // an `auxiliary_buffer` named "shadow_cascades" — classic_pbr_shadowed
+  // reads it to PCF-sample the right cascade; shadow_cascades.vert reads
+  // its `light_view_proj` array to transform vertices into cascade
+  // clip-space (its per-invocation `cascade_index` lives in a separate
+  // `shadow_draw_cfg` UBO that the depth-pass pipeline binds locally).
+  QRhiBuffer* m_shadowCascadesBuffer{};
+  ShadowCascadesUBO m_cachedShadowCascades{};
+  bool m_shadowCascadesSeeded{false}; // presumably false until the first upload, so the initial diff cannot be skipped — confirm
+
+  // Per-camera std140 UBO array. Size = max(1, ncameras) * sizeof(CameraUBOData).
+  // First entry is always the active camera (resolved by flattenScene from
+  // scene_state.active_camera_id). When the scene has no cameras we publish
+  // a single default entry so the shader never sees a null binding.
+  // Bound as the `camera` aux buffer on Geometry Out — try_bind_from_geometry
+  // in the shader consumer resolves it by port name.
+  QRhiBuffer* m_camerasBuffer{};
+  int64_t m_camerasCap{};
+  std::vector m_cachedCameras; // CPU copy of the camera data; also the source for the camera_prev upload described below
+
+  // One-frame history for motion-vector reprojection. Bound as the aux UBO
+  // `camera_prev`; consumer post-process shaders reconstruct world position
+  // from current depth + current camera, then reproject through this.
+  // On the first frame (no history) we seed prev = current so MV = 0.
+  // Filled each frame from m_cachedCameras BEFORE m_camerasBuffer is
+  // overwritten — same "GPU snapshot of last frame" semantics as
+  // m_worldTransformsPrevBuffer, just on a Dynamic UBO via CPU shadow
+  // upload instead of copyBuffer (which Dynamic UBOs don't support).
+  QRhiBuffer* m_camerasPrevBuffer{};
+
+  // Per-frame guard for packAndUploadCameras. update() is invoked once
+  // per outgoing edge by RenderList::renderInternal — for a
+  // ScenePreprocessor with N consumers, that's N calls per frame.
 The
+  // camera-prev semantic ("upload m_cachedCameras BEFORE overwriting
+  // it with fresh") only holds on the first call; on the second call,
+  // m_cachedCameras has already been replaced by fresh, so re-running
+  // would clobber camera_prev with current camera content.
+  // Keep packAndUploadCameras idempotent within a frame by tracking
+  // the last frame index we ran on (RenderList::frame, incremented at
+  // the end of each renderInternal). -1 = not yet run.
+  int64_t m_lastCameraUploadFrame{-1};
+
+  // Per-preprocessor world-transforms SSBO. One WorldTransformMat4 per
+  // producer-authored scene_transform seen during the walk, laid out in
+  // walk order (NOTE(review): the WorldTransformEmit comment describes this buffer as indexed by RawTransform arena slot, raw_slot.internal_index, not by walk order — confirm which is current). Not a shared registry arena — different preprocessors
+  // consuming different filtered views of the same source scene
+  // legitimately compute different world matrices for the same
+  // scene_transform, so each keeps its own buffer. Consumer shaders
+  // bind `world_transforms` by aux name and index via
+  // `per_draws[draw_id].transform_slot`.
+  QRhiBuffer* m_worldTransformsBuffer{};
+  int64_t m_worldTransformsCap{0};
+
+  // Previous-frame snapshot of m_worldTransformsBuffer. Bound as the
+  // `world_transforms_prev` aux buffer on Geometry Out; consumer
+  // shaders read it alongside `world_transforms` for motion-vector /
+  // TAA / reprojection passes. Maintained by a deferred-write scheme:
+  // update() stashes this frame's per-slot WorldTransformMat4 writes
+  // into m_pendingWorldXformWrites WITHOUT touching the resource-
+  // update batch. runInitialPasses then (a) issues a single GPU-side
+  // copyBuffer(current → prev) on the command buffer — at this point
+  // current still holds frame-N-1 data because the deferred writes
+  // haven't been applied yet — then (b) drains the pending list into
+  // the next resource-update batch (`res`), which RenderList submits
+  // AFTER runInitialPasses returns.
 Net: prev captures frame N-1's
+  // state, current then receives frame N's writes; consumer render
+  // passes downstream see the correct (prev, current) pair.
+  // Same Static + StorageBuffer constraint as the current buffer
+  // (QRhi forbids Dynamic + StorageBuffer).
+  QRhiBuffer* m_worldTransformsPrevBuffer{};
+
+  // Per-slot world-transform writes deferred from update() to
+  // runInitialPasses so that the prev-snapshot copy captures frame
+  // N-1 data before frame N's writes overwrite current. Drained once
+  // per frame, gated by m_lastSnapshotFrame.
+  std::vector> // NOTE(review): the element type (template arguments) is missing in this text — presumably a (slot, WorldTransformMat4) pair; restore from upstream
+      m_pendingWorldXformWrites;
+  // Single-fire-per-frame guard for the prev-snapshot + pending-writes
+  // drain. runInitialPasses is invoked once per outgoing edge, so without
+  // a gate the snapshot would queue N copies and the pending-writes drain
+  // would double-upload. We compare against renderer.frame (the monotonic
+  // per-renderer frame counter that the camera path also uses, see the
+  // packAndUploadCameras / camera prev-snapshot sites). NB: the previous
+  // QRhiCommandBuffer-pointer discriminator was broken — every QRhi
+  // backend (Vulkan/D3D11/D3D12/Metal/GL) returns the address of a single
+  // by-value cbWrapper member from QRhiSwapChain::currentFrameCommandBuffer,
+  // so the pointer is constant across frames and the gate fired exactly
+  // once per swapchain lifetime, freezing world_transforms / _prev at
+  // their frame-0 contents (motion vectors / TAA / reprojection broken).
+  // Cleared on teardown (see release()).
+  int64_t m_lastSnapshotFrame{-1}; // -1 = no snapshot taken yet
+
+  // Single-fire-per-frame guard for issuePendingGpuCopies (threedim#13).
+  // runInitialPasses fires once per outgoing edge; without a gate a node
+  // feeding K consumers issues K identical copy batches per frame (the
+  // destination MDI buffers are shared, so one batch already serves every
+  // consumer).
 Kept separate from m_lastSnapshotFrame because the snapshot
+  // block only sets that token when the world-transforms buffer exists —
+  // a dedicated token gates the copies unconditionally. Cleared on teardown.
+  int64_t m_lastGpuCopiesFrame{-1};
+
+  // Environment params UBO: preprocessor-owned Env arena slot. Each
+  // EnvironmentLoader / CubemapLoader contributes disjoint fields (via
+  // `params_set` bits on scene_environment); merge_scenes composes them
+  // field-by-field into this->scene.state->environment. The preprocessor
+  // packs the MERGED CPU-side env into m_envSlot here so consumers
+  // reading `env` see the composed result, not any one producer's
+  // contribution. The per-producer Env slots owned by EnvironmentLoader
+  // etc. remain valid but are no longer the binding target — they're
+  // just a CPU-side marker that the producer is participating.
+  GpuResourceRegistry::Slot m_envSlot{};
+  uint32_t m_env_aux_offset{0}; // presumably the offset of m_envSlot's data inside the Env arena buffer — confirm
+  // Cache the last uploaded EnvParamsUBO bytes so we can skip re-upload
+  // when the merged environment content doesn't change frame-to-frame.
+  EnvParamsUBO m_lastEnvUpload{};
+  bool m_envSlotSeeded{false};
+
+  // ─── MDI state (Plan 09 S4) ─────────────────────────────────────────
+  // Post-migration, the vertex/index streams live in the registry's
+  // MeshArenaManager. Only per_draws + indirect_draw_cmds (plus the
+  // per_draw_bounds sidecar declared below) remain
+  // preprocessor-owned — they're small, scene-wide SSBOs tied to a
+  // specific preprocessor's filtered view of the scene and not
+  // shareable across preprocessors.
+  struct MDIState
+  {
+    QRhiBuffer* per_draws{};
+    QRhiBuffer* indirect_draw_cmds{};
+    // Sidecar bounds SSBO parallel to per_draws. Same draw indexing
+    // (baseInstance / gl_BaseInstance), read by GPU culling shaders to
+    // transform local-space AABBs to world space and test against the
+    // camera frustum.
+    QRhiBuffer* per_draw_bounds{};
+    int64_t perDrawsCap{}; // the three *Cap fields are presumably allocated capacities in bytes of the matching buffers — confirm
+    int64_t indirectCap{};
+    int64_t perDrawBoundsCap{};
+    uint32_t totalVertices{};
+    uint32_t totalIndices{};
+    uint32_t drawCount{};
+  };
+  MDIState m_mdi;
+
+  // ─── Primitive cloud (splat) bucket resources ───────────────────────
+  // One entry per bucket_key (hash(format_id) — or stable_id when
+  // format_id is empty so each unformatted cloud gets its own bucket).
+  // Each bucket carries:
+  //   - raw_splats:      concatenation of all clouds' raw_data in the bucket
+  //   - cloud_meta:      CloudMetaGPU[] (model matrix + slot indices)
+  //   - cloud_id_lookup: uint per primitive -> cloud_meta index
+  //   - indirect:        a single IndirectCmd {6, total_primitives, 0, 0, 0}
+  //
+  // Buffers are persistent (growBuf-managed) so downstream SRBs see
+  // pointer-stable handles across frames. A bucket whose key disappears
+  // from the next flatten gets dropBuf'd in releaseStaleClouds().
+  //
+  // CloudMetaGPU mirrors PerDrawGPU's pattern (model[16] +
+  // transform_slot) so a CSF chain that wants per-cloud TRS reads it
+  // exactly the same way mesh shaders read per_draws[gl_DrawID].
+  //
+  // bounds_min / bounds_max are the per-cloud world-space AABB —
+  // populated by walking the 8 corners of `cloud->bounds` through
+  // `worldTransform`. Splat-format CSFs use these to do a per-cloud
+  // frustum-cull pre-pass so off-screen clouds skip all per-primitive
+  // work (a big win when scenes carry many bucketed clouds).
+  struct CloudMetaGPU
+  {
+    float model[16];              // 64 (trailing numbers = running size in bytes after each field)
+    float bounds_min[4];          // 80  xyz + pad
+    float bounds_max[4];          // 96  xyz + pad
+    uint32_t primitive_offset;    // 100
+    uint32_t primitive_count;     // 104
+    uint32_t transform_slot;      // 108
+    uint32_t format_param_index;  // 112
+    uint32_t _pad[4];             // 128 — 16-byte align. NOTE(review): unlike PerDrawGPU, no member here has a default initialiser, so whoever fills this struct must write every field (padding included) — confirm at the fill site
+  };
+  static_assert(sizeof(CloudMetaGPU) == 128, "CloudMetaGPU std430 layout");
+
+  struct PrimitiveCloudBucketBuffers
+  {
+    QRhiBuffer* raw_splats{};      int64_t rawSplatsCap{};
+    QRhiBuffer* cloud_meta{};      int64_t cloudMetaCap{};
+    QRhiBuffer* cloud_id_lookup{}; int64_t cloudIdLookupCap{};
+    QRhiBuffer* indirect{};        int64_t indirectCap{};
+    uint32_t row_stride{};         // cached from cloud->row_stride
+    uint64_t last_seen_frame{};    // for stale-bucket eviction
+    // Per-frame content fingerprint over (per cloud in bucket order):
+    //   raw_data identity + content_hash + primitive_count
+    //   + worldTransform bytes + transform_slot
+    // — i.e. everything the bucket's GPU buffers depend on. When the
+    // computed fingerprint matches the stored one, the bucket's
+    // raw_splats / cloud_meta / cloud_id_lookup / indirect buffers are
+    // already correct from the previous frame and the per-frame CPU
+    // concat + uploadStaticBuffer work can be skipped wholesale. 0 =
+    // "never uploaded; force the first frame's upload regardless".
+    // This is the Phase-1 delta-update step toward the persistent
+    // arena design (see .claude/PRIMITIVE-CLOUD-ARENA-DESIGN.md).
+    uint64_t content_fingerprint{};
+  };
+  ossia::flat_map m_primitiveCloudBuckets; // presumably bucket_key → PrimitiveCloudBucketBuffers (template arguments are missing in this text) — confirm
+  uint64_t m_primitiveCloudFrame{0}; // presumably the counter that last_seen_frame is compared against for eviction — confirm
+
+  // ─── Unified-MDI per-instance concat buffers ────────────────────────
+  // Three parallel arrays sized to K = (Σ regular_cmd_count + Σ
+  // instance_group_count). One slot per (cmd, instance) pair, contiguous
+  // within a cmd.
 Each indirect cmd sets `firstInstance = its first
+  // slot`, so per-instance VERTEX_INPUTs (translation / color / draw_id)
+  // step at the right offset on both indirect and CPU-fallback paths
+  // (firstInstance is honoured uniformly by every QRhi backend).
+  //
+  //  - m_instTranslations: vec4-padded translation per slot (xyz used,
+  //    w pad). Identity (0,0,0) for regular-mesh slots; actual
+  //    per-particle position for instance-group slots (GPU-copied from
+  //    the Instancer's source buffer with format-aware offsets).
+  //  - m_instColors: vec4 per slot. Identity (1,1,1,1) for regular-mesh
+  //    slots; actual per-instance broadcast colour for groups.
+  //  - m_instDrawIds: uint per slot. Carries the cmd-index of the owning
+  //    draw — replaces gl_DrawID (broken on CPU-fallback) and
+  //    gl_BaseInstance (no longer = drawID once instanceCount > 1).
+  QRhiBuffer* m_instTranslations{};
+  QRhiBuffer* m_instColors{};
+  QRhiBuffer* m_instDrawIds{};
+  int64_t m_instTranslationsCap{};
+  int64_t m_instColorsCap{};
+  int64_t m_instDrawIdsCap{};
+  uint32_t m_instSlotsUsed{}; // presumably the slot count K currently in use across the three buffers — confirm
+
+  // CPU mirror of the draw_ids stream so we can diff-upload + cheaply
+  // pre-fill identity values for regular cmds. Translations / colors
+  // are GPU-resident sources for instance groups (no CPU mirror —
+  // copies are GPU→GPU); we pre-fill identity for regular slots
+  // straight into the GPU buffer via uploadStaticBuffer.
+  std::vector m_cachedInstDrawIds;
+
+  // Prototype stable-id fallback map. Some producers (notably
+  // Threedim::Primitive going through halp::geometry → legacy_geometry)
+  // don't stamp a non-zero `mesh_primitive::stable_id` on their output.
+  // Without a stable id, the slab arena allocates a fresh slab per
+  // frame and the OffsetAllocator fragments until exhaustion. We cover
+  // this by minting a stable id keyed on the prototype's
+  // mesh_component pointer (which IS stable across frames as long as
+  // the producer re-emits the same shared_ptr).
 GC pass at the end of
+  // update() evicts entries whose pointer no longer appears in fs.
+  ossia::hash_map m_protoStableIds;
+
+  // Pending GPU→GPU copy ops collected during update()'s accumulator loop
+  // and executed in runInitialPasses (the only place ScenePreprocessor has a
+  // live command buffer). Each op corresponds to one attribute of one
+  // draw whose source buffer is GPU-resident; the CPU accumulator was
+  // zero-filled in its place so all offsets stay consistent with the
+  // tight MDI-layout contract. Cleared after being issued.
+  enum class MdiAttr : uint8_t
+  {
+    Positions,
+    Normals,
+    Texcoords,
+    Tangents
+  };
+  struct PendingGpuCopy
+  {
+    QRhiBuffer* src{};
+    QRhiBuffer* dst{};       // explicit destination — when null, attr names
+                             // a mesh-stream slot resolved via mdiBufferFor()
+    int src_offset{}; // presumably a byte offset into src — confirm
+    int dst_offset{}; // presumably a byte offset into the destination — confirm
+    int size{};              // bytes if tight-copy, else element_size
+    int vertex_count{};
+    int src_stride{};        // 0 or element_size → tight; else strided
+    int element_size{};      // BytesPerVertex for this attribute
+    MdiAttr attr{};
+  };
+  std::vector m_pendingGpuCopies;
+
+  // Capacity (in bytes) of m_materialsExtBuffer — for growth-only. NOTE(review): this comment used to cover "the two shared scene buffers", but only this one capacity is declared here — confirm nothing else is missing.
+  int64_t m_materialsExtCap{};
+
+  // Per-channel material texture arrays are now owned by
+  // GpuResourceRegistry and shared across all preprocessors in the same
+  // RenderList. Sharing is safe because texture-source / layer
+  // assignments are driven by asset identity (pointer to
+  // texture_source), which is view-independent — every preprocessor
+  // computes the same mapping. Shared arrays also let producers
+  // (PBRMesh, MaterialOverride, loaders) author their own textureRefs
+  // at update() time via the registry's resolve APIs without a
+  // preprocessor-local dedup step.
+  //
+  // We stash the registry pointer at init() instead of going through
+  // renderer.registry() at every call site — access is on the hot
+  // rebuild path.
Cleared on release(); m_lastRegistry below remembers + // the previous pointer so the next init() can detect "same registry + // as before release" and skip the cache wipe. + GpuResourceRegistry* m_registry{}; + + // Persist-across-rebuild contract: snapshot of m_registry at + // release() time. Survives the release()/init() cycle so init() can + // compare against the new RL's registry: equal → skip wipe (relink + // graph, viewport resize when the renderer object is reused), unequal + // → wipe (first init / OutputNode-replaced QRhi). Never read in the + // hot path; only inspected from init(). + GpuResourceRegistry* m_lastRegistry{}; + + // Convenience typedef + helper to localise the enum translation. + using TexChannel = GpuResourceRegistry::TextureChannel; + static TexChannel toTexChannel(MaterialChannel ch) noexcept + { + return static_cast(ch); + } + auto& texChannel(MaterialChannel ch) noexcept + { + return m_registry->textureChannel(toTexChannel(ch)); + } + const auto& texChannel(MaterialChannel ch) const noexcept + { + return m_registry->textureChannel(toTexChannel(ch)); + } + + // Uniform layer size — matching across channels keeps the samplers + // interchangeable in shaders and simplifies sampler state. + static constexpr int kChannelLayerSize + = GpuResourceRegistry::kTextureLayerSize; + + // Content-based fingerprint of the materials list we last decoded. A + // vector of raw material_component pointers (shared_ptr-element + // identity). Stable across multi-producer scene merges: merge_scenes + // concatenates material_component_ptr elements without deep-copying, + // so the element pointers themselves don't change from frame to frame + // even though the enclosing `shared_ptr>` does (the + // _contributors > 1 branch in merge_scenes allocates a new vector + // every merge). 
Comparing by content identity instead of the outer + // pointer keeps the texture cache warm across multi-glTF scenes — + // critical because re-decoding every JPEG and re-uploading every + // 1024² layer every frame is the ~100ms/frame penalty we're fixing. + std::vector m_cachedMaterialsFingerprint; + + // -- Granular invalidation state ------------------------------------------ + // + // We keep CPU mirrors of what's currently on the GPU for each small SSBO, + // plus a fingerprint of the concatenated mesh list. Each frame we: + // * compare the fingerprint — if meshes unchanged, skip vertex/index + // upload entirely and keep m_outputSpec.meshes as the same shared_ptr + // (so downstream sees stable geometry_spec and doesn't rebuild any + // pipeline/SRB). + // * diff the mirror arrays against the freshly packed data and only + // uploadStaticBuffer(offset, size, …) for the contiguous ranges that + // actually changed. Moving a light thus costs one 64-byte partial + // upload; moving an object costs one PerDrawGPU (144 bytes). + // + // Memory cost: ~sizeof(T) × count on CPU (tens of KB for typical scenes). + // + // `m_cachedMeshFingerprint` stores `DrawCall::stable_id` per draw — the + // address of the source mesh_primitive inside the stable mesh_component + // shared_ptr (or the legacy ossia::geometry entry inside a mesh_list). + // NOT `DrawCall::mesh`, because that points at a transient + // primitiveToGeometry() wrapper that's freshly allocated on every + // flattenScene() call and therefore changes every frame. + std::vector m_cachedMeshFingerprint; + // Fingerprint of the primitive_cloud set (threedim#2). The fast path + // (`meshesUnchanged`) skips rebuildPrimitiveClouds entirely — clouds are + // NOT covered by m_cachedMeshFingerprint — so without this a cloud added + // / removed / moved while the mesh fingerprint is unchanged would render + // nothing / leave stale geometry / keep a stale CloudMetaGPU.model. 
Mixing + // the cloud set into the fast-path gate forces the full rebuild branch + // (which re-runs rebuildMDI + rebuildPrimitiveClouds) on any cloud change. + // Covers the same fields rebuildPrimitiveClouds' internal per-bucket + // fingerprint depends on (raw_data identity/content version, primitive + // count, transform), plus the bucket key so add/remove is detected. + uint64_t m_cachedCloudFingerprint{}; + // m_cachedMaterials is gone — scene_materials is the registry's + // Material arena, not a preprocessor CPU mirror. Producers + the + // loader-material upload pass write directly into arena slots. + std::vector m_cachedMaterialExt; + std::vector m_cachedPerDraws; + // Mirror of the per_draw_bounds SSBO for diff-upload on the fast-path + // (transforms/materials change but topology doesn't → tiny range + // upload instead of full rewrite). Grow-only; same indexing as + // m_cachedPerDraws. + std::vector m_cachedPerDrawBounds; + + // Arena slots allocated by this preprocessor for loader materials + // (materials entering scene_state.materials with raw_slot.size == 0, + // i.e. not authored by a live producer like PBRMesh). The preprocessor + // acts as a producer-on-behalf-of-loader for these: allocates one + // Material arena slot per loader material, writes MaterialGPU bytes, + // frees at release. Producer-authored materials already have their + // own slots — those stay out of this map. + ossia::hash_map< + const ossia::material_component*, GpuResourceRegistry::Slot> + m_loaderMaterialSlots; + + // Remembered accumulator sizes from the last full rebuildMDI. Used to + // pre-reserve the temporary std::vector capacity so we don't pay for + // repeated realloc + memmove when the scene grew or stays the same + // size. Grow-only; never shrinks (negligible memory, big perf win for + // scenes with many verts). + // Plan 09 S4: vertex/index stream byte-sizes no longer tracked + // here — the arena's OffsetAllocator owns sizing. 
`m_lastDrawCount` + // stays, used to pre-reserve acc.perDraws / acc.indirectCmds. + std::size_t m_lastDrawCount{}; + + // Diff two CPU mirrors and partial-upload only the contiguous ranges + // where fresh != cached. Also grows / shrinks the cached mirror to match + // fresh's size. Returns true if at least one range was uploaded. + // + // When fresh.size() > cached.size() the new tail slots are appended + + // uploaded. When fresh.size() < cached.size() the tail is zero-filled on + // the GPU so stale content can't contribute (e.g. old lights with + // intensity=1 still emitting after the scene shrank). + template + static bool diffUpload( + QRhiResourceUpdateBatch& res, QRhiBuffer* buf, std::vector& cached, + const std::vector& fresh) + { + if(!buf) + return false; + bool changed = false; + + const std::size_t common = std::min(cached.size(), fresh.size()); + for(std::size_t i = 0; i < common;) + { + // Skip equal runs. + if(std::memcmp(&cached[i], &fresh[i], sizeof(T)) == 0) + { + ++i; + continue; + } + // Coalesce contiguous differing slots into one upload. + std::size_t start = i; + while(i < common + && std::memcmp(&cached[i], &fresh[i], sizeof(T)) != 0) + { + cached[i] = fresh[i]; + ++i; + } + res.uploadStaticBuffer( + buf, quint32(start * sizeof(T)), + quint32((i - start) * sizeof(T)), + reinterpret_cast(&fresh[start])); + changed = true; + } + + if(fresh.size() > cached.size()) + { + const std::size_t start = cached.size(); + cached.insert(cached.end(), fresh.begin() + start, fresh.end()); + res.uploadStaticBuffer( + buf, quint32(start * sizeof(T)), + quint32((fresh.size() - start) * sizeof(T)), + reinterpret_cast(&fresh[start])); + changed = true; + } + else if(fresh.size() < cached.size()) + { + // Zero the stale tail on GPU so shaders iterating the buffer's + // capacity don't see ghost entries. 
+ std::vector zeros(cached.size() - fresh.size()); + res.uploadStaticBuffer( + buf, quint32(fresh.size() * sizeof(T)), + quint32(zeros.size() * sizeof(T)), + reinterpret_cast(zeros.data())); + cached.resize(fresh.size()); + changed = true; + } + return changed; + } + + // Last-published geometry_spec; kept alive so downstream shared_ptr equality + // sees stable identity across frames when the scene is unchanged. + ossia::geometry_spec m_outputSpec; + + // Cache: identity of last input scene (raw scene_state* pointer + version). + const ossia::scene_state* m_cachedSceneState{}; + int64_t m_cachedVersion{-1}; + + RenderedScenePreprocessorNode(const ScenePreprocessorNode& n) + : NodeRenderer{n} + , m_node{n} + { + } + + // The incremental-reconciliation path (Graph::incrementalEdgeUpdate) + // creates fresh renderers and calls `initState()` on them, NOT `init()`. + // Our preprocessor has no per-edge state — everything lives at the + // init() level — so both entry points run the same setup. Without + // this delegation a preprocessor created via the incremental path + // never has `m_registry` set, every `rebuildChannel` call early-outs, + // and consumer shaders see empty texture arrays (the exact + // "textures gone on second play" failure mode observed on stop/start). + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + init(renderer, res); + } + + void releaseState(RenderList& renderer) override + { + release(renderer); + } + + // Reset every per-RenderList / per-registry cache field to empty. + // Frees registry-allocated slots (loader-material, env) when + // `freeRegistryResources` is true — pass true from release() (we + // still hold a valid m_registry) and false from init() (the prior + // m_registry, if any, may already be torn down: we cannot legally + // free against it; just drop the bookkeeping so arenaSlotForMaterial + // and the env publish path don't reuse stale slot indices on the + // fresh registry). 
//
// QRhiBuffer-backed fields (m_materialsExtBuffer, m_lightIndicesBuffer,
// m_camerasBuffer, m_mdi.*, m_inst*, m_skinBuffers, m_sceneDataBuffers,
// m_sceneCountsBuffer, m_shadowCascadesBuffer, m_worldTransforms*Buffer)
// and their paired *Cap counters are NOT touched here — they go
// through dropBuf / renderer.releaseBuffer in release() because they
// need the renderer's release plumbing.
//
// `current_frame` is only used when freeing: it is forwarded to
// releaseMeshSlab() for every minted prototype id.
void clearAllCaches(bool freeRegistryResources, uint32_t current_frame = 0u)
{
  if(freeRegistryResources && m_registry)
  {
    for(auto& [mat, slot] : m_loaderMaterialSlots)
      if(slot.valid())
        m_registry->free(slot);
    if(m_envSlot.valid())
      m_registry->free(m_envSlot);
    // MeshSlab leak fix: every (mc, id) pair in m_protoStableIds is a
    // stable_id WE minted (see resolvePrototypeStableId). The
    // matching slab is in the registry's m_meshSlabs cache. Clearing
    // m_protoStableIds without releasing the slabs leaves them as
    // orphans: the next renderer instance mints DIFFERENT IDs (mints
    // are globally unique), so its acquireMeshSlab calls miss the
    // cache and allocate fresh slabs. sweepMeshSlabs ages out the
    // orphans after `grace=2` frames -- but rapid drag-resize
    // triggers another rebuild before grace elapses, so slabs
    // accumulate (used grew 70074 → 420444 in 6 resizes for the
    // user's repro). Release explicitly here so the next-frame
    // sweep can immediately reclaim. Routes through grace queue so
    // any in-flight CB still referencing the slab is safe.
    for(auto& [mc, id] : m_protoStableIds)
      if(id != 0)
        m_registry->releaseMeshSlab(id, current_frame);
  }
  // Registry bookkeeping: dropped whether or not the slots were freed.
  m_loaderMaterialSlots.clear();
  m_envSlot = {};
  m_envSlotSeeded = false;
  m_protoStableIds.clear();

  // Scene identity, fingerprints and CPU mirrors.
  m_cachedSceneState = nullptr;
  m_cachedVersion = -1;
  m_cachedMaterialsFingerprint.clear();
  m_cachedMeshFingerprint.clear();
  m_cachedCloudFingerprint = 0;
  m_cachedMaterialExt.clear();
  m_cachedPerDraws.clear();
  m_cachedPerDrawBounds.clear();
  m_cachedShadowCascades = {};
  m_shadowCascadesSeeded = false;
  m_cachedSceneCounts = {~0u, ~0u, ~0u, 0u};
  m_cachedMaterialUVTransforms.clear();
  m_cachedCameras.clear();
  m_lastCameraUploadFrame = -1;
  m_cachedInstDrawIds.clear();
  m_cachedLightIndices.clear();
  m_lastEnvUpload = {};
  m_outputSpec = {};
  m_lastDrawCount = 0;
}

// Binds this preprocessor to the RenderList's registry, claims an Env arena
// slot if none is held, and makes sure the shared BaseColor texture channel
// has at least a 1-layer white fallback array.
void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override
{
  m_initialized = true;

  // Persist-across-rebuild contract: if the OutputNode-owned registry
  // is the SAME pointer we held in the previous init() / release()
  // cycle, every slot index (m_loaderMaterialSlots, m_envSlot, ...)
  // and the texture-array channels are still alive — re-allocating
  // them on a viewport resize / relink would re-upload ~100 MiB of
  // decoded textures and pay the 50–500 ms rebuild burst this whole
  // refactor exists to avoid.
  //
  // Skip the cache wipe in that case. The fingerprint / per-draw /
  // cascade caches will naturally match the unchanged scene state on
  // the first post-rebuild frame, short-circuiting the REBUILD branch
  // (see the needsRebuild gate) and rebuildChannel's
  // sameMaterialsContent fast path → no texture re-upload.
  //
  // The pre-release pointer is stashed in m_lastRegistry; m_registry
  // itself is null between release() and init() (so that any stray
  // post-release rebuildChannel call hits its guarded early-out
  // instead of dereferencing a stale pointer). m_lastRegistry == null
  // means "first ever init on this renderer" → wipe (no-op since
  // there's nothing to wipe). m_lastRegistry != new_registry means
  // the OutputNode tore its registry down and built a fresh one
  // (setSwapchainFormat / QRhi-replacement) → wipe (any slot indices
  // we held are stale).
  //
  // NOTE(review): release() calls clearAllCaches(true, ...)
  // unconditionally, so after a release() the caches are already empty
  // whichever branch is taken here; the "skip wipe" branch only keeps
  // state when init() runs again without a release() in between.
  auto* new_registry = &renderer.registry();
  const bool registry_changed = (m_lastRegistry != new_registry);
  if(registry_changed)
  {
    // Drop every per-registry cache before swapping m_registry. If a
    // previous RenderList left state behind (incremental edge rebuild
    // without an intervening release()), m_loaderMaterialSlots /
    // m_envSlot / m_protoStableIds carry slot indices that the new
    // registry never allocated — arenaSlotForMaterial would silently
    // return them and every mesh would wear the wrong material. The
    // fingerprint / per-draw / cascade caches likewise gate dirty
    // detection against the prior scene state. We can't legally free
    // against the old registry (it may already be torn down), so we
    // pass freeRegistryResources=false: just drop the bookkeeping.
    clearAllCaches(/*freeRegistryResources=*/false);
  }
  // else: registry survived (resize fast path / relinkGraph reuse).
  // Keep m_loaderMaterialSlots / m_envSlot / fingerprints / per-draw
  // caches — they all reference live state in the persistent registry.
  m_registry = new_registry;
  m_lastRegistry = new_registry;

  // Claim our own Env arena slot for the merged environment upload
  // (task #26). Each preprocessor owns a slot — needed because two
  // preprocessors can receive different filtered views of the same
  // source scene and must not stomp each other's merged env.
  if(!m_envSlot.valid())
  {
    m_envSlot = m_registry->allocate(
        GpuResourceRegistry::Arena::Env, sizeof(EnvParamsUBO));
    m_envSlotSeeded = false;
  }

  // Pre-allocate a 1-layer BaseColor array with a white fallback so
  // downstream consumers (classic_pbr_textured) building their samplers
  // in their own init() get a real texture pointer via textureForOutput,
  // not nullptr. update() will reallocate with the right layer count
  // once the scene is flattened. First preprocessor to run init() does
  // this; subsequent preprocessors see the array already allocated and
  // skip (shared registry state).
  auto& rhi = *renderer.state.rhi;
  auto& bc = texChannel(ChannelBaseColor);
  if(!bc.primaryArray())
  {
    auto& b = bc.ensurePrimary(
        QRhiTexture::RGBA8,
        QSize(kChannelLayerSize, kChannelLayerSize));
    b.array = rhi.newTextureArray(
        b.format, 1, b.pixelSize, 1,
        GpuResourceRegistry::textureChannelFlags(toTexChannel(ChannelBaseColor)));
    if(b.array)
    {
      b.array->setName("GpuResourceRegistry::base_color_array (init fallback)");
      // A wrapper whose create() failed is destroyed right away so the
      // failure branch below sees a null array.
      if(!b.array->create())
      {
        delete b.array;
        b.array = nullptr;
      }
    }
    if(b.array)
    {
      b.layers = 1;
      // 1×1 white pixel scaled up to the layer size, uploaded to layer 0.
      QImage w(1, 1, QImage::Format_RGBA8888);
      w.fill(Qt::white);
      w = w.scaled(
          kChannelLayerSize, kChannelLayerSize,
          Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
      QRhiTextureSubresourceUploadDescription sub(w);
      QRhiTextureUploadEntry entry(0, 0, sub);
      res.uploadTexture(
          b.array, QRhiTextureUploadDescription({entry}));
    }
    else
    {
      // Allocation failed — drop the empty bucket so primaryArray()
      // stays null and callers hit the "no array" fallback path.
      bc.buckets.clear();
    }
  }
}

// Drops every preprocessor-owned QRhiBuffer through the RenderList, resets
// the paired capacity counters, frees registry slots via clearAllCaches(),
// and detaches from the registry.
void release(RenderList& renderer) override
{
  // QRhiBuffer invariant: go through RenderList::releaseBuffer so any
  // buffer still referenced by a downstream mesh's MeshBuffers skips
  // deleteLater (the mesh iteration at RenderList::release will
  // destroy it via `delete b.handle`). Bypassing releaseBuffer with
  // `deleteLater` directly is what caused the "rare segfault on exit"
  // — the same pointer ending up in the final `delete b.handle` pass.
  auto dropBuf = [&](QRhiBuffer*& b) {
    if(b) { renderer.releaseBuffer(b); b = nullptr; }
  };
  dropBuf(m_lightIndicesBuffer);
  // m_materialsBuffer + m_lightsBuffer removed — scene_materials and
  // scene_lights bind the registry arenas directly.
  dropBuf(m_materialsExtBuffer);
  dropBuf(m_materialUVTransformsBuffer);
  m_materialUVTransformsCap = 0;
  // Scene-data entries are only released when flagged as owned.
  for(auto& sd : m_sceneDataBuffers)
    if(sd.owned && sd.buffer) renderer.releaseBuffer(sd.buffer);
  m_sceneDataBuffers.clear();
  for(auto& sk : m_skinBuffers)
    if(sk.buffer) renderer.releaseBuffer(sk.buffer);
  m_skinBuffers.clear();
  // Plan 09 S4: vertex/index streams are registry-owned; only the
  // preprocessor-owned per_draws + indirect_draw_cmds + per_draw_bounds
  // drop here.
  dropBuf(m_mdi.per_draws);
  dropBuf(m_mdi.indirect_draw_cmds);
  dropBuf(m_mdi.per_draw_bounds);
  m_mdi = {};
  // Per-bucket primitive cloud resources.
  for(auto& [k, bb] : m_primitiveCloudBuckets)
  {
    dropBuf(bb.raw_splats);
    dropBuf(bb.cloud_meta);
    dropBuf(bb.cloud_id_lookup);
    dropBuf(bb.indirect);
  }
  m_primitiveCloudBuckets.clear();
  dropBuf(m_instTranslations);
  dropBuf(m_instColors);
  dropBuf(m_instDrawIds);
  m_instTranslationsCap = 0;
  m_instColorsCap = 0;
  m_instDrawIdsCap = 0;
  m_instSlotsUsed = 0;
  m_lightIndicesCap = 0;
  m_materialsExtCap = 0;
  // Texture channel arrays are owned by GpuResourceRegistry — no
  // per-preprocessor cleanup needed. They get destroyed when the
  // RenderList tears down (registry.destroy()).
  dropBuf(m_sceneCountsBuffer);
  dropBuf(m_shadowCascadesBuffer);
  dropBuf(m_camerasBuffer);
  dropBuf(m_camerasPrevBuffer);
  m_camerasCap = 0;
  dropBuf(m_worldTransformsBuffer);
  dropBuf(m_worldTransformsPrevBuffer);
  m_worldTransformsCap = 0;
  m_pendingWorldXformWrites.clear();
  m_pendingWorldXformWrites.shrink_to_fit();
  m_lastSnapshotFrame = -1;
  // Symmetric clear for m_pendingGpuCopies: ops record raw QRhiBuffer*
  // for src/dst (m_mdi.* and m_primitiveCloudBuckets buffers) which
  // dropBuf above just released. Today release() is followed by either
  // node teardown (no further runInitialPasses) or init() + a new
  // rebuildMDI which clears the queue at its top, so the dangling
  // pointers are never dereferenced — but the asymmetry is fragile
  // against any future reordering. Defensive.
  m_pendingGpuCopies.clear();
  m_pendingGpuCopies.shrink_to_fit();
  m_lastGpuCopiesFrame = -1;
  // Env arena buffer is owned by GpuResourceRegistry — nothing to drop here.
  // Plan 09 S4: stream byte-size trackers removed (see m_mdi comment).

  // Free per-registry resources on every release(), regardless of
  // whether the renderer will be destroyed (recreateOutputRenderList)
  // or reused (relinkGraph). The "skip wipe on registry-pointer
  // match" optimization the previous version of this comment
  // referenced ONLY benefits the relinkGraph path; on resize the
  // renderer is freshly constructed so m_loaderMaterialSlots etc.
  // are already empty.
  //
  // The bug it caused: m_envSlot was leaked on every release().
  // The Env arena has only 8 slots (see GpuResourceRegistry.cpp), so
  // after 8 resizes the arena exhausted, m_envSlot allocation fell
  // back to slot 0 (or invalid), and the env aux binding pointed at
  // slot 0's stale data — wildly wrong lighting / fog / exposure
  // that drifts each resize as different stale data lands at slot 0.
  // Other arenas have more headroom (Material 32K, RawTransform
  // 16K) but they still leak; over many resizes the same drift
  // would surface there.
  //
  // Trade-off: relinkGraph now pays the cost of re-allocating the
  // env slot + per-loader-material slots + clearing the texture
  // fingerprint (~10s of ms). Acceptable — relinkGraph is rare
  // (user changes graph); resize is common (drag-resize fires
  // continuously).
  clearAllCaches(/*freeRegistryResources=*/true, (uint32_t)renderer.frame);

  // Clear the registry pointer so a post-release rebuildChannel call
  // hits its guarded early-out rather than dereferencing the
  // pre-release pointer. m_lastRegistry stays populated for any
  // future re-init wanting to detect "same registry as before".
  m_lastRegistry = m_registry;
  m_registry = nullptr;
  m_initialized = false;
}

// Source byte size of one element of an ossia::geometry attribute format.
// Used to bound CPU attribute reads so an attribute authored in a smaller
// format than the consumer expects (threedim#10: an unorm-byte4 color, 4 B,
// read as float4, 16 B) doesn't over-read the source buffer.
+ static int geomAttrFormatByteSize(int format) noexcept + { + using A = ossia::geometry::attribute; + switch(format) + { + case A::float4: return 16; + case A::float3: return 12; + case A::float2: return 8; + case A::float1: return 4; + case A::unormbyte4: return 4; + case A::unormbyte2: return 2; + case A::unormbyte1: return 1; + case A::uint4: case A::sint4: return 16; + case A::uint3: case A::sint3: return 12; + case A::uint2: case A::sint2: return 8; + case A::uint1: case A::sint1: return 4; + case A::half4: return 8; + case A::half3: return 6; + case A::half2: return 4; + case A::half1: return 2; + case A::ushort4: case A::sshort4: return 8; + case A::ushort3: case A::sshort3: return 6; + case A::ushort2: case A::sshort2: return 4; + case A::ushort1: case A::sshort1: return 2; + default: return 0; // user_struct / unknown + } + } + + // Read a single vertex attribute's full range from a CPU-backed source + // geometry into a freshly-allocated contiguous byte buffer. Returns empty + // if the source uses a GPU handle, is missing, or has an unsupported + // format. `BytesPerVertex` is the consumer's expected element size. + template + static std::vector extractCpuAttribute( + const ossia::geometry& g, ossia::attribute_semantic sem) + { + const auto* a = g.find(sem); + if(!a) + return {}; + if(a->binding < 0 || a->binding >= (int)g.input.size()) + return {}; + const auto& in = g.input[a->binding]; + if(in.buffer < 0 || in.buffer >= (int)g.buffers.size()) + return {}; + const auto& b = g.buffers[in.buffer]; + const auto* cpu = ossia::get_if(&b.data); + if(!cpu || !cpu->raw_data) + return {}; + + const int stride = (a->binding < (int)g.bindings.size()) + ? (int)g.bindings[a->binding].byte_stride + : BytesPerVertex; + + // Copy at most the source element's byte size into the destination + // element (the rest stays zero-filled). An attribute whose source + // format is narrower than BytesPerVertex (e.g. 
unorm-byte4 color, 4 B, + // consumed as float4, 16 B) must not pull 12 stray bytes per vertex. + const int srcElem = geomAttrFormatByteSize(a->format); + const int copyPerVertex + = (srcElem > 0) ? std::min(BytesPerVertex, srcElem) : BytesPerVertex; + + // Bound every read against the source buffer's actual byte_size: + // an inconsistent producer (short buffer, wrong vertex_count) must not + // over-read off the end of the heap allocation (threedim#10). + const int64_t baseOff = (int64_t)in.byte_offset + (int64_t)a->byte_offset; + const int64_t srcBytes = cpu->byte_size; + if(baseOff < 0 || (srcBytes > 0 && baseOff >= srcBytes)) + return {}; + + std::vector out(std::size_t(g.vertices) * BytesPerVertex); + const auto* raw = reinterpret_cast(cpu->raw_data.get()); + const auto* base = raw + baseOff; + for(int i = 0; i < g.vertices; ++i) + { + const int64_t off = baseOff + (int64_t)i * stride; + // Clamp this element's copy so it never reads past byte_size. + int n = copyPerVertex; + if(srcBytes > 0) + { + const int64_t avail = srcBytes - off; + if(avail <= 0) + break; // remaining vertices stay zero-filled + if(avail < n) + n = (int)avail; + } + std::memcpy(out.data() + std::size_t(i) * BytesPerVertex, + base + (int64_t)i * stride, n); + } + return out; + } + + // GPU-backed counterpart of extractCpuAttribute. Returns the backing + // QRhiBuffer* + source byte offset + stride for the requested semantic + // when the mesh's buffer is a gpu_buffer variant (upstream compute + // shader output, etc). Empty when the attribute is missing or the + // buffer is CPU-resident. 
+ struct GpuAttrView + { + QRhiBuffer* buf{}; + int src_offset{}; + int byte_stride{}; + }; + static GpuAttrView + extractGpuAttribute(const ossia::geometry& g, ossia::attribute_semantic sem) + { + const auto* a = g.find(sem); + if(!a) + return {}; + if(a->binding < 0 || a->binding >= (int)g.input.size()) + return {}; + const auto& in = g.input[a->binding]; + if(in.buffer < 0 || in.buffer >= (int)g.buffers.size()) + return {}; + const auto& b = g.buffers[in.buffer]; + const auto* gpu = ossia::get_if(&b.data); + if(!gpu || !gpu->handle) + return {}; + GpuAttrView v; + v.buf = static_cast(gpu->handle); + v.src_offset = int(in.byte_offset + a->byte_offset); + v.byte_stride = (a->binding < (int)g.bindings.size()) + ? (int)g.bindings[a->binding].byte_stride + : 0; + return v; + } + + static std::vector extractCpuIndices(const ossia::geometry& g) + { + if(g.index.buffer < 0 || g.index.buffer >= (int)g.buffers.size()) + return {}; + const auto& b = g.buffers[g.index.buffer]; + const auto* cpu = ossia::get_if(&b.data); + if(!cpu || !cpu->raw_data) + return {}; + + // Bound the index read against the source byte_size (threedim#10): a + // short / inconsistent index buffer must not over-read the heap. Clamp + // the readable index count to what fits past byte_offset. + const int idxBytes + = (g.index.format == decltype(g.index)::uint16) ? 
2 : 4; + const int64_t baseOff = (int64_t)g.index.byte_offset; + const int64_t srcBytes = cpu->byte_size; + if(baseOff < 0 || (srcBytes > 0 && baseOff >= srcBytes)) + return {}; + int readable = g.indices; + if(srcBytes > 0) + { + const int64_t avail = (srcBytes - baseOff) / idxBytes; + if(avail < readable) + readable = (int)std::max(avail, 0); + } + + std::vector out(g.indices); // tail (if clamped) stays 0 + const auto* base = reinterpret_cast(cpu->raw_data.get()) + + baseOff; + if(g.index.format == decltype(g.index)::uint16) + { + const auto* src = reinterpret_cast(base); + for(int i = 0; i < readable; ++i) + out[i] = src[i]; + } + else + { + std::memcpy(out.data(), base, std::size_t(readable) * 4); + } + return out; + } + + // Mesh-deterministic subset of emitDraw's skip predicate (threedim#3). + // emitDraw drops a draw when: + // (a) the mesh has no usable positions (neither CPU nor GPU sourced), or + // (b) it has indices but they're GPU-backed (extractCpuIndices empty). + // Both depend only on the mesh's buffers, which are invariant while the + // mesh fingerprint matches — so the fast path can replicate them here to + // keep its freshPerDraws mirror in lock-step with what emitDraw packed. + // The remaining emitDraw skips (null mesh / vertices<=0 / null registry / + // slab exhaustion) are handled at the fast-path call site or cannot occur + // once a slab is already resident. + static bool meshEmitsDraw(const ossia::geometry& mesh) + { + const bool hasCpuPos + = !extractCpuAttribute<12>(mesh, ossia::attribute_semantic::position) + .empty(); + if(!hasCpuPos) + { + const auto gpu_pos + = extractGpuAttribute(mesh, ossia::attribute_semantic::position); + if(!gpu_pos.buf) + return false; // no positions → emitDraw skips + } + if(mesh.indices > 0 && extractCpuIndices(mesh).empty()) + return false; // GPU-backed indices unsupported → emitDraw skips + return true; + } + + // Grow-only allocate / reuse a single QRhiBuffer. 
+ // + // Releases the old handle via RenderList::releaseBuffer — which is the + // project-wide invariant for QRhiBuffer lifetime: releaseBuffer scans + // the RenderList's m_vertexBuffers for the pointer and either skips + // (when the buffer is still referenced by a mesh, so the mesh iteration + // at RenderList::release will clean it up) or deleteLater's (when it + // isn't referenced). Calling QRhiBuffer::deleteLater directly bypasses + // that check and causes a double-free on RenderList::release for any + // buffer that was also stored in a MeshBuffers entry — the "sometimes + // segfault on exit" crash pattern. + // Returns true when the buffer was (re)allocated this call. Callers + // pairing the buffer with a diffUpload-managed CPU mirror MUST clear + // that mirror on `true` so diffUpload re-emits the full fresh + // contents into the new (uninitialised) allocation. Without this, + // diffUpload's equal-prefix short-circuit (lines 779-801) leaves the + // freshly-allocated GPU buffer's prefix bytes uninitialised whenever + // the new fresh values match the previous frame's cached values + // (e.g. an Instancer with one prototype emits draw_id=0 for every + // slot — every cross of the power-of-two capacity boundary leaks the + // first cached.size() entries as driver-uninit memory). Manifests as + // "instances disappear at counts 4→5 / 8→9 / 16→17 / …" because the + // prototype's vertex shader reads garbage draw_id and OOBs on + // per_draws[draw_id]. 
+ static bool growBuf( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, + QRhiBuffer*& buf, int64_t& cap, + int64_t need, QRhiBuffer::UsageFlags flags, const char* name) + { + if(buf && cap >= need) + return false; + // Capacity policy: pure power-of-two doubling overshoots badly for + // large buffers (a 1.08 GB request landed on a 2 GB allocation, which + // QRhi/Vulkan/D3D commonly reject around the 2³¹ byte boundary — + // many driver paths cap maxStorageBufferRange at 2GB-4 or use a + // signed-int32 size internally). Switch policy at a 256 MB knee: + // small buffers double (so frequent grows don't thrash); huge + // buffers grow by 25 % over need (still amortised, but never + // doubles past a 2 GB cliff for a sub-2 GB need). Aligned to 16 B + // so std430 structures land on natural strides. + constexpr int64_t kKnee = 256ll * 1024 * 1024; // 256 MB + int64_t newCap = cap > 0 ? cap : 16; + while(newCap < need) + { + if(newCap < kKnee) + newCap *= 2; + else + newCap = (need * 5 / 4 + 15) & ~int64_t{15}; + } + auto* old = buf; + if(buf) + renderer.releaseBuffer(buf); + buf = renderer.state.rhi->newBuffer(QRhiBuffer::Static, flags, newCap); + buf->setName(name); + // QRhi::create() returns false on driver-level allocation failure + // (out of VRAM, exceeds maxBufferSize, signed-32-bit overflow in + // the backend). Without this check we'd publish a zombie wrapper + // whose underlying VkBuffer/D3D buffer is null; uploadStaticBuffer + // becomes a silent no-op and the GPU sees zero-filled memory at + // every read. That's exactly the "all splats collapse to origin" + // signature in the 3DGS pipeline. Surface the failure loudly. 
+ const bool ok = buf->create(); + BUFTRACE() << "ScenePreprocessor::growBuf name=" << name + << " old=" << (void*)old + << " new=" << (void*)buf + << " cap=" << (qint64)cap << "->" << (qint64)newCap + << " need=" << (qint64)need + << " ok=" << ok; + if(!ok) + { + qWarning() << "ScenePreprocessor::growBuf:" << name + << "create() FAILED at cap=" << (qint64)newCap + << "(need=" << (qint64)need + << "). Driver likely refused the allocation —" + " too large, OOM, or hit a backend size limit." + " Downstream reads will return zeros."; + } + else + { + // Zero-fill the freshly allocated buffer. Vulkan does NOT + // zero-initialise new VkBuffers — the underlying device-memory + // page contains whatever was there before. For sparse-uploaded + // SSBOs (per_draws padding past drawCount, world_transforms + // unused arena slots, etc.) the un-touched bytes would otherwise + // be read by shaders (especially when an indexer like + // PerDraw.transform_slot points at a slot the producer hasn't + // populated this frame) and feed garbage into the pipeline. + // After resize, each fresh VkBuffer gets a different page → + // wildly different visual results per resize. RhiClearBuffer + // pulls the zero source bytes from a thread-local pool — no + // per-call std::vector(newCap, 0) allocation. + RhiClearBuffer::clearBuffer( + *renderer.state.rhi, res, buf, 0, (quint32)newCap); + } + cap = newCap; + return true; + } + + // Resolve a material_component pointer to its Material-arena slot + // index. Producer-authored materials carry a live raw_slot; loader + // materials get one allocated in m_loaderMaterialSlots. Returns 0 + // when no slot is found — matches an unused arena entry, so shaders + // fall back to a default-initialised MaterialGPU rather than reading + // undefined bytes. + // + // Task 28a arena-direct path: this is the value stamped into + // `PerDrawGPU.material_index`, NOT the scene.state->materials index. 
+ // Both the fast-path per_draws pack (update()) and the full-rebuild
+ // pack (rebuildMDI) must use this helper so the arena slot index is
+ // consistent across meshes-changed and meshes-unchanged paths.
+ uint32_t arenaSlotForMaterial(const ossia::material_component* mat) const noexcept
+ {
+   // No material or no registry: fall back to slot 0.
+   if(!mat || !m_registry)
+     return 0u;
+   // A slot the registry still reports as live wins.
+   if(m_registry->isLive(mat->raw_slot))
+     return mat->raw_slot.internal_index;
+   // Otherwise look the pointer up in the loader-material table.
+   auto it = m_loaderMaterialSlots.find(mat);
+   if(it != m_loaderMaterialSlots.end() && it->second.valid())
+     return it->second.slot_index;
+   return 0u;
+ }
+
+ // Resolve a stable id for an instance prototype. Producers SHOULD stamp
+ // mesh_primitive::stable_id at construction (loaders do, PBRMesh does);
+ // when they don't (notably Threedim::Primitive routed through
+ // halp::geometry → mesh_component::legacy_geometry, which carries no
+ // primitive list at all and is bridged into a synthesized primitive
+ // upstream), we mint our own id keyed on the mesh_component pointer
+ // — stable across frames as long as the producer re-emits the same
+ // shared_ptr, which the Phase-1 identity-caching pattern enforces.
+ //
+ // Resolution order:
+ //   1. prim.stable_id when non-zero;
+ //   2. with no mesh_component, the address of `prim` itself;
+ //   3. otherwise an id minted once per mesh_component pointer and cached
+ //      in m_protoStableIds.
+ uint64_t resolvePrototypeStableId(
+     const ossia::mesh_component* mc,
+     const ossia::mesh_primitive& prim) noexcept
+ {
+   if(prim.stable_id != 0)
+     return prim.stable_id;
+   if(!mc)
+   {
+     // The target type of this cast was missing (ill-formed expression);
+     // uint64_t matches the return type.
+     // NOTE(review): this id is only as stable as the address of `prim`
+     // — confirm callers never pass a temporary here.
+     return reinterpret_cast<uint64_t>(&prim);
+   }
+   // emplace() inserts a 0 placeholder only on first sight of `mc`; the
+   // real id is minted exactly once, on that insertion.
+   auto [it, inserted] = m_protoStableIds.emplace(mc, 0u);
+   if(inserted)
+     it->second = ossia::mint_stable_id();
+   return it->second;
+ }
+
+ // MDI rebuild: concatenate CPU-backed legacy_geometry meshes into shared
+ // vertex / index buffers + emit one output geometry with indirect draw
+ // metadata. Draws whose source is GPU-backed or uses non-standard formats
+ // are skipped with a warning (they can be rendered through per-mesh mode).
+ //
+ // Plan 09 S4 integration (Wave 1): the MeshArenaManager's slab lifecycle
+ // is exercised here — `acquireMeshSlab` + `markMeshSlabSeen` per-draw,
+ // `sweepMeshSlabs` at the end.
Slabs are allocated, their offsets are + // available, but the concat-and-bulk-upload path below still runs + // unchanged: byte-identical rendering is the Wave 1 acceptance criterion. + // + // TODO (S4 full migration, follow-up): replace `uploadStaticBuffer` at + // offset 0 over concatenated ACC vectors with per-slab + // `registry.uploadMeshStream(slab, Stream, bytes, size)` calls, gated + // by `slab->freshly_allocated`. Output geometry's vertex/index buffer + // bindings switch from `m_mdi.positions` to + // `registry.meshStreamBuffer(MeshStream::Positions)`. indirect_draw_cmds + // entries take their `baseVertex` / `firstIndex` from the slab's + // stream offsets. GPU-to-GPU copies (m_pendingGpuCopies) point at + // slab offsets too. Net effect: adding one mesh uploads only that + // mesh's bytes; no scene-wide reconcat. + // Primitive-cloud branch — buckets fs.primitive_clouds by format_id + // and emits one indirect-draw geometry per bucket. Each bucket + // geometry is appended to m_outputSpec.meshes after the mesh MDI + // entry (if any). Per bucket emits: + // - one auxiliary SSBO `raw_splats` (concatenation of cloud + // raw_data buffers; same row stride across the bucket's clouds) + // - one auxiliary SSBO `cloud_meta` (CloudMetaGPU[] mirroring + // PerDrawGPU's model[16] + transform_slot pattern) + // - one auxiliary SSBO `cloud_id_lookup` (uint per primitive -> + // index into cloud_meta) + // - one indirect cmd buffer {vertex_count=6, instance_count=Σ + // primitive_counts, ...} so RawRaster's existing m_mesh->draw() + // path picks up the draw via cb.drawIndirect or the cpu_draw + // fallback. + // + // The format's first CSF stage reads `raw_splats` via AUXILIARY + // LAYOUT (no per-column SSBO bindings, so descriptor budget stays + // tight on integrated Metal — see .claude/PRIMITIVE-CLOUD-PLAN.md). 
+ void rebuildPrimitiveClouds( + RenderList& renderer, QRhiResourceUpdateBatch& res, + const FlatScene& fs) + { + ++m_primitiveCloudFrame; + if(fs.primitive_clouds.empty()) + { + // No clouds this frame — keep buckets around for one frame in + // case the scene briefly goes empty during a graph rebuild, but + // the persistent buffers are released by releaseBuffer() when + // the renderer torn down. Stale eviction only fires when the + // primitive_clouds list is non-empty (below). + return; + } + + // Bucket the entries. flat_map>. + // bucket_key was already chosen by the visitor: hash(format_id) or + // stable_id when format_id is empty (each unformatted cloud + // becomes its own bucket). + struct Bucket + { + uint32_t bucket_key; + ossia::small_vector draws; + uint64_t total_primitives{}; + uint32_t row_stride{}; + int64_t raw_splats_bytes{}; + }; + ossia::flat_map buckets; + + for(const auto& d : fs.primitive_clouds) + { + if(!d.cloud || d.cloud->primitive_count == 0) + continue; + // Bucket by format_id when set, else by cloud's address (stable + // pointer keyed bucket). Mirrors the visitor's intent. Hash matches + // the canonical filter_tag stamp (ossia::hash_string truncated to + // 32 bits) so a downstream FlattenedSceneFilterNode "format_id == + // match_str" route lines up byte-for-byte with this bucket key. + uint32_t key = 0; + if(!d.cloud->format_id.empty()) + { + key = (uint32_t)ossia::hash_string(d.cloud->format_id); + } + else + { + key = (uint32_t)((uintptr_t)d.cloud.get() & 0xffffffffu); + } + + auto& b = buckets[key]; + if(b.draws.empty()) + { + b.bucket_key = key; + b.row_stride = d.cloud->row_stride; + } + else if(b.row_stride != d.cloud->row_stride) + { + // Row-stride mismatch in a same-key bucket: skip the + // mismatched cloud rather than corrupt the concat. Indicates + // a tagging error in the producer. 
+ qWarning() << "ScenePreprocessor::rebuildPrimitiveClouds: " + "row_stride mismatch within bucket" + << QString::fromStdString(d.cloud->format_id) + << " expected" << b.row_stride + << "got" << d.cloud->row_stride; + continue; + } + b.draws.push_back(&d); + b.total_primitives += d.cloud->primitive_count; + } + + // Drop buckets whose key did not appear this frame. + for(auto it = m_primitiveCloudBuckets.begin(); + it != m_primitiveCloudBuckets.end();) + { + if(buckets.find(it->first) == buckets.end()) + { + auto& bb = it->second; + if(bb.raw_splats) renderer.releaseBuffer(bb.raw_splats); + if(bb.cloud_meta) renderer.releaseBuffer(bb.cloud_meta); + if(bb.cloud_id_lookup) renderer.releaseBuffer(bb.cloud_id_lookup); + if(bb.indirect) renderer.releaseBuffer(bb.indirect); + it = m_primitiveCloudBuckets.erase(it); + } + else + { + ++it; + } + } + + using UF = QRhiBuffer::UsageFlags; + + // Lazily ensure m_outputSpec.meshes exists so we can append. + if(!m_outputSpec.meshes) + m_outputSpec.meshes = std::make_shared(); + if(!m_outputSpec.filters) + m_outputSpec.filters = std::make_shared(); + + // Cow if shared with downstream — the mesh MDI rebuilds via + // make_shared() so the typical state is non-shared + // here. If a downstream reader is holding the previous list, we + // need a fresh one to avoid mutating it. 
+ if(m_outputSpec.meshes.use_count() > 1) + { + auto fresh = std::make_shared(); + fresh->meshes = m_outputSpec.meshes->meshes; + fresh->dirty_index = m_outputSpec.meshes->dirty_index; + m_outputSpec.meshes = std::move(fresh); + } + + auto wrapGpu = [](QRhiBuffer* b, int64_t size) { + ossia::geometry::gpu_buffer gb; + gb.handle = b; + gb.byte_size = size; + return ossia::geometry::buffer{.data = gb, .dirty = true}; + }; + + bool any_emitted = false; + for(auto& [key, b] : buckets) + { + if(b.draws.empty() || b.total_primitives == 0 || b.row_stride == 0) + continue; + + auto& bb = m_primitiveCloudBuckets[key]; + bb.row_stride = b.row_stride; + bb.last_seen_frame = m_primitiveCloudFrame; + + // ── Indirect-draw command shape (used both for size accounting + // upfront and for the CPU build inside the upload guard). + struct IndirectCmd + { + uint32_t indexOrVertexCount; + uint32_t instanceCount; + uint32_t firstIndexOrVertex; + int32_t baseVertex; // for indexed draws — unused (vertex_count path) + uint32_t baseInstance; + }; + + // ── Upfront sizing (needed by growBuf AND by the per-bucket + // geometry construction further down, which references the + // owned buffer pointers regardless of upload/skip). raw_splats + // needs VertexBuffer alongside StorageBuffer because the bucket + // exposes the buffer through both paths: as an AUXILIARY SSBO + // (CSF reads the row layout via std430) AND as a per-vertex + // ATTRIBUTE buffer (Raw Raster's setVertexInput pulls every + // g.input entry — even on procedural draws — and Vulkan + // requires VK_BUFFER_USAGE_VERTEX_BUFFER_BIT for vertex + // bindings). 
+ const int64_t rawBytes + = (int64_t)b.total_primitives * (int64_t)b.row_stride; + const uint32_t bucketCloudCount = (uint32_t)b.draws.size(); + const int64_t cmBytes + = (int64_t)bucketCloudCount * (int64_t)sizeof(CloudMetaGPU); + const int64_t lookupBytes + = (int64_t)b.total_primitives * (int64_t)sizeof(uint32_t); + const int64_t icBytes = (int64_t)sizeof(IndirectCmd); + + growBuf(renderer, res,bb.raw_splats, bb.rawSplatsCap, rawBytes, + UF(QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer), + "ScenePreprocessor::cloud.raw_splats"); + growBuf(renderer, res,bb.cloud_meta, bb.cloudMetaCap, cmBytes, + UF(QRhiBuffer::StorageBuffer), + "ScenePreprocessor::cloud.cloud_meta"); + growBuf(renderer, res,bb.cloud_id_lookup, bb.cloudIdLookupCap, lookupBytes, + UF(QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer), + "ScenePreprocessor::cloud.cloud_id_lookup"); +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + growBuf(renderer, res,bb.indirect, bb.indirectCap, icBytes, + UF(QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer), + "ScenePreprocessor::cloud.indirect"); +#else + growBuf(renderer, res,bb.indirect, bb.indirectCap, icBytes, + UF(QRhiBuffer::StorageBuffer), + "ScenePreprocessor::cloud.indirect"); +#endif + + // ── Phase-1 delta-update fingerprint ───────────────────────── + // Hash everything the four GPU buffers depend on. When this + // matches the last frame's value, the buckets are byte-equal + // to what the previous frame uploaded — the per-frame CPU + // concat + uploadStaticBuffer ×4 is pure waste, skip it. + // For the user's "drop a 1 GB PLY into a static scene" case + // this brings raw_splats per-frame work from ~720 MB/s of GPU + // memcpy down to zero. The growBuf calls above already + // short-circuited (cap >= need), so on the steady state the + // entire bucket loop becomes O(draws.size()) hashing. 
+ uint64_t fp = 0; + ossia::hash_combine(fp, (uint64_t)bucketCloudCount); + ossia::hash_combine(fp, (uint64_t)b.row_stride); + ossia::hash_combine(fp, (uint64_t)b.total_primitives); + for(const auto* d : b.draws) + { + const auto* raw = d->cloud->raw_data.get(); + ossia::hash_combine(fp, (uint64_t)(uintptr_t)raw); + // raw_data carries an explicit content_hash for fast + // diff-skip when the producer can stamp one (PlyParser + // sets it from the storage pointer); fall back to + // dirty_index for producers that don't. + const uint64_t content_id + = raw ? (raw->content_hash != 0 + ? raw->content_hash + : (uint64_t)raw->dirty_index) + : 0u; + ossia::hash_combine(fp, content_id); + ossia::hash_combine(fp, (uint64_t)d->cloud->primitive_count); + ossia::hash_combine(fp, (uint64_t)d->transform_slot); + // worldTransform: 16 floats × 4 = 64 bytes column-major. + ossia::hash_combine( + fp, + ossia::hash_bytes(d->worldTransform.constData(), 64)); + } + + // 0 = "never uploaded yet, force the first frame's upload + // regardless of fingerprint matching". growBuf may also have + // just allocated a fresh VkBuffer (cap < need), in which case + // the old data is gone; the fingerprint differs from frame N-1 + // because the size constraint changed (total_primitives or + // row_stride is part of fp). Either way the !unchanged branch + // runs and we re-upload. + const bool unchanged = (bb.content_fingerprint != 0) + && (bb.content_fingerprint == fp) + && (bb.raw_splats != nullptr); + + if(!unchanged) + { + // ── raw_splats: concatenation of all clouds' raw bytes ──────── + // Bucket-internal format_id mismatch was rejected above so all + // clouds in this bucket share row_stride. 
+ std::vector concat; + concat.resize((std::size_t)rawBytes); + uint8_t* dst = concat.data(); + for(const auto* d : b.draws) + { + const auto& br = d->cloud->raw_data; + if(!br) continue; + const int64_t bytes + = (int64_t)d->cloud->primitive_count * (int64_t)b.row_stride; + if(auto* cpu = ossia::get_if(&br->resource)) + { + if(cpu->data && cpu->byte_size >= bytes) + { + std::memcpy(dst, cpu->data.get(), (std::size_t)bytes); + } + else + { + std::memset(dst, 0, (std::size_t)bytes); + } + } + else + { + // GPU-resident raw_data: out of scope for v1 (would need a + // GPU-to-GPU copy via copyBuffer). Zero-fill so the bucket + // is at least well-defined. See PRIMITIVE-CLOUD-ARENA-DESIGN.md + // for the planned Phase-2 path (slot-based GPU-resident + // producers writing into the per-format arena directly). + std::memset(dst, 0, (std::size_t)bytes); + } + dst += bytes; + } + res.uploadStaticBuffer(bb.raw_splats, 0, rawBytes, concat.data()); + + // ── cloud_meta + cloud_id_lookup ───────────────────────────── + std::vector cmData; + cmData.resize(bucketCloudCount); + + std::vector lookup; + lookup.resize((std::size_t)b.total_primitives); + + uint32_t prim_offset = 0; + uint32_t prim_lookup_pos = 0; + for(uint32_t ci = 0; ci < bucketCloudCount; ++ci) + { + const auto* d = b.draws[ci]; + CloudMetaGPU& gm = cmData[ci]; + + // Composed world matrix from the FlattenVisitor walk + // (parentWorld). QMatrix4x4 is column-major and we want a + // column-major float[16] — its constData() returns column- + // major memory directly. + const float* m = d->worldTransform.constData(); + for(int k = 0; k < 16; ++k) gm.model[k] = m[k]; + + // Per-cloud world-space AABB: 8-corner walk of the local + // bounds through worldTransform. Mirrors the bucket-bounds + // loop below at :~1776, but kept per-cloud so format CSFs + // can frustum-cull individual clouds inside a bucket. 
+ const auto& lb = d->cloud->bounds; + if(lb.empty()) + { + // Sentinel: empty bounds -> produce an inverted AABB so + // any frustum test in the shader trivially marks it + // visible (consumers can also check for the inversion). + gm.bounds_min[0] = gm.bounds_min[1] = gm.bounds_min[2] = 1.f; + gm.bounds_max[0] = gm.bounds_max[1] = gm.bounds_max[2] = -1.f; + } + else + { + const QMatrix4x4& W = d->worldTransform; + float minx = std::numeric_limits::infinity(); + float miny = minx, minz = minx; + float maxx = -minx, maxy = -minx, maxz = -minx; + for(int corner = 0; corner < 8; ++corner) + { + const float x = (corner & 1) ? lb.max[0] : lb.min[0]; + const float y = (corner & 2) ? lb.max[1] : lb.min[1]; + const float z = (corner & 4) ? lb.max[2] : lb.min[2]; + const QVector3D p = W.map(QVector3D(x, y, z)); + minx = std::min(minx, p.x()); maxx = std::max(maxx, p.x()); + miny = std::min(miny, p.y()); maxy = std::max(maxy, p.y()); + minz = std::min(minz, p.z()); maxz = std::max(maxz, p.z()); + } + gm.bounds_min[0] = minx; gm.bounds_min[1] = miny; gm.bounds_min[2] = minz; + gm.bounds_max[0] = maxx; gm.bounds_max[1] = maxy; gm.bounds_max[2] = maxz; + } + gm.bounds_min[3] = 0.f; + gm.bounds_max[3] = 0.f; + + gm.primitive_offset = prim_offset; + gm.primitive_count = (uint32_t)d->cloud->primitive_count; + gm.transform_slot = d->transform_slot; // 0xFFFFFFFFu = none + gm.format_param_index = 0; // unused for v1 + gm._pad[0] = gm._pad[1] = gm._pad[2] = gm._pad[3] = 0; + + // Fill lookup[prim_offset..prim_offset+count] = ci + for(uint32_t p = 0; p < gm.primitive_count; ++p) + lookup[prim_lookup_pos + p] = ci; + prim_lookup_pos += gm.primitive_count; + prim_offset += gm.primitive_count; + } + + res.uploadStaticBuffer( + bb.cloud_meta, 0, cmBytes, cmData.data()); + res.uploadStaticBuffer( + bb.cloud_id_lookup, 0, lookupBytes, lookup.data()); + + // ── indirect_draw_cmds: one cmd, vertex_count=N (one slot per + // primitive). 
The bucket geometry is a flat point cloud — the + // CSF stage downstream (e.g. 01_Decode for 3dgs.classic) reads + // `$VERTEX_COUNT_geoIn = N` and emits the instanced 6×N quad + // topology its draw stage expects. Format CSF chains may rewrite + // this cmd post-cull to shrink the active set; the unculled + // total is the safe default. + const IndirectCmd cmd{ + /*indexOrVertexCount*/ (uint32_t)b.total_primitives, + /*instanceCount*/ 1u, + /*firstIndexOrVertex*/ 0u, + /*baseVertex*/ 0, + /*baseInstance*/ 0u}; + res.uploadStaticBuffer(bb.indirect, 0, icBytes, &cmd); + + bb.content_fingerprint = fp; + } + + // ── Build the bucket geometry ───────────────────────────────── + ossia::geometry g; + const int rawSplatsBufIdx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu(bb.raw_splats, rawBytes)); + const int cloudMetaBufIdx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu(bb.cloud_meta, cmBytes)); + const int cloudLookupBufIdx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu(bb.cloud_id_lookup, lookupBytes)); + const int indirectBufIdx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu(bb.indirect, icBytes)); + + g.auxiliary.push_back({ + .name = "raw_splats", + .buffer = rawSplatsBufIdx, + .byte_offset = 0, .byte_size = rawBytes}); + g.auxiliary.push_back({ + .name = "cloud_meta", + .buffer = cloudMetaBufIdx, + .byte_offset = 0, .byte_size = cmBytes}); + + // Expose the cloud→primitive mapping as a per-vertex ATTRIBUTE + // (one uint per primitive), not as AUXILIARY. The CSF binder + // converts ATTRIBUTES into named SSBOs accessible as + // `geo_cloud_id_in[idx]`, and — crucially — the presence of a + // read_only ATTRIBUTE on the input geometry resource is what + // makes the CSF node *create an input port*. Without at least + // one such attribute the node has no way to be wired up. 
+ ossia::geometry::binding cidBinding{}; + cidBinding.byte_stride = 4; + cidBinding.classification = ossia::geometry::binding::per_vertex; + const int cidBindingIdx = (int)g.bindings.size(); + g.bindings.push_back(cidBinding); + + struct ossia::geometry::input cidInput{}; + cidInput.buffer = cloudLookupBufIdx; + cidInput.byte_offset = 0; + g.input.push_back(cidInput); + + ossia::geometry::attribute cidAttr{}; + cidAttr.binding = cidBindingIdx; + cidAttr.location = 0; + cidAttr.format = ossia::geometry::attribute::uint1; + cidAttr.byte_offset = 0; + cidAttr.semantic = ossia::attribute_semantic::custom; + cidAttr.name = "cloud_id"; + g.attributes.push_back(cidAttr); + + // When the producer named a struct type for the per-row payload + // (e.g. PlyParser sets "Splat3DGS" for 3dgs.classic), expose + // raw_splats *also* as a per-vertex ATTRIBUTE of format + // user_struct. The CSF binder generates a `Splat3DGS + // geo_splat_in[]` SSBO declaration matching the consumer's + // `TYPES.Splat3DGS` block, so shaders read rows as + // `ISF_READ(geoIn, splat)[idx].field` directly. The legacy + // raw_splats AUXILIARY entry above stays so older presets keep + // working through the migration; once all bundled presets move + // to TYPES the AUXILIARY emit can drop. 
+ const auto* rep = b.draws[0]->cloud.get(); + if(rep && !rep->struct_type_name.empty()) + { + ossia::geometry::binding splatBinding{}; + splatBinding.byte_stride = (uint32_t)b.row_stride; + splatBinding.classification = ossia::geometry::binding::per_vertex; + const int splatBindingIdx = (int)g.bindings.size(); + g.bindings.push_back(splatBinding); + + struct ossia::geometry::input splatInput{}; + splatInput.buffer = rawSplatsBufIdx; + splatInput.byte_offset = 0; + g.input.push_back(splatInput); + + ossia::geometry::attribute splatAttr{}; + splatAttr.binding = splatBindingIdx; + splatAttr.location = 1; + splatAttr.format = ossia::geometry::attribute::user_struct; + splatAttr.element_byte_size = (uint32_t)b.row_stride; + splatAttr.user_type_name = rep->struct_type_name; + splatAttr.byte_offset = 0; + splatAttr.semantic = ossia::attribute_semantic::custom; + splatAttr.name = "splat"; + g.attributes.push_back(splatAttr); + } + + // Forward the camera UBO (uploaded earlier in update() before + // rebuildMDI) so cloud-format CSF stages can read view / + // projection / cameraPosition / renderSize without manual + // wiring. Same name ("camera") that mesh shaders use, so a + // single GLSL UBO declaration works for both paths. + if(m_camerasBuffer) + { + const int camBufIdx = (int)g.buffers.size(); + g.buffers.push_back( + wrapGpu(m_camerasBuffer, (int64_t)sizeof(CameraUBOData))); + g.auxiliary.push_back({ + .name = "camera", + .buffer = camBufIdx, + .byte_offset = 0, + .byte_size = (int64_t)sizeof(CameraUBOData)}); + } + if(m_sceneCountsBuffer) + { + const int countsBufIdx = (int)g.buffers.size(); + g.buffers.push_back( + wrapGpu(m_sceneCountsBuffer, (int64_t)sizeof(SceneCountsUBO))); + g.auxiliary.push_back({ + .name = "scene_counts", + .buffer = countsBufIdx, + .byte_offset = 0, + .byte_size = (int64_t)sizeof(SceneCountsUBO)}); + } + + // Indirect draw shape: vertex_count=N points, instance_count=1. 
+ // The bucket is a flat point cloud — instancing is introduced by + // the format's CSF preprocessor (which converts each input + // "vertex" into a 6-vertex×N-instance quad topology its raster + // stage consumes). + ossia::geometry::gpu_buffer ic_gpu; + ic_gpu.handle = bb.indirect; + ic_gpu.byte_size = icBytes; + g.indirect_count = ic_gpu; + + // Mirror the IndirectCmd shape uploaded inside the !unchanged guard + // (or kept stable from a previous frame). Values are derived directly + // from b.total_primitives + the bucket's "one cmd, instance=1" shape; + // re-deriving here avoids hoisting `cmd` itself out of the upload + // guard just to read its fields. + g.cpu_draw_commands.push_back({ + .index_or_vertex_count = (uint32_t)b.total_primitives, + .instance_count = 1u, + .first_index_or_vertex = 0u, + .base_vertex = 0, + .first_instance = 0u}); + + g.vertices = (int)b.total_primitives; + g.instances = 1; + g.topology = ossia::geometry::points; + g.cull_mode = ossia::geometry::none; + g.front_face = ossia::geometry::counter_clockwise; + // Splats need alpha-blend; tag the geometry so a downstream + // RawRaster picks the right pipeline state. The format's actual + // PIPELINE_STATE in its .frag overrides this if more specific. + g.blend = ossia::geometry::blend_premultiplied_alpha; + g.depth_write = false; + + // Surface format_id as filter_tag (rapidhash truncated to 32 bits) + // so a downstream FlattenedSceneFilterNode in "format_id == + // match_str" mode can route this bucket to its format-specific + // shader chain. Same hash that the bucket key above uses, so the + // producer-side bucketing and the consumer-side filter agree + // byte-for-byte. Empty format_id leaves filter_tag at 0 (the + // "untagged" sentinel — string-match mode treats both as "no + // tag" and matches when match_str is also empty). + if(rep && !rep->format_id.empty()) + g.filter_tag = (uint32_t)ossia::hash_string(rep->format_id); + + // Bounds: union of cloud world-space AABBs. 
+ ossia::aabb worldBounds{}; + worldBounds.min[0] = worldBounds.min[1] = worldBounds.min[2] = 1.f; + worldBounds.max[0] = worldBounds.max[1] = worldBounds.max[2] = -1.f; + for(const auto* d : b.draws) + { + const auto& lb = d->cloud->bounds; + if(lb.empty()) + continue; + // 8 corners of the local AABB transformed to world space. + const QMatrix4x4& W = d->worldTransform; + for(int corner = 0; corner < 8; ++corner) + { + const float x = (corner & 1) ? lb.max[0] : lb.min[0]; + const float y = (corner & 2) ? lb.max[1] : lb.min[1]; + const float z = (corner & 4) ? lb.max[2] : lb.min[2]; + // Use QMatrix4x4::map() (inline member, no QtGui operator + // export needed). Equivalent to (W * vec4(x,y,z,1)).xyz. + const QVector3D p = W.map(QVector3D(x, y, z)); + worldBounds.expand(p.x(), p.y(), p.z()); + } + } + if(!worldBounds.empty()) + { + g.bounds.min[0] = worldBounds.min[0]; + g.bounds.min[1] = worldBounds.min[1]; + g.bounds.min[2] = worldBounds.min[2]; + g.bounds.max[0] = worldBounds.max[0]; + g.bounds.max[1] = worldBounds.max[1]; + g.bounds.max[2] = worldBounds.max[2]; + } + + m_outputSpec.meshes->meshes.push_back(std::move(g)); + any_emitted = true; + } + + if(any_emitted) + { + m_outputSpec.meshes->dirty_index += 1; + } + } + + void rebuildMDI( + RenderList& renderer, QRhiResourceUpdateBatch& res, const FlatScene& fs, + const std::vector& materialTagHashes) + { + // Plan 09 S4 (full migration). Per-mesh slab allocation replaces + // the old concat-and-bulk-upload path. Flow per draw: + // 1. acquireMeshSlab(stable_id, vc, ic) — hit OR fresh allocation + // into the 5 per-stream OffsetAllocators in GpuResourceRegistry. + // 2. If slab.freshly_allocated: extract CPU bytes (or queue a GPU + // copy for GPU-backed sources) and uploadMeshStream into the + // slab's byte offset on each stream. Existing slabs: zero upload. + // 3. indirect_draw_cmds baseVertex / firstIndex come from the slab's + // byte offsets divided by stream stride. + // 4. 
markMeshSlabSeen so the per-frame sweep doesn't reclaim it. + // The grace queue (2 frames by default) prevents the arena from + // returning a live slab's offset to another allocation while an + // in-flight draw still references it. + // + // Output layout unchanged from Wave 1's byte-identical state: four + // vertex bindings (pos/nrm/uv/tan) + one index buffer + all the + // scene auxiliaries. Consumer shaders see identical output shape. + // + // What's NOT in this function anymore: + // - Concatenated CPU byte vectors (acc.positions / .normals / …). + // - Running baseVertex / firstIndex counters. + // - uploadStaticBuffer(offset=0, totalBytes) for vertex/index streams + // — those buffers are registry-owned; we write per-slab only. + // - growBuf for vertex/index streams — pre-sized at registry init. + // What IS here: the per_draws + indirect_draw_cmds upload (small + // preprocessor-owned SSBOs), per-draw metadata pack, output + // geometry construction. + auto& rhi = *renderer.state.rhi; + const uint32_t current_frame = (uint32_t)renderer.frame; + + struct Acc + { + std::vector perDraws; + std::vector perDrawBounds; + struct IndirectCmd + { + uint32_t indexCount, instanceCount, firstIndex; + int32_t baseVertex; + uint32_t baseInstance; + }; + std::vector indirectCmds; + } acc; + + acc.perDraws.reserve(std::max(m_lastDrawCount, fs.draws.size())); + acc.perDrawBounds.reserve(std::max(m_lastDrawCount, fs.draws.size())); + acc.indirectCmds.reserve(std::max(m_lastDrawCount, fs.draws.size())); + + // Concat-offsets for joint matrices across all skeletons in this + // flatten. skinJointOffsets[k] = sum of joint counts for skins < k. + // Stamped into PerDrawGPU.skeleton_offset so a future consolidated + // `joint_matrices` SSBO (single buffer across all skeletons) is a + // drop-in change on the shader side — offsets already point at the + // correct record. 0xFFFFFFFF sentinel is written for unskinned + // draws. 
+ std::vector skinJointOffsets; + skinJointOffsets.reserve(fs.skins.size()); + { + uint32_t running = 0; + for(const auto& sk : fs.skins) + { + skinJointOffsets.push_back(running); + running += (uint32_t)sk.joint_matrices.size(); + } + } + + // Reset pending GPU copies for this frame — populated below when a + // draw's attributes are GPU-resident; issued in runInitialPasses. + m_pendingGpuCopies.clear(); + + // Queue one copy op targeting a slab's byte offset in the arena + // stream. No accumulator pre-reservation here: dst_offset is the + // slab's allocator-assigned offset, not an accumulator-relative + // position. + auto queueSlabCopy = [&](MdiAttr attr, const GpuAttrView& view, + int elem_size, int vertex_count, + uint32_t dst_slab_offset) { + PendingGpuCopy op; + op.attr = attr; + op.src = view.buf; + op.src_offset = view.src_offset; + op.dst_offset = (int)dst_slab_offset; + op.vertex_count = vertex_count; + op.src_stride = view.byte_stride; + op.element_size = elem_size; + op.size = (op.src_stride == 0 || op.src_stride == elem_size) + ? vertex_count * elem_size + : elem_size; // per-vertex path computes size each iter + m_pendingGpuCopies.push_back(op); + }; + + // Scratch CPU buffers reused across draws to hold the padded + // vec3→vec4 conversions for positions / normals and the fallback + // (1,0,0,1) tangents. Grow-only; never shrinks. Avoids re-allocating + // for each per-draw upload. + std::vector scratch; + + uint32_t totalVertices = 0; + uint32_t totalIndices = 0; + bool warned_missing_stable_id = false; + + using Stream = GpuResourceRegistry::MeshStream; + + // Running cursor into the unified per-instance concat space. Each + // emitted indirect cmd consumes `instanceCount` contiguous slots and + // writes its own cmd-index into draw_ids[slot..slot+instanceCount-1]. + // For regular fs.draws cmds (instanceCount=1) cmd_index == slot + // index. 
For fs.instances groups (instanceCount=N) cmd_index != + // slot index, so the shader CANNOT use gl_BaseInstance/gl_DrawID to + // recover the cmd index — it reads the per-instance `draw_id` + // attribute that this cursor populates. + uint32_t slot_cursor = 0; + + // Records of instance-group slot ranges so the post-loop CPU + // bookkeeping can pre-fill draw_ids and queue the GPU copies for + // upstream translation / color buffers into the right concat + // offsets without a second pass over fs.instances. + struct InstanceSlotRecord + { + uint32_t slot_base; + uint32_t count; + uint32_t cmd_index; + QRhiBuffer* src_translations; + uint32_t src_translation_offset; + uint32_t src_translation_stride; + QRhiBuffer* src_colors; + uint32_t src_color_offset; + }; + std::vector instanceRecords; + + // Shared per-cmd processor. Used by the fs.draws loop and the + // fs.instances loop. Performs: + // - attribute extraction (CPU + GPU paths) from the wrapper + // ossia::geometry + // - slab acquire / per-stream upload (only on freshly_allocated) + // - per_draws + per_draw_bounds push + // - indirect cmd push with firstInstance = slot_cursor + // - slot_cursor += instanceCount + // Returns the cmd_index that was emitted (== acc.indirectCmds.size() + // BEFORE the push, == sentinel if the cmd was skipped). + constexpr uint32_t kCmdSkipped = 0xFFFFFFFFu; + auto emitDraw = [&]( + const ossia::geometry* mesh, uint64_t stable_id, + const QMatrix4x4& worldTransform, + const ossia::material_component* materialPtr, + int materialIndex, uint32_t transform_slot, + int skinIndex, const ossia::aabb& local_bounds, + uint32_t instanceCount) -> uint32_t + { + if(!mesh || mesh->vertices <= 0 || !m_registry || instanceCount == 0) + return kCmdSkipped; + if(stable_id == 0) + { + if(!warned_missing_stable_id) + { + qWarning() << "ScenePreprocessor::rebuildMDI: draw has no " + "stable_id — synthesising from mesh pointer. 
" + "Producer should stamp mesh_primitive::stable_id " + "for cache stability."; + warned_missing_stable_id = true; + } + stable_id = (uint64_t)((uintptr_t)mesh) + ^ ((uint64_t)mesh->vertices << 32) + ^ (uint64_t)mesh->indices; + if(stable_id == 0) + stable_id = 1; + } + + // CPU extraction — still the hot path for loaded glTF/FBX scenes. + auto pos = extractCpuAttribute<12>(*mesh, ossia::attribute_semantic::position); + auto nrm = extractCpuAttribute<12>(*mesh, ossia::attribute_semantic::normal); + auto uv = extractCpuAttribute<8>(*mesh, ossia::attribute_semantic::texcoord0); + auto uv1 = extractCpuAttribute<8>(*mesh, ossia::attribute_semantic::texcoord1); + auto col = extractCpuAttribute<16>(*mesh, ossia::attribute_semantic::color0); + auto tan = extractCpuAttribute<16>(*mesh, ossia::attribute_semantic::tangent); + + GpuAttrView gpu_pos, gpu_nrm, gpu_uv, gpu_tan; + if(pos.empty()) + gpu_pos = extractGpuAttribute(*mesh, ossia::attribute_semantic::position); + if(nrm.empty()) + gpu_nrm = extractGpuAttribute(*mesh, ossia::attribute_semantic::normal); + if(uv.empty()) + gpu_uv = extractGpuAttribute(*mesh, ossia::attribute_semantic::texcoord0); + if(tan.empty()) + gpu_tan = extractGpuAttribute(*mesh, ossia::attribute_semantic::tangent); + + if(pos.empty() && !gpu_pos.buf) + return kCmdSkipped; + + std::vector idx; + if(mesh->indices > 0) + { + idx = extractCpuIndices(*mesh); + if(idx.empty()) + return kCmdSkipped; // GPU-backed indices not yet supported. + } + else + { + idx.resize(mesh->vertices); + for(int v = 0; v < mesh->vertices; ++v) + idx[v] = (uint32_t)v; + } + + const uint32_t drawIndexCount = (uint32_t)idx.size(); + const int vc = mesh->vertices; + + auto* slab = m_registry->acquireMeshSlab( + stable_id, (uint32_t)vc, drawIndexCount, current_frame); + if(!slab) + return kCmdSkipped; + + m_registry->markMeshSlabSeen(stable_id, current_frame); + + if(slab->freshly_allocated) + { + // ── Position ── vec3→vec4 padding when CPU-sourced. 
+ const uint32_t posOff + = m_registry->meshSlabOffsetBytes(*slab, Stream::Positions); + if(!pos.empty()) + { + scratch.assign(std::size_t(vc) * 16, std::byte{}); + for(int v = 0; v < vc; ++v) + std::memcpy(scratch.data() + v * 16, pos.data() + v * 12, 12); + m_registry->uploadMeshStream( + res, *slab, Stream::Positions, + scratch.data(), (uint32_t)scratch.size()); + } + else + { + queueSlabCopy(MdiAttr::Positions, gpu_pos, 16, vc, posOff); + } + + // ── Normals ── vec3→vec4 padding; zero fallback when missing. + const uint32_t nrmOff + = m_registry->meshSlabOffsetBytes(*slab, Stream::Normals); + if(!nrm.empty()) + { + scratch.assign(std::size_t(vc) * 16, std::byte{}); + for(int v = 0; v < vc; ++v) + std::memcpy(scratch.data() + v * 16, nrm.data() + v * 12, 12); + m_registry->uploadMeshStream( + res, *slab, Stream::Normals, + scratch.data(), (uint32_t)scratch.size()); + } + else if(gpu_nrm.buf) + { + queueSlabCopy(MdiAttr::Normals, gpu_nrm, 16, vc, nrmOff); + } + else + { + scratch.assign(std::size_t(vc) * 16, std::byte{}); + m_registry->uploadMeshStream( + res, *slab, Stream::Normals, + scratch.data(), (uint32_t)scratch.size()); + } + + // ── Texcoords ── vec2; zero fallback when missing. + const uint32_t uvOff + = m_registry->meshSlabOffsetBytes(*slab, Stream::Texcoords); + if(!uv.empty()) + { + m_registry->uploadMeshStream( + res, *slab, Stream::Texcoords, + uv.data(), (uint32_t)uv.size()); + } + else if(gpu_uv.buf) + { + queueSlabCopy(MdiAttr::Texcoords, gpu_uv, 8, vc, uvOff); + } + else + { + scratch.assign(std::size_t(vc) * 8, std::byte{}); + m_registry->uploadMeshStream( + res, *slab, Stream::Texcoords, + scratch.data(), (uint32_t)scratch.size()); + } + + // ── Tangents ── vec4; (1,0,0,1) fallback. 
+ const uint32_t tanOff + = m_registry->meshSlabOffsetBytes(*slab, Stream::Tangents); + if(!tan.empty()) + { + m_registry->uploadMeshStream( + res, *slab, Stream::Tangents, + tan.data(), (uint32_t)tan.size()); + } + else if(gpu_tan.buf) + { + queueSlabCopy(MdiAttr::Tangents, gpu_tan, 16, vc, tanOff); + } + else + { + scratch.assign(std::size_t(vc) * 16, std::byte{}); + float fb[4] = {1.f, 0.f, 0.f, 1.f}; + for(int v = 0; v < vc; ++v) + std::memcpy(scratch.data() + v * 16, fb, 16); + m_registry->uploadMeshStream( + res, *slab, Stream::Tangents, + scratch.data(), (uint32_t)scratch.size()); + } + + // ── Colors ── vec4; (1,1,1,1) fallback. + if(!col.empty()) + { + m_registry->uploadMeshStream( + res, *slab, Stream::Colors, + col.data(), (uint32_t)col.size()); + } + else + { + scratch.assign(std::size_t(vc) * 16, std::byte{}); + float fb[4] = {1.f, 1.f, 1.f, 1.f}; + for(int v = 0; v < vc; ++v) + std::memcpy(scratch.data() + v * 16, fb, 16); + m_registry->uploadMeshStream( + res, *slab, Stream::Colors, + scratch.data(), (uint32_t)scratch.size()); + } + + // ── Texcoords1 ── vec2; zero fallback. + if(!uv1.empty()) + { + m_registry->uploadMeshStream( + res, *slab, Stream::Texcoords1, + uv1.data(), (uint32_t)uv1.size()); + } + else + { + scratch.assign(std::size_t(vc) * 8, std::byte{}); + m_registry->uploadMeshStream( + res, *slab, Stream::Texcoords1, + scratch.data(), (uint32_t)scratch.size()); + } + + // ── Indices ── + m_registry->uploadMeshStream( + res, *slab, Stream::Indices, + idx.data(), (uint32_t)(idx.size() * 4)); + } + + // Per-draw GPU record. + PerDrawGPU pd{}; + writeMat4(pd.model, worldTransform); + QMatrix4x4 nm = worldTransform.inverted().transposed(); + nm.setColumn(3, QVector4D(0, 0, 0, 1)); + nm.setRow(3, QVector4D(0, 0, 0, 1)); + writeMat4(pd.normal, nm); + pd.material_index = arenaSlotForMaterial(materialPtr); + pd.tag_hash + = (materialIndex >= 0 + && (std::size_t)materialIndex < materialTagHashes.size()) + ? 
materialTagHashes[(std::size_t)materialIndex] + : 0u; + pd.transform_slot = transform_slot; + pd.skeleton_offset + = (skinIndex >= 0 + && (std::size_t)skinIndex < skinJointOffsets.size()) + ? skinJointOffsets[(std::size_t)skinIndex] + : 0xFFFFFFFFu; + acc.perDraws.push_back(pd); + acc.perDrawBounds.push_back(packBounds(local_bounds)); + + const uint32_t cmd_index = (uint32_t)acc.indirectCmds.size(); + Acc::IndirectCmd cmd{ + drawIndexCount, + instanceCount, + slab->index_slot.offset, + (int32_t)slab->vertex_slot.offset, + slot_cursor}; + acc.indirectCmds.push_back(cmd); + slot_cursor += instanceCount; + + totalVertices += (uint32_t)vc; + totalIndices += drawIndexCount; + return cmd_index; + }; + + for(std::size_t i = 0; i < fs.draws.size(); ++i) + { + const auto& dc = fs.draws[i]; + emitDraw( + dc.mesh, dc.stable_id, dc.worldTransform, dc.material.get(), + dc.materialIndex, dc.transform_slot, dc.skinIndex, dc.local_bounds, + /*instanceCount=*/1u); + } + + // Number of per_draws entries that the fs.draws loop actually emitted + // (i.e. after emitDraw's skip predicate). The fast path's diff-upload + // mirror must be seeded from exactly this prefix — emitDraw can skip + // draws (slab exhaustion, GPU-backed indices, missing positions) that a + // naive `vertices > 0` filter would wrongly keep, which would desync the + // mirror from the GPU per_draws layout (threedim#3). + const std::size_t meshDrawCount = acc.perDraws.size(); + + // ── fs.instances ── one cmd per instance_component, instanceCount = + // group's instance count, firstInstance = slot_cursor before the + // cmd. Per-instance translations / colors are GPU-copied from the + // upstream Instancer's source buffers into the concat per-instance + // arrays at offset slot_base * stride; CPU-side draw_ids[slot..] + // get the cmd-index of the owning group (populated below, after + // both loops complete and slot_cursor stops moving). 
+ // + // Defensive null-handle skip: the upstream Instancer may republish + // a fresh `instance_component` whose buffer handles haven't been + // populated yet (CSF compute pass mid-rebuild, etc). Skipping the + // group for that frame is correct — next frame the upstream is + // ready and the group renders. + for(std::size_t k = 0; k < fs.instances.size(); ++k) + { + const auto& inst_draw = fs.instances[k]; + if(!inst_draw.instance) + continue; + const auto& inst = *inst_draw.instance; + if(!inst.prototype || inst.prototype->primitives.empty()) + continue; + if(inst.instance_count == 0) + continue; + + const auto& prim = inst.prototype->primitives[0]; + if(prim.vertex_count == 0) + continue; + + // Defensive null-handle skip on prototype buffers — happens during + // model swaps when the new prototype's data hasn't been uploaded + // yet. The next frame retries. + bool prototype_buffers_ready = true; + for(const auto& vb : prim.vertex_buffers) + { + if(!vb) + continue; + if(auto* gpu = ossia::get_if(&vb->resource)) + { + if(!gpu->native_handle) + { prototype_buffers_ready = false; break; } + } + else if(auto* cpu = ossia::get_if(&vb->resource)) + { + if(!cpu->data || cpu->byte_size == 0) + { prototype_buffers_ready = false; break; } + } + else + { prototype_buffers_ready = false; break; } + } + if(prim.index_buffer && prototype_buffers_ready) + { + const auto& ib = *prim.index_buffer; + if(auto* gpu = ossia::get_if(&ib.resource)) + { + if(!gpu->native_handle) prototype_buffers_ready = false; + } + else if(auto* cpu = ossia::get_if(&ib.resource)) + { + if(!cpu->data || cpu->byte_size == 0) prototype_buffers_ready = false; + } + } + if(!prototype_buffers_ready) + continue; + + // Per-instance source buffers — the source may use the translation / + // trs / mat4 layouts; only the translation vector is consumed (the + // shader's per-instance VERTEX_INPUT is vec3). Rotation / scale from + // trs / mat4 sources are not applied yet (follow-up, Phase 3.5). 
+ QRhiBuffer* srcTranslations = nullptr; + uint32_t srcTranslationOffset = 0; + uint32_t srcTranslationStride = 16; // CSF emitters pad to vec4. + // Per-format byte offset of the translation within the source + // element. For column-major mat4 (64 B), the translation is + // column 3 at offset 48; vec4 / trs put translation at offset 0. + uint32_t srcTranslationColumnOffset = 0; + if(inst.instance_transforms) + { + if(auto* gpu = ossia::get_if( + &inst.instance_transforms->resource)) + { + if(!gpu->native_handle) + continue; + srcTranslations = static_cast(gpu->native_handle); + srcTranslationOffset = (uint32_t)gpu->byte_offset; + using TF = ossia::instance_component::transform_format; + switch(inst.transform_type) + { + case TF::translation: srcTranslationStride = 16; break; + case TF::trs: srcTranslationStride = 40; break; + case TF::mat4: + srcTranslationStride = 64; + srcTranslationColumnOffset = 48; + break; + } + } + } + QRhiBuffer* srcColors = nullptr; + uint32_t srcColorOffset = 0; + if(inst.instance_colors) + { + if(auto* gpu = ossia::get_if( + &inst.instance_colors->resource)) + { + if(!gpu->native_handle) + continue; + srcColors = static_cast(gpu->native_handle); + srcColorOffset = (uint32_t)gpu->byte_offset; + } + } + + // Build a transient ossia::geometry from the prototype primitive + // and feed it into the shared emitDraw closure. + auto proto_geom = primitiveToGeometry(prim); + if(!proto_geom) + continue; + + const uint32_t slot_base = slot_cursor; + const uint64_t prim_id = resolvePrototypeStableId( + inst.prototype.get(), prim); + + const uint32_t cmd_index = emitDraw( + proto_geom.get(), prim_id, inst_draw.worldTransform, + prim.material.get(), /*materialIndex=*/-1, + inst.raw_slot.size != 0 ? 
inst.raw_slot.internal_index + : 0xFFFFFFFFu, + /*skinIndex=*/-1, prim.bounds, inst.instance_count); + if(cmd_index == kCmdSkipped) + continue; + + InstanceSlotRecord rec{}; + rec.slot_base = slot_base; + rec.count = inst.instance_count; + rec.cmd_index = cmd_index; + rec.src_translations = srcTranslations; + rec.src_translation_offset = srcTranslationOffset + srcTranslationColumnOffset; + rec.src_translation_stride = srcTranslationStride; + rec.src_colors = srcColors; + rec.src_color_offset = srcColorOffset; + instanceRecords.push_back(rec); + } + + // GC slabs not seen this frame. Grace = 2 protects against the CB + // still referencing a culled slab's offset through its indirect- + // draw-cmds entry from frame N-1. + m_registry->sweepMeshSlabs(current_frame, 2u); + + // Garbage-collect prototype-id map entries that no longer appear in + // the live scene. Keeps the map bounded across long sessions where + // Instancer prototypes get swapped (Box.gltf → Duck.gltf etc). + { + ossia::hash_set live_protos; + live_protos.reserve(fs.instances.size()); + for(const auto& id : fs.instances) + { + if(id.instance && id.instance->prototype) + live_protos.insert(id.instance->prototype.get()); + } + for(auto it = m_protoStableIds.begin(); it != m_protoStableIds.end();) + { + if(live_protos.find(it->first) == live_protos.end()) + it = m_protoStableIds.erase(it); + else + ++it; + } + } + + m_mdi.totalVertices = totalVertices; + m_mdi.totalIndices = totalIndices; + m_mdi.drawCount = (uint32_t)acc.indirectCmds.size(); + m_lastDrawCount = std::max(m_lastDrawCount, acc.indirectCmds.size()); + m_instSlotsUsed = slot_cursor; + + // drawCount==0: no mesh draws this frame, but procedural-only consumers + // (classic_skybox, fullscreen-triangle effects) still need the + // scene-wide aux table — `camera` rides on the geometry, so an empty + // mesh_list would leave them with no camera UBO. 
Fall through and + // build a 0-vertex carrier mesh that exposes the full auxiliary + // list; mesh-consuming downstream nodes see vertices==0 and skip + // their draw call. The drawCount-dependent uploads below are gated + // on non-empty sources; the binding extents fall back to one + // element so RHI accepts the bindings. + + const int64_t pdBytes = std::max( + sizeof(PerDrawGPU), + (int64_t)acc.perDraws.size() * sizeof(PerDrawGPU)); + const int64_t icBytes = std::max( + sizeof(Acc::IndirectCmd), + (int64_t)acc.indirectCmds.size() * sizeof(Acc::IndirectCmd)); + const int64_t pdbBytes + = (int64_t)acc.perDrawBounds.size() * sizeof(PerDrawBoundsGPU); + + // Grow-only for the preprocessor-owned small SSBOs (arena streams + // don't grow — pre-sized in registry.init()). On realloc we drop + // the diff-upload mirror so the next diffUpload call (fast path + // at lines 4744 / 4751) treats the new buffer as empty and uploads + // the full fresh contents — see growBuf's prefix-staleness comment. + // The slow path's `uploadStaticBuffer(per_draws, 0, full_size, ...)` + // at lines 2478-2486 already covers a slow-frame realloc; the + // mirror clear here defends the (less common) case where a fast + // frame's grow is followed by another fast-frame diffUpload before + // a slow frame intervenes. 
+ using UF = QRhiBuffer::UsageFlags; + if(growBuf(renderer, res,m_mdi.per_draws, m_mdi.perDrawsCap, pdBytes, + QRhiBuffer::StorageBuffer, + "ScenePreprocessor::mdi.per_draws")) + m_cachedPerDraws.clear(); + if(growBuf(renderer, res,m_mdi.per_draw_bounds, m_mdi.perDrawBoundsCap, pdbBytes, + QRhiBuffer::StorageBuffer, + "ScenePreprocessor::mdi.per_draw_bounds")) + m_cachedPerDrawBounds.clear(); +#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + growBuf(renderer, res,m_mdi.indirect_draw_cmds, m_mdi.indirectCap, icBytes, + UF(QRhiBuffer::StorageBuffer | QRhiBuffer::IndirectBuffer), + "ScenePreprocessor::mdi.indirect_draw_cmds"); +#else + growBuf(renderer, res,m_mdi.indirect_draw_cmds, m_mdi.indirectCap, icBytes, + QRhiBuffer::StorageBuffer, + "ScenePreprocessor::mdi.indirect_draw_cmds"); +#endif + + // Gate uploads on non-empty sources: when drawCount==0 the carrier + // mesh path keeps the buffers at their element-size minimums (already + // grown by growBuf above) and skips the upload. Procedural consumers + // never read these slots; mesh consumers don't draw, so contents are + // irrelevant. + if(!acc.perDraws.empty()) + res.uploadStaticBuffer( + m_mdi.per_draws, 0, + (int64_t)acc.perDraws.size() * sizeof(PerDrawGPU), + acc.perDraws.data()); + if(!acc.indirectCmds.empty()) + res.uploadStaticBuffer( + m_mdi.indirect_draw_cmds, 0, + (int64_t)acc.indirectCmds.size() * sizeof(Acc::IndirectCmd), + acc.indirectCmds.data()); + if(pdbBytes > 0) + res.uploadStaticBuffer( + m_mdi.per_draw_bounds, 0, pdbBytes, acc.perDrawBounds.data()); + + // Seed the fast-path diff-upload mirror from the ACTUALLY-EMITTED set + // (acc.perDraws / acc.perDrawBounds), restricted to the fs.draws prefix + // (instance-group entries are never compared on the fast path — it's + // gated on fs.instances.empty()). 
Seeding from `freshPerDraws` (filtered + // only by vertices>0) would diverge whenever emitDraw skipped a draw, + // making diffUpload write a neighbour's model matrix into the wrong slot + // (threedim#3). + m_cachedPerDraws.assign( + acc.perDraws.begin(), + acc.perDraws.begin() + (std::ptrdiff_t)meshDrawCount); + m_cachedPerDrawBounds.assign( + acc.perDrawBounds.begin(), + acc.perDrawBounds.begin() + (std::ptrdiff_t)meshDrawCount); + + // ── Per-instance concat buffers (Phase 2 unified MDI) ────────────── + // + // Three parallel arrays sized to slot_cursor: + // - draw_ids[k] : cmd index of the cmd that owns slot k + // - translations[k] : vec4 (xyz used) — identity for regular cmd + // slots, GPU-copied per-particle position for + // instance group slots + // - colors[k] : vec4 — identity (1,1,1,1) for regular cmd + // slots, GPU-copied per-instance color for + // groups + // + // Layout invariant: every regular fs.draws cmd at acc index i lands + // at slot i (instanceCount=1). Instance groups follow contiguously + // (slot >= acc.indirectCmds.size() - instanceRecords.size(); skipped + // groups emit no cmd, so each range is recorded in instanceRecords). The + // shader reads `draw_id` as a per-instance VERTEX_INPUT and indexes + // per_draws[draw_id] — works on both indirect and CPU-fallback paths + // because firstInstance is the only state needed (no gl_DrawID + // dependency). + if(slot_cursor > 0) + { + const int64_t drawIdsBytes = (int64_t)slot_cursor * 4; + const int64_t translationsBytes = (int64_t)slot_cursor * 16; + const int64_t colorsBytes = (int64_t)slot_cursor * 16; + + // m_instDrawIds: paired with diff-upload via m_cachedInstDrawIds + // at line 2544. On realloc we MUST clear the mirror — a stale mirror was + // the root cause of the "instances disappear past a power-of-two count" + // bug (4→5, 8→9, 16→17 …). 
For an Instancer with one prototype + // every slot's draw_id is the same value (the cmd_index, usually + // 0), so cached and fresh are byte-identical for the prefix and + // diffUpload's equal-runs branch (line 783) skips them — leaving + // the new buffer's prefix as uninitialised driver memory. The + // basic-unlit / classic_pbr vertex shader then OOBs on + // per_draws[draw_id] for every "garbage" instance. Translations / + // colors are immune (full GPU copy at lines 2606+), so they don't + // need the clear, but cleaning the diff-upload one is mandatory. + if(growBuf(renderer, res,m_instDrawIds, m_instDrawIdsCap, drawIdsBytes, + UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer), + "ScenePreprocessor::inst.draw_ids")) + m_cachedInstDrawIds.clear(); + growBuf(renderer, res,m_instTranslations, m_instTranslationsCap, + translationsBytes, + UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer), + "ScenePreprocessor::inst.translations"); + growBuf(renderer, res,m_instColors, m_instColorsCap, colorsBytes, + UF(QRhiBuffer::VertexBuffer | QRhiBuffer::StorageBuffer), + "ScenePreprocessor::inst.colors"); + + // Build the full draw_ids vector. For a regular fs.draws cmd at + // acc index i: draw_ids[i] = i. For instance group records: the + // group's slot range gets cmd_index repeated `count` times. + // Diff-uploaded via the m_cachedInstDrawIds mirror so steady-state + // frames touch zero bytes when the topology is unchanged. + std::vector fresh_draw_ids(slot_cursor, 0u); + // Regular cmds: each occupies one slot at acc index = slot index. 
+ const std::size_t n_regular_cmds + = acc.indirectCmds.size() - instanceRecords.size(); + for(std::size_t i = 0; i < n_regular_cmds; ++i) + fresh_draw_ids[i] = (uint32_t)i; + for(const auto& rec : instanceRecords) + { + for(uint32_t k = 0; k < rec.count; ++k) + fresh_draw_ids[rec.slot_base + k] = rec.cmd_index; + } + diffUpload(res, m_instDrawIds, m_cachedInstDrawIds, fresh_draw_ids); + + // Regular-slot identity values for translations + colors. Instance + // group slots (offset >= n_regular_cmds * 16) are filled by the + // GPU copies below — uploadStaticBuffer here covers ONLY the + // regular range so we don't stomp the GPU-copied data. Instance + // group slot ranges that overlap stale content from a previous + // frame are overwritten by the per-frame GPU copy. + if(n_regular_cmds > 0) + { + std::vector regular_translations(n_regular_cmds * 4, 0.f); + std::vector regular_colors(n_regular_cmds * 4, 1.f); + res.uploadStaticBuffer( + m_instTranslations, 0, + (quint32)(n_regular_cmds * 16), + regular_translations.data()); + res.uploadStaticBuffer( + m_instColors, 0, + (quint32)(n_regular_cmds * 16), + regular_colors.data()); + } + + // Queue GPU copies for instance groups. Each record copies + // `count` instances from the upstream Instancer's source buffer + // into the concat array at `slot_base * stride` bytes. The + // record's src_translation_offset is biased per source format so + // each strided slice lands on the actual translation bytes: + // - translation (vec4): bytes [0..15] = (x, y, z, w). + // - trs (vec3 T + ...): bytes [0..15] = T + 4 leading bytes + // of R; the shader binds vec3 from offset 0 so stray R bytes + // are never sampled. + // - mat4 (col-major): bytes [48..63] = column 3 = (Tx, Ty, Tz, 1). 
+ auto queueInstanceCopy = [&]( + QRhiBuffer* src, uint32_t srcOffset, uint32_t srcStride, + QRhiBuffer* dst, uint32_t dstOffset, uint32_t count, + uint32_t elemSize) + { + if(!src || !dst || count == 0) + return; + PendingGpuCopy op; + op.attr = MdiAttr::Positions; // unused when dst is set explicitly + op.src = src; + op.dst = dst; + op.src_offset = (int)srcOffset; + op.dst_offset = (int)dstOffset; + op.vertex_count = (int)count; + op.src_stride = (int)srcStride; + op.element_size = (int)elemSize; + op.size = (op.src_stride == 0 || op.src_stride == op.element_size) + ? op.vertex_count * op.element_size + : op.element_size; + m_pendingGpuCopies.push_back(op); + }; + for(const auto& rec : instanceRecords) + { + // Translation: queue a 16-byte copy (elemSize) per instance into + // its vec4-stride slot; the trailing 4 bytes are whatever follows + // the translation in the source, and the shader binds vec3 from + // offset 0 so that pad is never sampled. NOTE(review): a record + // with a null source queues no copy — confirm its slots get filled. + if(rec.src_translations) + { + queueInstanceCopy( + rec.src_translations, rec.src_translation_offset, + rec.src_translation_stride, + m_instTranslations, rec.slot_base * 16, rec.count, + /*elemSize=*/16); + } + if(rec.src_colors) + { + queueInstanceCopy( + rec.src_colors, rec.src_color_offset, /*srcStride=*/16, + m_instColors, rec.slot_base * 16, rec.count, + /*elemSize=*/16); + } + } + } + + // Build the output geometry referencing the arena stream buffers + // (pointer-stable across frames and scene churn). + ossia::geometry g; + auto wrapGpu = [](QRhiBuffer* b, int64_t size) { + ossia::geometry::gpu_buffer gb; + gb.handle = b; + gb.byte_size = size; + return ossia::geometry::buffer{.data = gb, .dirty = true}; + }; + + // The "byte_size" on each gpu_buffer is the binding extent + // downstream consumers use when constructing their MeshBuffer + // BufferViews. 
Using the arena's full capacity (stable across + // frames) keeps downstream pointer identity + extent identical + // frame-over-frame — the per-draw `baseVertex` in + // indirect_draw_cmds addresses into this range. + const int64_t posCapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Positions]; + const int64_t nrmCapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Normals]; + const int64_t uvCapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Texcoords]; + const int64_t tanCapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Tangents]; + const int64_t colCapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Colors]; + const int64_t uv1CapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Texcoords1]; + const int64_t idxCapBytes + = (int64_t)GpuResourceRegistry::kMeshCapBytes[(int)Stream::Indices]; + + // Buffer slot order is wired through to the index-buffer slot + // below — keep buffers 0..5 as the six vertex streams, slot 6 as + // the index buffer. Adding/reordering here REQUIRES updating + // g.index.buffer at the end of this block. + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Positions), posCapBytes)); + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Normals), nrmCapBytes)); + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Texcoords), uvCapBytes)); + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Tangents), tanCapBytes)); + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Colors), colCapBytes)); + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Texcoords1), uv1CapBytes)); + g.buffers.push_back(wrapGpu( + m_registry->meshStreamBuffer(Stream::Indices), idxCapBytes)); + + // MDI uses vec4 stride (16 B) for position and normal even though the + // shader binding format is float3. 
Vulkan reads the first 12 bytes of + // each 16-byte slot for vec3, so the last 4 bytes are unused padding. + // Why: GPU-resident vertex sources (compute-shader outputs) naturally + // emit vec3 inside a 16-byte-aligned slot due to std430/std140 layout + // rules. Matching MDI stride lets us turn what would be a per-vertex + // strided copyBuffer loop (O(N) vkCmdCopyBuffer regions per frame) + // into a single tight blit. Cost: 33 % extra memory for pos/nrm only. + ossia::geometry::binding bPos{}; bPos.byte_stride = 16; bPos.classification = ossia::geometry::binding::per_vertex; + ossia::geometry::binding bNrm{}; bNrm.byte_stride = 16; bNrm.classification = ossia::geometry::binding::per_vertex; + ossia::geometry::binding bUv{}; bUv.byte_stride = 8; bUv.classification = ossia::geometry::binding::per_vertex; + ossia::geometry::binding bTan{}; bTan.byte_stride = 16; bTan.classification = ossia::geometry::binding::per_vertex; + ossia::geometry::binding bCol{}; bCol.byte_stride = 16; bCol.classification = ossia::geometry::binding::per_vertex; + ossia::geometry::binding bUv1{}; bUv1.byte_stride = 8; bUv1.classification = ossia::geometry::binding::per_vertex; + g.bindings.push_back(bPos); + g.bindings.push_back(bNrm); + g.bindings.push_back(bUv); + g.bindings.push_back(bTan); + g.bindings.push_back(bCol); + g.bindings.push_back(bUv1); + + // `input` is both the type and the vector member on geometry; use the + // elaborated `struct` tag to disambiguate in this scope. 
+ using GeomInput = struct ossia::geometry::input; + g.input.push_back(GeomInput{.buffer = 0, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 1, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 2, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 3, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 4, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 5, .byte_offset = 0}); + + auto pushAttr = [&](ossia::attribute_semantic sem, int binding, + decltype(ossia::geometry::attribute::format) fmt) { + ossia::geometry::attribute a{}; + a.binding = binding; + a.byte_offset = 0; + a.format = fmt; + a.semantic = sem; + g.attributes.push_back(a); + }; + pushAttr(ossia::attribute_semantic::position, 0, ossia::geometry::attribute::float3); + pushAttr(ossia::attribute_semantic::normal, 1, ossia::geometry::attribute::float3); + pushAttr(ossia::attribute_semantic::texcoord0, 2, ossia::geometry::attribute::float2); + pushAttr(ossia::attribute_semantic::tangent, 3, ossia::geometry::attribute::float4); + pushAttr(ossia::attribute_semantic::color0, 4, ossia::geometry::attribute::float4); + pushAttr(ossia::attribute_semantic::texcoord1, 5, ossia::geometry::attribute::float2); + + // ── Per-instance vertex bindings (Phase 2 unified MDI) ───────────── + // + // Three PerInstance step_rate=1 bindings carry the unified-MDI + // per-instance state. Each indirect cmd (regular or instance group) + // sets `firstInstance = its own slot offset` so these bindings + // address the right slice of each concat buffer on both the + // indirect path and the CPU-fallback drawIndexed loop. 
+ // + // Buffer slot order in `g.buffers`: + // 0..5 per-vertex streams (pos / nrm / uv0 / tan / col / uv1) + // 6 index buffer + // 7 inst_translations (vec4 stride 16) + // 8 inst_colors (vec4 stride 16) + // 9 inst_draw_ids (uint stride 4) + // Adding more slots HERE shifts every subsequent aux's buf index; + // the post-section building auxiliaries computes its base via + // `baseBuf = (int)g.buffers.size()` so it doesn't need changing. + if(slot_cursor > 0 && m_instTranslations && m_instColors && m_instDrawIds) + { + // Index buffer must come before per-instance buffers since + // g.index.buffer is hard-coded to slot 6 below; per-instance + // buffers occupy slots 7, 8, 9. + g.buffers.push_back(wrapGpu( + m_instTranslations, (int64_t)slot_cursor * 16)); + g.buffers.push_back(wrapGpu( + m_instColors, (int64_t)slot_cursor * 16)); + g.buffers.push_back(wrapGpu( + m_instDrawIds, (int64_t)slot_cursor * 4)); + + ossia::geometry::binding bInstT{}; + bInstT.byte_stride = 16; + bInstT.classification = ossia::geometry::binding::per_instance; + bInstT.step_rate = 1; + const int instTBindIdx = (int)g.bindings.size(); + g.bindings.push_back(bInstT); + + ossia::geometry::binding bInstC{}; + bInstC.byte_stride = 16; + bInstC.classification = ossia::geometry::binding::per_instance; + bInstC.step_rate = 1; + const int instCBindIdx = (int)g.bindings.size(); + g.bindings.push_back(bInstC); + + ossia::geometry::binding bInstD{}; + bInstD.byte_stride = 4; + bInstD.classification = ossia::geometry::binding::per_instance; + bInstD.step_rate = 1; + const int instDBindIdx = (int)g.bindings.size(); + g.bindings.push_back(bInstD); + + g.input.push_back(GeomInput{.buffer = 7, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 8, .byte_offset = 0}); + g.input.push_back(GeomInput{.buffer = 9, .byte_offset = 0}); + + // Per-instance attributes. Translation reuses the existing + // `translation` semantic (no per-vertex `translation` ever exists, + // so no collision). 
Color uses the dedicated `instance_color0` + // semantic added to libossia for unified MDI to avoid the + // per-vertex / per-instance `color0` collision in + // findGeometryAttribute. draw_id uses `instance_draw_id` + // (uint-typed; required by every shader using per_draws[] in + // Phase 2). + pushAttr(ossia::attribute_semantic::translation, + instTBindIdx, ossia::geometry::attribute::float3); + pushAttr(ossia::attribute_semantic::instance_color0, + instCBindIdx, ossia::geometry::attribute::float4); + pushAttr(ossia::attribute_semantic::instance_draw_id, + instDBindIdx, ossia::geometry::attribute::uint1); + } + + g.vertices = (int)m_mdi.totalVertices; + g.indices = (int)m_mdi.totalIndices; + g.instances = 1; + g.topology = ossia::geometry::triangles; + // glTF doubleSided: pipeline-side culling is OFF for the MDI + // batch. Per-fragment culling is shader-side, driven by each + // material's `feature_mask`: + // - single-sided (no `double_sided` bit): shader discards + // `!gl_FrontFacing` fragments → matches CULL_BACK behaviour. + // - double-sided: shader keeps both sides and flips the surface + // normal for back-facing fragments so lighting works on both. + // Splitting the MDI batch by cull mode would multiply the draw + // count and lose much of the indirect-draw benefit; per-fragment + // gating is the simpler trade. + g.cull_mode = ossia::geometry::none; + g.front_face = ossia::geometry::counter_clockwise; + + g.index.buffer = 6; // Slot order: pos=0, nrm=1, uv=2, tan=3, col=4, uv1=5, idx=6. + g.index.byte_offset = 0; + g.index.format = decltype(g.index)::uint32; + + // filter_tag / filter_material_index are per-geometry metadata + // used by Tier-2 mesh-level filters (FlattenedSceneFilterNode). + // The preprocessor emits ONE geometry per MDI batch spanning many + // materials, so there's no single value that would be meaningful + // here — we stamp 0 so Tier-2 filters either drop or keep the + // whole batch. 
Per-draw material / tag filtering belongs to a + // Tier-3 compute-shader filter that consumes indirect_draw_cmds + + // per_draws (CSF-based, see docs on scene_filter_* presets). + g.filter_tag = 0; + g.filter_material_index = 0; + + // Attach scene-wide auxiliaries. Shaders pick these up by NAME via + // try_bind_from_geometry, so there's no need for downstream nodes to + // wire every SSBO/UBO manually — the geometry cable already carries + // scene lights / materials / per-draws / indirect / counts / camera + // / env. The names here MUST match the shader's `INPUTS[].NAME`. + const int baseBuf = (int)g.buffers.size(); + // scene_lights → RawLight arena directly (task 28b-shader flip). + // Every classic_pbr_*.frag's Light struct now matches the arena + // layout and the light loop reads + // scene_lights.entries[scene_light_indices.data[i]], composing + // world-space direction from world_transforms[transform_slot]. + { + auto* lightArena + = renderer.registry().buffer(GpuResourceRegistry::Arena::RawLight); + const int64_t lightArenaBytes + = (int64_t)renderer.registry().arenaSlotStride( + GpuResourceRegistry::Arena::RawLight) + * (int64_t)renderer.registry().arenaSlotCount( + GpuResourceRegistry::Arena::RawLight); + g.buffers.push_back(wrapGpu(lightArena, lightArenaBytes)); + } + // scene_materials binding points at the Material arena directly. + // Shader indexes entries[material_index] where material_index is + // the arena slot index (stamped in PerDrawGPU above) and the SSBO + // stride matches sizeof(MaterialGPU) = 80B. Eliminates the + // per-frame CPU-side repack + upload that m_materialsBuffer used + // to carry. 
+ { + auto* matArena + = renderer.registry().buffer(GpuResourceRegistry::Arena::Material); + const int64_t matArenaBytes + = (int64_t)renderer.registry().arenaSlotStride( + GpuResourceRegistry::Arena::Material) + * (int64_t)renderer.registry().arenaSlotCount( + GpuResourceRegistry::Arena::Material); + g.buffers.push_back(wrapGpu(matArena, matArenaBytes)); + } + g.buffers.push_back(wrapGpu(m_materialsExtBuffer, m_materialsExtCap)); + g.buffers.push_back(wrapGpu(m_mdi.per_draws, pdBytes)); + g.buffers.push_back(wrapGpu(m_mdi.indirect_draw_cmds, icBytes)); + g.buffers.push_back(wrapGpu(m_sceneCountsBuffer, sizeof(SceneCountsUBO))); + // Only bind the ACTIVE camera slot (first 240 bytes) — shaders declare + // `uniform camera_t camera` as a single entry, not an array. Slot 0 is + // guaranteed to be the active camera by packAndUploadCameras. + g.buffers.push_back(wrapGpu(m_camerasBuffer, sizeof(CameraUBOData))); + g.buffers.push_back(wrapGpu(m_camerasPrevBuffer, sizeof(CameraUBOData))); + // Env UBO: bind a PREPROCESSOR-owned slot, not any single producer's + // slot. With multi-producer env composition (task #26) the merged + // scene_environment is built field-by-field by merge_scenes from + // every contributing EnvironmentLoader / CubemapLoader — no single + // producer's slot holds the merged result. The preprocessor packs + // the merged CPU-side env into m_envSlot here and consumers bind + // that offset. + m_env_aux_offset = renderer.registry().slotOffset(m_envSlot); + g.buffers.push_back(wrapGpu( + renderer.registry().buffer(GpuResourceRegistry::Arena::Env), + sizeof(EnvParamsUBO))); + // World transforms — arena-slot-indexed (task 28b phase 1). Consumer + // shaders read world_transforms.data[slot_index] for any light / + // particle / compute pass that needs slot-addressable world-space + // composition. Preprocessor-private so multi-filter pipelines don't + // stomp each other. 
+ g.buffers.push_back(wrapGpu( + m_worldTransformsBuffer, m_worldTransformsCap)); + // Previous-frame snapshot of the same layout; consumer shaders + // declare an AUXILIARY / storage input named `world_transforms_prev` + // to read it for motion vectors, TAA, reprojection, etc. + g.buffers.push_back(wrapGpu( + m_worldTransformsPrevBuffer, m_worldTransformsCap)); + // scene_light_indices — compact list of RawLight arena slot indices + // for the scene's live lights (task 28b phase 3). Shader iterates + // 0..scene_counts.light_count, reads + // scene_lights.entries[scene_light_indices.data[i]]. + g.buffers.push_back(wrapGpu( + m_lightIndicesBuffer, m_lightIndicesCap)); + + { + const int64_t lightArenaBytes + = (int64_t)renderer.registry().arenaSlotStride( + GpuResourceRegistry::Arena::RawLight) + * (int64_t)renderer.registry().arenaSlotCount( + GpuResourceRegistry::Arena::RawLight); + g.auxiliary.push_back({ + .name = "scene_lights", .buffer = baseBuf, + .byte_offset = 0, + .byte_size = lightArenaBytes}); + } + { + const int64_t matArenaBytes + = (int64_t)renderer.registry().arenaSlotStride( + GpuResourceRegistry::Arena::Material) + * (int64_t)renderer.registry().arenaSlotCount( + GpuResourceRegistry::Arena::Material); + g.auxiliary.push_back({ + .name = "scene_materials", .buffer = baseBuf + 1, + .byte_offset = 0, + .byte_size = matArenaBytes}); + } + // Parallel to scene_materials — same element count, same indexing. + // OpenPBR-grade shaders bind this as a second SSBO and use the same + // material_index to read the extension struct. + // byte_size = full buffer capacity. The buffer is sized in update() + // to (max_arena_slot + 1) * sizeof(MaterialExtensionsGPU) — see the + // arenaSlotEntries computation there. The shader indexes by + // pd.material_index (arena slot), so the binding extent must cover + // the full arena range. 
+ g.auxiliary.push_back({ + .name = "scene_materials_ext", .buffer = baseBuf + 2, + .byte_offset = 0, + .byte_size = m_materialsExtCap}); + g.auxiliary.push_back({ + .name = "per_draws", .buffer = baseBuf + 3, + .byte_offset = 0, .byte_size = pdBytes}); + g.auxiliary.push_back({ + .name = "indirect_draw_cmds", .buffer = baseBuf + 4, + .byte_offset = 0, .byte_size = icBytes}); + g.auxiliary.push_back({ + .name = "scene_counts", .buffer = baseBuf + 5, + .byte_offset = 0, .byte_size = (int64_t)sizeof(SceneCountsUBO)}); + g.auxiliary.push_back({ + .name = "camera", .buffer = baseBuf + 6, + .byte_offset = 0, .byte_size = (int64_t)sizeof(CameraUBOData)}); + g.auxiliary.push_back({ + .name = "camera_prev", .buffer = baseBuf + 7, + .byte_offset = 0, .byte_size = (int64_t)sizeof(CameraUBOData)}); + g.auxiliary.push_back({ + .name = "env", .buffer = baseBuf + 8, + .byte_offset = (int64_t)m_env_aux_offset, + .byte_size = (int64_t)sizeof(EnvParamsUBO)}); + g.auxiliary.push_back({ + .name = "world_transforms", .buffer = baseBuf + 9, + .byte_offset = 0, + .byte_size = m_worldTransformsCap}); + // Previous-frame snapshot for motion-vector / TAA / reprojection + // shaders. Snapshot is produced in runInitialPasses via a single + // GPU-side copyBuffer; the per-slot writes for the same frame + // are deferred from update() into the next resource-update batch + // so the copy reads the still-frame-N-1 contents of current. + g.auxiliary.push_back({ + .name = "world_transforms_prev", .buffer = baseBuf + 10, + .byte_offset = 0, + .byte_size = m_worldTransformsCap}); + g.auxiliary.push_back({ + .name = "scene_light_indices", .buffer = baseBuf + 11, + .byte_offset = 0, + .byte_size = m_lightIndicesCap}); + + // KHR_texture_transform: per-material per-channel UV transforms. + // Parallel to scene_materials, indexed by material_index. Identity + // transforms for materials without the extension — zero shader cost. 
+ { + const int buf_idx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu( + m_materialUVTransformsBuffer, m_materialUVTransformsCap)); + g.auxiliary.push_back({ + .name = "scene_material_uv_xforms", .buffer = buf_idx, + .byte_offset = 0, + .byte_size = m_materialUVTransformsCap}); + } + + // per_draw_bounds — sidecar to per_draws, one local-space AABB per + // draw (std430 2×vec4 = 32 B). Consumer: GPU culling shaders + // (scene_filter_aabb_cull.csf and the future HiZ variant) read this + // together with per_draws[i].model to frustum-test each draw and + // rewrite indirect_draw_cmds[i] with indexCount=0 when culled. + { + const int buf_idx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu(m_mdi.per_draw_bounds, pdbBytes)); + g.auxiliary.push_back({ + .name = "per_draw_bounds", .buffer = buf_idx, + .byte_offset = 0, .byte_size = pdbBytes}); + } + + // shadow_cascades UBO — 544 B, std140. Consumer: classic_pbr_shadowed + // PCF cascade pick + light_view_proj sampling, and the shadow-pass + // depth-only shader's light_view_proj array. Populated from + // scene_state.shadow_cascades (Threedim::ShadowCascadeSetup). Always + // published — when no upstream authored cascades, cascade_count=0 + // signals consumers to skip shadow sampling (the shader-side guard + // already handles this). + if(m_shadowCascadesBuffer) + { + const int buf_idx = (int)g.buffers.size(); + g.buffers.push_back(wrapGpu( + m_shadowCascadesBuffer, (int64_t)sizeof(ShadowCascadesUBO))); + g.auxiliary.push_back({ + .name = "shadow_cascades", .buffer = buf_idx, + .byte_offset = 0, + .byte_size = (int64_t)sizeof(ShadowCascadesUBO)}); + } + + // Attach per-channel material texture arrays + skybox as auxiliary + // textures. Consumer shaders (classic_pbr_textured / classic_pbr_ibl / + // classic_pbr_full) pick them up by NAME through the same + // try_bind_texture_from_geometry mechanism as the buffer auxes above — + // no manual cable required. 
Null handles are filtered out so a shader + // missing a given channel falls back to its own sampler (emptyTexture). + appendTextureAuxes(g); + + // Mid-pipeline aux injection from InjectBuffer / InjectTexture nodes + // upstream. Name collisions with preprocessor-owned auxes are resolved + // last-wins: we append these AFTER the preprocessor's own entries, and + // consumer-side find_auxiliary / find_auxiliary_texture return the + // LAST match when we pre-remove colliding earlier entries below. + // + // Buffer injections: wrap each handle as a geometry-buffer slot, add + // an auxiliary_buffer entry pointing at it. + if(this->scene.state) + { + for(const auto& ib : this->scene.state->inject_buffers) + { + if(!ib.native_handle || ib.name.empty()) + continue; + // Remove any earlier entry with the same name so the injection + // wins (consumer find_auxiliary returns first-match; easier to + // maintain "last-wins" semantics by purging the earlier one). + auto& aux_list = g.auxiliary; + aux_list.erase( + std::remove_if( + aux_list.begin(), aux_list.end(), + [&](const ossia::geometry::auxiliary_buffer& a) { + return a.name == ib.name; + }), + aux_list.end()); + const int buf_idx = (int)g.buffers.size(); + g.buffers.push_back( + wrapGpu(static_cast(ib.native_handle), ib.byte_size)); + g.auxiliary.push_back( + {.name = ib.name, + .buffer = buf_idx, + .byte_offset = 0, + .byte_size = ib.byte_size}); + } + for(const auto& it : this->scene.state->inject_textures) + { + if(!it.native_handle || it.name.empty()) + continue; + auto& tex_list = g.auxiliary_textures; + tex_list.erase( + std::remove_if( + tex_list.begin(), tex_list.end(), + [&](const ossia::geometry::auxiliary_texture& a) { + return a.name == it.name; + }), + tex_list.end()); + g.auxiliary_textures.push_back( + {.name = it.name, .native_handle = it.native_handle}); + } + } + + // Use the existing indirect_count slot for the draw count — renderers + // that support drawIndexedIndirect pick it up automatically. 
+ // + // drawCount==0 carrier-mesh path: leave indirect_count.handle null + // so CustomMesh::drawSingleMesh skips its indirect-draw branch + // (which would otherwise issue cb.drawIndirect against a buffer + // whose contents weren't uploaded this frame, yielding the + // UINT32_MAX-firstIndex Vulkan validation error). The carrier still + // gets pushed onto m_outputSpec.meshes as a pure aux carrier for + // procedural-only consumers (skybox, fullscreen effects); they read + // the auxiliary list and don't issue an indirect draw themselves. + // Mesh consumers fall through to `cb.draw(0, 0)` — a no-op. + ossia::geometry::gpu_buffer ic_count; + if(!acc.indirectCmds.empty()) + { + ic_count.handle = m_mdi.indirect_draw_cmds; + ic_count.byte_size = icBytes; + } + g.indirect_count = ic_count; + + // CPU-side copy of indirect draw commands for the Qt < 6.12 fallback + // path. CustomMesh::draw iterates these and issues per-command + // drawIndexed calls with the correct firstInstance / baseVertex. + g.cpu_draw_commands.reserve(acc.indirectCmds.size()); + for(const auto& cmd : acc.indirectCmds) + { + g.cpu_draw_commands.push_back({ + .index_or_vertex_count = cmd.indexCount, + .instance_count = cmd.instanceCount, + .first_index_or_vertex = cmd.firstIndex, + .base_vertex = cmd.baseVertex, + .first_instance = cmd.baseInstance}); + } + + auto meshes = std::make_shared(); + meshes->meshes.push_back(std::move(g)); + meshes->dirty_index + = (m_outputSpec.meshes ? m_outputSpec.meshes->dirty_index : 0) + 1; + + m_outputSpec.meshes = std::move(meshes); + if(!m_outputSpec.filters) + m_outputSpec.filters = std::make_shared(); + } + + + // Decode a texture_source to an RGBA8888 QImage. Single-texture-point of + // decode so the rebuild code below can dedupe upstream of JPEG decoding. + // + // Plan 09 S1 path: when `src.content_hash != 0` and an AssetTable is + // available, peek the cache first. On hit: skip decode, return the + // cached QImage directly. 
On miss: decode, stage into the cache so + // future RenderLists (other outputs, reloads within the session) hit + // without re-decoding. Zero-hash sources (legacy parsers that don't + // populate the hash) always take the decode path. + static QImage decodeTextureSource( + const ossia::texture_source& src, Gfx::AssetTable* cache) + { + if(cache && src.content_hash != 0) + { + if(auto asset = cache->peek(src.content_hash); asset && !asset->image.isNull()) + return asset->image; + } + + std::optional decoded; + if(src.embedded_data && !src.embedded_data->empty()) + { + QByteArray bytes( + reinterpret_cast(src.embedded_data->data()), + (qsizetype)src.embedded_data->size()); + decoded = decodeImageFromMemory( + bytes, QString::fromStdString(src.mime_type)); + } + else if(!src.file_path.empty()) + { + decoded = decodeImageFromPath(QString::fromStdString(src.file_path)); + } + if(decoded && !decoded->image.isNull()) + { + // Stage into the cross-output decode cache so the next + // RenderList / reload hits without re-decoding. Stage is + // idempotent — same hash re-staged is a no-op. + if(cache && src.content_hash != 0) + cache->stage(src.content_hash, decoded->image); + return decoded->image; + } + QImage fallback(1, 1, QImage::Format_RGBA8888); + fallback.fill(Qt::white); + return fallback; + } + + // Build a content fingerprint of the current materials list — keyed on + // material_component::stable_id rather than the raw pointer. Stable + // across producer rebuilds (the producer re-emits a fresh shared_ptr + // with the same id) AND across merge_scenes contributor reshuffles. + // Falls back to the pointer bits when stable_id is zero so un-stamped + // legacy producers still work (just with less-stable semantics). 
+ void computeMaterialsFingerprint(std::vector& out) const + { + out.clear(); + if(!this->scene.state || !this->scene.state->materials) + return; + const auto& mats = *this->scene.state->materials; + out.reserve(mats.size()); + for(const auto& m : mats) + { + if(!m) + { + out.push_back(0); + continue; + } + out.push_back( + m->stable_id != 0 + ? m->stable_id + : reinterpret_cast(m.get())); + } + } + + // (Re)allocate a material-texture channel's array, deduping by + // texture_source pointer so N materials that share one image upload + // ONE layer, not N. Patches fs.materials[i].textureRefs[ch] with the + // packed layer ref for material i. + // + // Call sequence in update(): + // flattenScene → fs.materials ← un-patched, all textureRefs=NONE + // computeMaterialsFingerprint(fp) ← snapshot element ptrs + // rebuildChannel(ch, fp, fs, …) ← dedupes + patches textureRefs[ch] + // diffUpload / uploadStaticBuffer of scene_materials SSBO + // + // `sameMaterialsContent` is the result of comparing `fp` to + // `m_cachedMaterialsFingerprint`, computed once per update() and passed + // in so the ChannelCount rebuildChannel calls each frame don't each + // re-walk the list. + // + // Returns true if the channel's QRhiTexture* was (re)allocated — + // caller uses this to trigger downstream SRB rebinds. + // Walk materials and assign dynamic-slot indices for texture_refs that + // carry a GPU handle without a source. Rebuilt every frame because the + // upstream QRhiTexture* can swap without the material_component pointer + // changing (e.g., video-texture resized mid-stream). Cheap: O(n_mats), + // no uploads. Materials past the slot cap recycle the LRU-oldest slot + // (per resolveDynamicSlot's eviction path); the corresponding shader + // sampler now points at the new texture rather than tex_ref_none. 
+ void rebuildDynamicSlots(MaterialChannel ch) + { + // Dynamic slot maps persist across the registry's lifetime — they + // are NOT cleared per-frame (cleared only in GpuResourceRegistry + // init()/destroy()). resolveDynamicSlot is idempotent on the same + // QRhiTexture* handle, so re-registering during this per-channel + // pass is a no-op for handles that haven't changed and refreshes + // the LRU last-use stamp on hit. Producers (PBRMesh, + // MaterialOverride) calling resolveDynamicSlot before this pass + // agree on the same slot index for the same handle. + if(!this->scene.state || !this->scene.state->materials || !m_registry) + return; + + // Resolve a single dynamic-handle texture_ref into the channel's + // dynamic slot map. Static refs (with a CPU-side `source`) and + // empty refs short-circuit out — only refs carrying a runtime GPU + // handle land here. Idempotent for repeated handle / multi-channel + // routing. + const auto resolve_dyn = [this, ch](const ossia::texture_ref& tref) { + if(tref.source) + return; + if(!tref.texture.valid()) + return; + m_registry->resolveDynamicSlot(toTexChannel(ch), tref.texture.native_handle); + }; + + for(const auto& m : *this->scene.state->materials) + { + if(!m) + continue; + // Main channel ref (the existing path). + if(const auto* tref = channelRef(ch, *m); tref) + resolve_dyn(*tref); + // Ext-table refs whose pool matches this channel. + for(const auto& slot : kExtTextureSlots) + if(slot.channel == ch) + resolve_dyn(slot.accessor(*m)); + } + } + + bool rebuildChannel( + MaterialChannel ch, bool sameMaterialsContent, RenderList& renderer, + QRhiResourceUpdateBatch& res, FlatScene& fs) + { + if(!m_registry) + return false; + auto& rhi = *renderer.state.rhi; + auto& channel = texChannel(ch); + + const auto matsPtr + = this->scene.state ? 
this->scene.state->materials : nullptr; + + // Dynamic slots refresh every frame regardless of sameMaterialsContent: + // runtime handles can swap without the outer material pointer changing. + rebuildDynamicSlots(ch); + + // Fast path: the per-element materials fingerprint matches what we + // last fingerprinted, and this channel's texture array + layer map + // are still valid. Only need to re-patch textureRefs on fs.materials + // so the SSBO upload below carries the cached layer indices (dynamic + // slots patched from the freshly rebuilt dynamicSlotMap). + if(sameMaterialsContent && channel.primaryArray()) + { + patchMaterialRefsFromCache(ch, fs); + return false; + } + + // Wave 2 S2-shader: multi-bucket texture arrays. Each distinct + // (RGBA8, imageSize) tuple goes into its own bucket. Materials + // reference `tex_ref_static(bucket_id, layer_id)`; patchMaterial- + // RefsFromCache walks buckets[] to emit the correct refs. + // + // Algorithm: + // 1. Clear all buckets' layerMaps (we'll rebuild them). + // 2. Walk materials, decode each unique source up-front, route + // it to `findOrCreateBucket(RGBA8, image.size())`. Layer + // indices are bucket-local. + // 3. For each bucket that changed size/layer-count: reallocate + // its QRhiTextureArray at the right native size. + // 4. Upload decoded images into their assigned (bucket, layer) + // slots — no scaling, sizes already match by construction. + // 5. Ensure bucket 0 always has at least 1 fallback layer so + // the default `baseColorArray` binding stays valid for + // single-bucket-era shaders. + // + // Format axis reserved for future: today every bucket is RGBA8. + // HDR emissive / wide-gamut / compressed formats plug into this + // same mechanism by varying the format argument. + + for(auto& b : channel.buckets) + b.layerMap.clear(); + + // Decoded pending uploads + their target (bucket, layer). 
+ struct PendingLayer + { + int bucket_idx; + int layer_idx; + QImage image; + }; + std::vector pendingUploads; + pendingUploads.reserve(16); + + if(matsPtr) + { + // Process a single static texture_ref into this channel's bucket + // pool. Used uniformly for both the main channel ref and every + // ext-table ref whose `channel` matches `ch` — shared logic + // means new ext slots automatically pick up dedup, decode-fail + // handling, and bucket-cap diagnostics for free. + // + // `is_main_occlusion` enables the glTF MR-r packed-occlusion + // shortcut, which only applies to the main occlusion channel ref + // (an ext texture happening to share a source with MR doesn't + // get short-circuited — semantically distinct field). When the + // shortcut fires we also need the material's MR source pointer + // for the comparison; passed in as `mr_source_for_occ_check`. + const auto register_static_ref + = [&](const ossia::texture_ref& tref, + const ossia::texture_source* mr_source_for_occ_check, + bool is_main_occlusion) { + const auto* s = tref.source.get(); + if(!s) + return; + + // Occlusion-from-MR shortcut: when the material's occlusion + // texture and metallic-roughness texture share a source, the + // shader will read occlusion from MR.r * factor (the canonical + // glTF packing convention) and we don't need to allocate a + // separate occlusion layer for this material. patchMaterial- + // RefsFromCache also short-circuits → tex_ref_none() for the + // occlusion ref, the shader feature_mask bit stays clear, and + // the MR.r path takes over. + if(is_main_occlusion && s == mr_source_for_occ_check) + return; + + // Skip if already mapped in any bucket this walk (same source + // referenced by N materials, or by main + ext slots on the + // same material — single upload shared by all). + for(const auto& b : channel.buckets) + if(b.layerMap.find(s) != b.layerMap.end()) + return; + + // Decode now so we know the native size to pick a bucket. 
+ // AssetTable `peek` may return a cached QImage → zero-cost. + QImage img = decodeTextureSource(*s, renderer.assetTable()); + if(img.isNull()) + return; + + // Heuristic: the decode-failure fallback is a 1×1 image; real + // textures are ≥ 8 px on both axes. Skip bucket assignment on + // clearly-degenerate results so we don't spawn a 1×1 bucket. + if(img.width() < 8 || img.height() < 8) + return; + + // Route to bucket keyed on (format, size, sampler_config). The + // sampler_config split lets per-glTF-texture wrap/filter modes + // be honoured even when several materials share a channel + // array — distinct samplers → distinct buckets, each with its + // own QRhiSampler. For the common case (Sponza, DamagedHelmet, + // most glTFs use a single sampler) this collapses to one + // bucket per (format, size). + auto [b_idx, b_ptr] = channel.findOrCreateBucket( + QRhiTexture::RGBA8, img.size(), tref.sampler); + if(b_idx < 0) + { + qWarning().noquote() + << "ScenePreprocessor: channel" << channelName(ch) + << "hit bucket cap (" + << GpuResourceRegistry::kMaxBuckets + << "); texture_source skipped — shader will see tex_ref_none."; + return; + } + + const int layer = (int)b_ptr->layerMap.size(); + b_ptr->layerMap[s] = layer; + pendingUploads.push_back({b_idx, layer, std::move(img)}); + }; + + const auto register_material_refs + = [&](const ossia::material_component& m) { + const auto* mr_source = m.metallic_roughness_texture.source.get(); + // Main channel ref. + if(const auto* tref = channelRef(ch, m); tref) + register_static_ref(*tref, mr_source, ch == ChannelOcclusion); + // Ext-table refs whose pool matches this channel. + for(const auto& slot : kExtTextureSlots) + if(slot.channel == ch) + register_static_ref(slot.accessor(m), mr_source, false); + }; + for(const auto& m : *matsPtr) + if(m) + register_material_refs(*m); + // Instancer-prototype materials live outside scene_state.materials + // (owned by the prototype mesh_component). 
Walk them here so their + // textures land in the channel buckets and arenaSlotForMaterial + // can patch resolved refs in the upload pass — see diagnostic 029. + for(const auto& inst_draw : fs.instances) + { + const auto* inst = inst_draw.instance.get(); + if(!inst || !inst->prototype) + continue; + for(const auto& prim : inst->prototype->primitives) + if(const auto* mat = prim.material.get(); mat) + register_material_refs(*mat); + } + } + + // Ensure bucket 0 exists for init-time / shader-binding stability. + // If no material landed in it, ensurePrimary() with default size + // gives a safe fallback target. + if(channel.buckets.empty()) + { + channel.ensurePrimary( + QRhiTexture::RGBA8, + QSize(kChannelLayerSize, kChannelLayerSize)); + } + + // Per-bucket allocate / reallocate. + bool anyReallocated = false; + for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi) + { + auto& b = channel.buckets[bi]; + // At least 1 layer — empty bucket gets a fallback at layer 0. + const int wantLayers = std::max(1, (int)b.layerMap.size()); + if(!b.array || b.layers != wantLayers) + { + if(b.array) + b.array->deleteLater(); + b.array = rhi.newTextureArray( + b.format, wantLayers, b.pixelSize, 1, channelFlags(ch)); + if(b.array) + { + b.array->setName( + QByteArray("ScenePreprocessor::") + channelName(ch) + + '[' + QByteArray::number((int)bi) + ']'); + if(!b.array->create()) + { + delete b.array; + b.array = nullptr; + } + else + { + b.layers = wantLayers; + anyReallocated = true; + } + } + } + + // Per-bucket QRhiSampler. Created on first allocation, kept + // alive across rebuilds (the sampler_config is immutable for a + // bucket — bucket identity includes it). Never recreated unless + // the bucket is destroyed. 
+ if(b.array && !b.sampler) + { + auto wrap_to_qrhi = [](ossia::texture_address_mode m) { + switch(m) + { + case ossia::REPEAT: return QRhiSampler::Repeat; + case ossia::CLAMP_TO_EDGE: return QRhiSampler::ClampToEdge; + case ossia::MIRROR: return QRhiSampler::Mirror; + } + return QRhiSampler::Repeat; + }; + auto filter_to_qrhi = [](ossia::texture_filter f, + QRhiSampler::Filter dflt) { + switch(f) + { + case ossia::NONE: return QRhiSampler::None; + case ossia::NEAREST: return QRhiSampler::Nearest; + case ossia::LINEAR: return QRhiSampler::Linear; + } + return dflt; + }; + // Material textures are always uploaded with a full mip chain + // (TextureLoader.cpp::uploadImageToTexture: MipMapped + + // generateMips on first upload). Force the bucket sampler to + // trilinear-filter that chain: + // - mag/min filter promoted to LINEAR when the loader said + // NONE (NEAREST is preserved — that's an explicit author + // choice, e.g. pixel-art assets). + // - mipmap_mode promoted to LINEAR when the loader said NONE + // (the common case where a glTF declared minFilter=LINEAR + // instead of LINEAR_MIPMAP_LINEAR — without this override + // the GPU only ever samples mip 0 and we get the same + // minification noise the mipmap fix was meant to solve). + auto promote_to_linear + = [](ossia::texture_filter f) -> ossia::texture_filter { + return f == ossia::NONE ? 
ossia::LINEAR : f; + }; + b.sampler = rhi.newSampler( + filter_to_qrhi(promote_to_linear(b.sampler_config.mag_filter), QRhiSampler::Linear), + filter_to_qrhi(promote_to_linear(b.sampler_config.min_filter), QRhiSampler::Linear), + filter_to_qrhi(promote_to_linear(b.sampler_config.mipmap_mode), QRhiSampler::Linear), + wrap_to_qrhi(b.sampler_config.wrap_s), + wrap_to_qrhi(b.sampler_config.wrap_t)); + b.sampler->setName( + QByteArray("ScenePreprocessor::") + channelName(ch) + "_sampler[" + + QByteArray::number((int)bi) + ']'); + if(!b.sampler->create()) + { + delete b.sampler; + b.sampler = nullptr; + } + else + { + // Sampler swap forces SRB rebind on the consumer side. + anyReallocated = true; + } + } + } + + // Upload real textures into their bucket/layer slots. + for(auto& pu : pendingUploads) + { + auto& b = channel.buckets[pu.bucket_idx]; + if(!b.array) + continue; + QImage img = std::move(pu.image); + if(img.format() != QImage::Format_RGBA8888) + img.convertTo(QImage::Format_RGBA8888); + // Sizes match by construction — no scale needed. + QRhiTextureSubresourceUploadDescription sub(img); + QRhiTextureUploadEntry entry(pu.layer_idx, 0, sub); + res.uploadTexture( + b.array, QRhiTextureUploadDescription({entry})); + } + + // Fallback for empty buckets (no real uploads): drop a neutral + // 1-layer default so the shader's bucket-switch case for this + // bucket doesn't sample undefined memory. + for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi) + { + auto& b = channel.buckets[bi]; + if(!b.array || !b.layerMap.empty()) + continue; + QImage fallback(b.pixelSize, QImage::Format_RGBA8888); + switch(ch) + { + case ChannelBaseColor: fallback.fill(Qt::white); break; + case ChannelEmissive: fallback.fill(Qt::black); break; + // MR / packed-extension fallback: white (1,1,1,1) so per-material + // metallic_factor / roughness_factor / clearcoat_factor / sheen / etc. + // apply via multiplication. 
A non-white fallback would zero out the + // authored factors (e.g., metallic_factor=1 + no MR texture → black + // metal instead of mirror). + case ChannelMetalRough: fallback.fill(Qt::white); break; + case ChannelNormal: fallback.fill(QColor(128, 128, 255, 255)); break; + default: fallback.fill(Qt::white); break; + } + QRhiTextureSubresourceUploadDescription sub(fallback); + QRhiTextureUploadEntry entry(0, 0, sub); + res.uploadTexture( + b.array, QRhiTextureUploadDescription({entry})); + } + + // `arrayReallocated` is the rebuildChannel return value: when any + // bucket's QRhiTexture* was recreated, downstream SRBs need a + // rebind. Caller threads it through the "auxBuffersChanged" + // flag in update(). + const bool arrayReallocated = anyReallocated; + + // Per-channel diagnostic — tells you bucket count, per-bucket size, + // layer count, and how many sources got dropped. Critical for + // understanding "missing textures" symptoms (e.g. Sponza mat 2 + // dropped because white.png is 4×4, below the <8 px decode floor). + if(buftrace_enabled()) + { + QString detail; + detail.reserve(128); + for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi) + { + const auto& b = channel.buckets[bi]; + detail += QStringLiteral(" b%1=%2x%3×%4") + .arg(bi) + .arg(b.pixelSize.width()) + .arg(b.pixelSize.height()) + .arg(b.layers); + } + BUFTRACE() << "[Channel " << channelName(ch) + << "] buckets=" << channel.buckets.size() + << " pendingUploads=" << pendingUploads.size() + << detail + << " realloc=" << anyReallocated; + } + + patchMaterialRefsFromCache(ch, fs); + return arrayReallocated; + } + + // Walk fs.materials in lockstep with scene.state->materials and set + // textureRefs[ch] from channel's layerMap. Called from both the fast + // path (same materials list) and the rebuild path (materials list + // changed). 
+ void patchMaterialRefsFromCache(MaterialChannel ch, FlatScene& fs) + { + if(!this->scene.state || !this->scene.state->materials || !m_registry) + return; + const auto& mats = *this->scene.state->materials; + const auto& channel = texChannel(ch); + const auto& dynMap = channel.dynamicSlotMap; + const std::size_t n = std::min(fs.materials.size(), mats.size()); + const std::size_t n_ext = std::min(n, fs.material_extensions.size()); + + // Channel 4 (Occlusion) lives in `MaterialGPU::occlusion_textureRef`, + // a single uint32 outside the 4-element textureRefs uvec4 (which + // holds BC/MR/Normal/Em only). Branch out the storage target so we + // don't write OOB into textureRefs[4]. + const auto write_main_ref + = [ch](MaterialGPU& m, uint32_t ref) noexcept { + if(ch == ChannelOcclusion) + m.occlusion_textureRef = ref; + else + m.textureRefs[ch] = ref; + }; + + // Encode a single texture_ref into a packed uint per the + // tex_ref_static / tex_ref_dynamic / tex_ref_none scheme. Looks up + // the dynamic handle in this channel's slotMap first (since GPU + // handles take precedence over CPU sources when both are set — + // mirrors the rebuild walker's order). Static sources are matched + // against the per-bucket layerMap that rebuildChannel populated. + // Returns tex_ref_none() for empty refs OR refs that overflowed + // the dynamic slot cap OR static sources we failed to map (decode + // failure, bucket cap, etc.). + const auto encode_ref = [&](const ossia::texture_ref& tref) -> uint32_t { + // Dynamic path: GPU handle without a CPU source. + if(!tref.source && tref.texture.valid()) + { + // Look up by globalResourceId — see GpuResourceRegistry.cpp's + // resolveDynamicSlot for the recycling-safety rationale. + auto* dynTex + = static_cast(tref.texture.native_handle); + auto it + = dynTex ? dynMap.find(dynTex->globalResourceId()) : dynMap.end(); + return (it != dynMap.end()) + ? 
tex_ref_dynamic((uint32_t)it->second) + : tex_ref_none(); + } + // Static path: walk this channel's buckets for the source pointer. + if(const auto* s = tref.source.get(); s) + { + for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi) + { + auto it = channel.buckets[bi].layerMap.find(s); + if(it != channel.buckets[bi].layerMap.end()) + return tex_ref_static((uint32_t)bi, (uint32_t)it->second); + } + } + return tex_ref_none(); + }; + + for(std::size_t i = 0; i < n; ++i) + { + // Null-material clear: zero out main + all ext slots mapped to + // this channel so a transient nullptr in mats[i] doesn't leave + // stale refs from the previous frame. + if(!mats[i]) + { + write_main_ref(fs.materials[i], tex_ref_none()); + if(i < n_ext) + for(const auto& slot : kExtTextureSlots) + if(slot.channel == ch) + fs.material_extensions[i].textureRefs[slot.slot] + = tex_ref_none(); + continue; + } + + // ── Main channel ref ────────────────────────────────────────── + // Occlusion-from-MR shortcut (see rebuildChannel above): when + // the source is shared with MR, leave the ref as none so the + // shader takes the MR.r packed-occlusion path. + const auto* main_tref = channelRef(ch, *mats[i]); + const bool occ_packed_in_mr + = (ch == ChannelOcclusion + && main_tref + && main_tref->source + && main_tref->source.get() + == mats[i]->metallic_roughness_texture.source.get()); + write_main_ref( + fs.materials[i], + (main_tref && !occ_packed_in_mr) + ? encode_ref(*main_tref) + : tex_ref_none()); + + // ── Ext-slot refs ───────────────────────────────────────────── + // For each ext slot whose pool is `ch`, encode and write to + // MaterialExtensionsGPU::textureRefs[slot]. Slots whose pool + // ≠ ch are written by other rebuildChannel(ch') passes — over + // ChannelCount calls per frame, every slot mapped in + // kExtTextureSlots gets its turn. 
+ if(i < n_ext) + { + for(const auto& slot : kExtTextureSlots) + { + if(slot.channel != ch) + continue; + fs.material_extensions[i].textureRefs[slot.slot] + = encode_ref(slot.accessor(*mats[i])); + } + } + } + } + + // Append all non-null material-texture channels + skybox to the emitted + // geometry as auxiliary_texture entries. Consumer shaders auto-resolve + // by name (base_color_array / metal_rough_array / normal_array / + // emissive_array / skybox) via try_bind_texture_from_geometry — no + // manual cable required. Null handles are filtered out so a shader + // missing a given channel falls back to its own sampler default. + void appendTextureAuxes(ossia::geometry& g) const + { + if(!m_registry) + return; + for(int i = 0; i < ChannelCount; ++i) + { + auto ch = static_cast(i); + const auto& channel = texChannel(ch); + + // Wave 2 S2-shader: emit one `auxiliary_texture` per live bucket, + // named `` (e.g. `baseColorArray0`, + // `baseColorArray1`, …). Consumer shaders declare matching + // sampler2DArray INPUTS per bucket and switch on the 6-bit + // `bucket` field from MaterialGPU::textureRefs. Capped at + // kMaxBuckets. + // + // Back-compat alias: bucket 0 is ALSO emitted under the + // unsuffixed name `` (e.g. `baseColorArray`). That + // keeps single-bucket-era shaders (classic_pbr, classic_pbr_textured, + // etc.) rendering correctly — they only decode bucket 0's + // layers and ignore the higher bits. Multi-bucket scenes that + // hit a non-zero bucket through one of those shaders will + // render bucket 0's layer in place of the intended bucket + // (visibly wrong); users hitting that path should migrate to + // classic_pbr_full or a ladder-aware preset. Zero overhead for + // single-bucket scenes, which remain the common case. 
+ for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi) + { + auto* tex = channel.buckets[bi].array; + if(!tex) + continue; + // sampler_handle is null when the bucket is the init-time + // fallback (bucket 0 with no real sources). Renderer falls + // back to its own shader-config sampler when null. Real + // material buckets populate the per-bucket sampler in + // rebuildChannel above so per-glTF-texture wrap/filter + // modes propagate end-to-end. + void* sampler_h = static_cast(channel.buckets[bi].sampler); + // Suffixed, always. + g.auxiliary_textures.push_back( + {.name = std::string(channelName(ch)) + + std::to_string((int)bi), + .native_handle = tex, + .sampler_handle = sampler_h}); + // Unsuffixed alias only for bucket 0. + if(bi == 0) + { + g.auxiliary_textures.push_back( + {.name = channelName(ch), + .native_handle = tex, + .sampler_handle = sampler_h}); + } + } + // Dynamic slot textures: one aux entry per used slot, named + // `` (e.g., "baseColorDyn0"). Consumer + // shaders declare matching sampler2D uniforms and branch on the + // textureRefs source bits to pick static array vs dyn sampler. + const auto& dyn = texChannel(ch).dynamicTextures; + const char* dynBase = channelDynBaseName(ch); + for(int s = 0; s < (int)dyn.size(); ++s) + { + if(auto* tex = dyn[s]) + { + g.auxiliary_textures.push_back( + {.name = std::string(dynBase) + std::to_string(s), + .native_handle = tex}); + } + } + } + if(this->scene.state) + { + // Scene-wide environment textures, exposed under well-known aux + // names. Consumer shaders declare matching INPUTS (e.g. + // `{"NAME": "irradiance_map", "TYPE": "cubemap"}`) and the + // existing aux-resolver picks them up over the already-wired + // scene cable. No hidden dataflow: the scene cable is explicit; + // we're just publishing named sub-resources onto it (same + // pattern as skybox, base_color_array, etc.). 
+ const auto& env = this->scene.state->environment; + if(auto* skybox = static_cast( + env.skybox_texture.native_handle)) + { + g.auxiliary_textures.push_back( + {.name = "skybox", .native_handle = skybox}); + } + if(auto* t = static_cast(env.irradiance_map.native_handle)) + { + g.auxiliary_textures.push_back( + {.name = "irradiance_map", .native_handle = t}); + } + if(auto* t = static_cast(env.prefiltered_map.native_handle)) + { + g.auxiliary_textures.push_back( + {.name = "prefiltered_map", .native_handle = t}); + } + if(auto* t = static_cast(env.brdf_lut.native_handle)) + { + g.auxiliary_textures.push_back( + {.name = "brdf_lut", .native_handle = t}); + } + // Shadow-map array lives off scene_state (not environment) since + // it's tied to the shadow_cascades_info authored by + // ShadowCascadeSetup. + if(auto* t = static_cast( + this->scene.state->shadow_cascades.shadow_map_array + .native_handle)) + { + g.auxiliary_textures.push_back( + {.name = "shadow_map_array", .native_handle = t}); + } + } + } + + // Texture outputs have been removed — every material-texture array and + // the skybox now ride along on the Geometry output as auxiliary_texture + // entries. Left in place only to satisfy the virtual override; the + // single remaining output port (Geometry) never takes this path. + QRhiTexture* textureForOutput(const Port& /*output*/) override + { + return nullptr; + } + + // Pack every camera collected by flattenScene into a std140 UBO array. + // Slot 0 is always the active camera; remaining slots are the other + // cameras in insertion order. If the scene has no cameras we synthesize a + // single default entry so downstream shaders always have a valid binding. + // + // Diff-uploads against m_cachedCameras to avoid Dynamic-buffer churn when + // camera parameters don't change frame to frame. + void packAndUploadCameras( + RenderList& renderer, QRhiResourceUpdateBatch& res, const FlatScene& fs) + { + // Per-frame idempotency. 
update() is dispatched once per outgoing + // edge — running this function more than once in the same frame + // would corrupt camera_prev: the snapshot-before-overwrite step + // (line below) reads m_cachedCameras to seed camera_prev, then + // overwrites m_cachedCameras with the new fresh. A second call + // within the same frame would snapshot the just-overwritten + // (current-frame) data into camera_prev → camera_prev == camera → + // motion = 0 even on real motion frames. RenderList::frame is + // incremented at the end of each renderInternal pass, so it's a + // reliable per-frame token here. + if(m_lastCameraUploadFrame == renderer.frame) + return; + + auto& rhi = *renderer.state.rhi; + // Prefer the scene's explicit render target size when an upstream + // producer (EnvironmentLoader / SetRenderTarget-style node) has + // stamped one — that size is correct for whatever off-screen pass + // this preprocessor drives. Fall back to the RenderList's swap-chain + // size, which is only right for the main window pass. + QSize rsize = renderer.state.renderSize; + if(this->scene.state) + { + const auto& env = this->scene.state->environment; + if((env.params_set & ossia::scene_environment::params_render_target_size) + && env.render_target_size[0] > 0 + && env.render_target_size[1] > 0) + { + rsize = QSize( + (int)env.render_target_size[0], + (int)env.render_target_size[1]); + } + } + + std::vector fresh; + if(fs.cameras.empty()) + { + // Default camera used when no camera is present in the scene. + ossia::camera_component cam{}; + QMatrix4x4 view; + view.lookAt( + QVector3D(0.f, 1.f, 3.f), QVector3D(0.f, 0.f, 0.f), + QVector3D(0.f, 1.f, 0.f)); + CameraUBOData d{}; + packCameraUBO(d, cam, view.inverted(), rsize, 0.f); + fresh.push_back(d); + } + else + { + fresh.reserve(fs.cameras.size()); + // Put the active camera first so shaders that index by 0 pick it up + // without knowing about activeCameraIndex. 
+ const int active = std::max(0, fs.activeCameraIndex); + auto packOne = [&](const FlatScene::CameraEntry& e) { + CameraUBOData d{}; + packCameraUBO(d, *e.component, e.worldTransform, rsize, 0.f); + fresh.push_back(d); + }; + packOne(fs.cameras[(std::size_t)active]); + for(std::size_t i = 0; i < fs.cameras.size(); ++i) + { + if((int)i != active) + packOne(fs.cameras[i]); + } + } + + const int64_t bytes = (int64_t)(fresh.size() * sizeof(CameraUBOData)); + + // Pre-allocate a large enough capacity so the buffer pointer is stable + // across typical scene changes — aux-buffer bindings downstream resolve + // to this QRhiBuffer* at geometry-rebuild time, and growing invalidates + // those bindings. 16 cameras × 240 B = 3840 B covers every realistic + // multi-view case (cubemap = 6, stereo = 2, typical single = 1). + constexpr int64_t kMinCap = 16 * (int64_t)sizeof(CameraUBOData); + const int64_t wantCap = std::max(bytes, kMinCap); + + if(!m_camerasBuffer || m_camerasCap < wantCap) + { + if(m_camerasBuffer) + renderer.releaseBuffer(m_camerasBuffer); + if(m_camerasPrevBuffer) + renderer.releaseBuffer(m_camerasPrevBuffer); + m_camerasBuffer = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, (quint32)wantCap); + m_camerasBuffer->setName("ScenePreprocessor::cameras"); + m_camerasBuffer->create(); + m_camerasPrevBuffer = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, (quint32)wantCap); + m_camerasPrevBuffer->setName("ScenePreprocessor::cameras_prev"); + m_camerasPrevBuffer->create(); + m_camerasCap = wantCap; + m_cachedCameras.clear(); + // Force the upload below to actually run after realloc — the + // freshly created buffers contain garbage and must be filled. + m_lastCameraUploadFrame = -1; + } + + // Upload `camera_prev` from the CPU mirror of what's currently in the + // GPU `camera` buffer (= last frame's content, since we're about to + // overwrite it with `fresh` below). 
On the first frame m_cachedCameras + // is empty — seed prev with current so MV = 0 (no history snap). + // + // Earlier impl held a separate m_prevCameras shadow that was only + // refreshed on cache MISS, while the prev-buffer upload ran every + // frame. With cache-hit/miss alternation (renderSize toggles, multi- + // producer env-merge order, animation tick != render tick) this left + // camera_prev lagging by 2 frames on the post-hit miss frame — + // GPU camera_prev ended up byte-equal to GPU camera, so motion = 0 + // every other frame and downstream temporal upscalers / reproject + // shaders flickered between correct and zero output. + // + // Mirroring m_worldTransformsPrevBuffer's pattern (snapshot-current- + // before-overwrite) makes the prev semantic a function of the GPU + // buffer's last frame content, not of cache-hit history. Always + // upload current too — the diff-skip saved <4 KB of Dynamic-UBO + // churn per frame and was the source of the bug. + const auto& prevPayload + = m_cachedCameras.empty() ? fresh : m_cachedCameras; + const int64_t prevBytes + = (int64_t)(prevPayload.size() * sizeof(CameraUBOData)); + res.updateDynamicBuffer( + m_camerasPrevBuffer, 0, (quint32)prevBytes, prevPayload.data()); + + res.updateDynamicBuffer(m_camerasBuffer, 0, (quint32)bytes, fresh.data()); + m_cachedCameras = std::move(fresh); + m_lastCameraUploadFrame = renderer.frame; + + // The camera UBO isn't exposed on an external output port anymore — + // it rides along on the geometry as the `camera` auxiliary buffer + // (attached in rebuildMDI), so try_bind_from_geometry resolves the + // shader's `uniform camera` input by name without a dedicated cable. + } + + void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge*) override + { + // Re-flatten when the CONTENT actually changed, not just when a push + // occurred this frame. 
Producers (glTF/FBX loaders, Light) + // now re-push every frame so that multi-source scenes stay consistent + // across frames; the merge cache in NodeRenderer keeps the resulting + // scene_state shared_ptr stable when no input changed. That makes the + // pointer + version check a reliable "did the content change" test, + // and we can skip the sceneChanged forced-rebuild entirely. + bool needsRebuild = !m_outputSpec.meshes; + if(this->scene.state.get() != m_cachedSceneState) + needsRebuild = true; + if(this->scene.state && this->scene.state->version != m_cachedVersion) + needsRebuild = true; + + // Always refresh the camera UBOs every frame, regardless of whether + // mesh-rebuild fires. Decoupling camera updates from the rebuild gate + // is required for motion-vector reprojection to be correct: + // + // * "Camera moves, then stops": without per-frame upload, the last + // rebuild leaves camera_prev = old, camera = new in the GPU UBOs. + // scene_state stops bumping its version → no further rebuild → + // UBOs frozen at the motion-in-progress state → motion-vector + // consumers see ghost motion forever after the camera stopped. + // + // * "Static camera + animated geometry": some scene producers bump + // scene_state.version on transform changes, others don't. If the + // gate misses, the camera UBO never updates even when the camera + // does change. Always running packAndUploadCameras here makes + // motion-vector correctness independent of which producer is in + // play. + // + // packAndUploadCameras synthesises a default camera when fs.cameras + // is empty, so this runs unconditionally — keeps m_camerasBuffer + // allocated and bound even when no scene producer is wired yet. + // + // Per-frame guard (threedim#12): update() is dispatched once per + // outgoing edge, and packAndUploadCameras already early-returns when + // it has already run this frame (m_lastCameraUploadFrame == + // renderer.frame). 
But the flattenScene() feeding it is NOT free — it + // packs every material, runs skeleton FK and allocates a shared_ptr + // wrapper per primitive — so running it once per edge wastes that work + // on edges 2..K whose packAndUploadCameras is a no-op anyway. Gate the + // whole camera flatten+upload on the same per-frame token so it runs at + // most once per frame regardless of edge count. + if(m_lastCameraUploadFrame != renderer.frame) + { + FlatScene cameraFs; + flattenScene(this->scene, cameraFs, /*aspectRatio=*/1.f); + packAndUploadCameras(renderer, res, cameraFs); + } + + if(!needsRebuild) + { + // Still consume the sceneChanged flag so we don't loop on it forever. + this->sceneChanged = false; + return; + } + + BUFTRACE() << "ScenePreprocessor::update REBUILD cached_state=" + << (const void*)m_cachedSceneState + << " cached_ver=" << (qint64)m_cachedVersion + << " new_state=" << (void*)this->scene.state.get() + << " new_ver=" + << (this->scene.state ? (qint64)this->scene.state->version : (qint64)-1) + << " mdi_indices=" + << (void*)(m_registry ? m_registry->meshStreamBuffer( + GpuResourceRegistry::MeshStream::Indices) : nullptr) + << " (downstream shader bindings still reference the " + "pre-rebuild MDI buffers until the next acquireMesh)"; + + // Walk the scene. flattenScene is O(nodes) — cheap compared to any + // GPU upload — so we always do it. The expensive work (vertex/index + // concat + upload) is then gated by the mesh fingerprint below. + { + FlatScene fs; + flattenScene(this->scene, fs, /*aspectRatio=*/1.f); + + std::vector materialTagHashes; + if(this->scene.state && this->scene.state->materials) + { + const auto& mats = *this->scene.state->materials; + materialTagHashes.reserve(mats.size()); + for(const auto& m : mats) + materialTagHashes.push_back( + m ? 
(uint32_t)ossia::hash_string(m->tag) : 0u); + } + + // Allocate Material arena slots for every loader material (materials + // entering the scene without a live producer's raw_slot) + upload + // MaterialGPU bytes. Producer-authored materials already have valid + // slots kept fresh by their own update(); we skip those here. + // Slot allocation persists across frames via m_loaderMaterialSlots — + // cheap cache hit for scenes that don't change. When a material + // disappears (removed from scene_state.materials), its slot is + // reclaimed by the garbage-collection pass below. + if(this->scene.state && m_registry) + { + const std::vector empty_mats; + const auto& mats = this->scene.state->materials + ? *this->scene.state->materials + : empty_mats; + ossia::hash_set seen; + seen.reserve(mats.size() + fs.instances.size()); + const auto register_loader_material + = [&](const ossia::material_component* mat) { + if(!mat) + return; + seen.insert(mat); + // Producer-authored material: its own update() maintains the + // slot contents every frame. Skip. + if(m_registry->isLive(mat->raw_slot)) + return; + // Loader material: allocate a slot on first sight, upload + // packed MaterialGPU bytes. No per-frame re-upload: loader + // materials are immutable between file-loads, so the slot + // bytes we wrote on first sight are still valid. + auto [it, inserted] + = m_loaderMaterialSlots.emplace(mat, GpuResourceRegistry::Slot{}); + if(inserted) + { + it->second = m_registry->allocate( + GpuResourceRegistry::Arena::Material, sizeof(MaterialGPU)); + // No upload here — textureRefs aren't resolved yet. The + // upload happens after the rebuildChannel loop, once the + // per-channel layerMaps know which source lands on which + // layer. Arena-full case: the GC pass below drops the + // invalid entry on the next material list change. 
+ } + }; + for(const auto& mat_ptr : mats) + register_loader_material(mat_ptr.get()); + // Instancer prototypes carry their own material_component + // pointers that aren't in scene_state.materials (they're owned + // by the prototype mesh_component). Without registering them + // here, arenaSlotForMaterial(prim.material) falls back to slot + // 0 (the seedDefaults white-dielectric) and every loader-built + // instance group renders with that default — see diagnostic 029. + for(const auto& inst_draw : fs.instances) + { + const auto* inst = inst_draw.instance.get(); + if(!inst || !inst->prototype) + continue; + for(const auto& prim : inst->prototype->primitives) + register_loader_material(prim.material.get()); + } + // Garbage-collect slots whose materials disappeared from the + // scene. Scanning after the allocation pass ensures entries + // still present are kept. + for(auto it = m_loaderMaterialSlots.begin(); + it != m_loaderMaterialSlots.end();) + { + if(seen.find(it->first) == seen.end()) + { + if(it->second.valid()) + m_registry->free(it->second); + it = m_loaderMaterialSlots.erase(it); + } + else + { + ++it; + } + } + } + + // Build / refresh every material-texture channel AND patch + // fs.materials[i].textureRefs[ch] with the assigned layer indices. + // Must happen before the scene_materials SSBO upload below so + // materials are written with the right refs. + // + // Each channel has its own QRhiTextureArray (sRGB for base color + // & emissive, linear for MR & normal — see channelFlags). When a + // channel's QRhiTexture* gets reallocated (layer count grew, …) + // the emitted auxiliary_texture entry's native_handle changes — + // downstream's rebindAuxTextures picks that up via the per-frame + // geometry lookup, but ONLY if downstream's geometryChanged fires, + // which requires a fresh meshes shared_ptr. 
Roll the realloc + // signal into the same `auxBuffersChanged` flag the SSBO-grow path + // uses: rebuildMDI() rebuilds the meshes vector every time that + // flag fires, giving the downstream a pointer identity change. + // + // Fingerprint the materials list once and pass the equality result + // to each channel so we don't re-walk the list ChannelCount times. + std::vector fingerprint; + computeMaterialsFingerprint(fingerprint); + // Append prototype-material identity into the fingerprint so a + // prototype-only change (model swap, variant select) re-triggers + // the channel rebuild + upload below. + for(const auto& inst_draw : fs.instances) + { + const auto* inst = inst_draw.instance.get(); + if(!inst || !inst->prototype) + continue; + for(const auto& prim : inst->prototype->primitives) + { + const auto* mat = prim.material.get(); + fingerprint.push_back( + mat + ? (mat->stable_id != 0 + ? mat->stable_id + : reinterpret_cast(mat)) + : 0u); + } + } + const bool sameMaterialsContent + = (fingerprint == m_cachedMaterialsFingerprint); + + bool channelReallocated = false; + for(int i = 0; i < ChannelCount; ++i) + { + if(rebuildChannel( + static_cast(i), sameMaterialsContent, + renderer, res, fs)) + channelReallocated = true; + } + if(!sameMaterialsContent) + m_cachedMaterialsFingerprint = std::move(fingerprint); + + // Loader-material arena slot upload: now that rebuildChannel has + // patched fs.materials[i].textureRefs with the resolved per-channel + // layer indices, stream each loader material's packed MaterialGPU + // bytes into its Material arena slot. Producer-authored materials + // (PBRMesh, MaterialOverride-if-migrated, CSF mesh producers) keep + // their own slot fresh in their update() hooks — we skip those. + // + // Uploads happen only when the materials content actually changed + // (sameMaterialsContent==false) OR when a channel reallocated and + // shifted layer indices. Steady-state frames with an unchanged + // scene touch zero bytes here. 
+ if(m_registry && this->scene.state + && (!sameMaterialsContent || channelReallocated)) + { + const std::vector empty_mats; + const auto& mats = this->scene.state->materials + ? *this->scene.state->materials + : empty_mats; + const std::size_t n + = std::min(fs.materials.size(), mats.size()); + for(std::size_t i = 0; i < n; ++i) + { + const auto* mat = mats[i].get(); + if(!mat) + continue; + if(m_registry->isLive(mat->raw_slot)) + continue; // producer-authored — slot owned by producer + auto it = m_loaderMaterialSlots.find(mat); + if(it == m_loaderMaterialSlots.end() || !it->second.valid()) + continue; + m_registry->updateSlot( + res, it->second, &fs.materials[i], sizeof(MaterialGPU)); + } + // Instancer-prototype materials registered above also need + // their MaterialGPU bytes uploaded — they aren't in + // fs.materials so we pack on the fly. textureRefs come from the + // rebuildChannel walk (which now also visits prototype + // materials) so dedup with channel buckets is preserved. + ossia::hash_set uploaded; + uploaded.reserve(mats.size() + fs.instances.size()); + for(const auto& mp : mats) + if(mp) + uploaded.insert(mp.get()); + for(const auto& inst_draw : fs.instances) + { + const auto* inst = inst_draw.instance.get(); + if(!inst || !inst->prototype) + continue; + for(const auto& prim : inst->prototype->primitives) + { + const auto* mat = prim.material.get(); + if(!mat) + continue; + if(!uploaded.insert(mat).second) + continue; // shared with scene material or another prim + if(m_registry->isLive(mat->raw_slot)) + continue; + auto it = m_loaderMaterialSlots.find(mat); + if(it == m_loaderMaterialSlots.end() || !it->second.valid()) + continue; + MaterialGPU packed = packMaterial(*mat); + // Patch textureRefs from the per-channel buckets. Mirrors + // patchMaterialRefsFromCache but inline since prototype + // materials aren't in fs.materials. 
+ for(int chi = 0; chi < ChannelCount; ++chi) + { + const auto ch = static_cast(chi); + const auto& channel = texChannel(ch); + uint32_t ref = tex_ref_none(); + if(const auto* tref = channelRef(ch, *mat); tref) + { + if(!tref->source && tref->texture.valid()) + { + // Stable-id keyed (GpuResourceRegistry.cpp). + auto* dynTex = static_cast( + tref->texture.native_handle); + auto dit = dynTex + ? channel.dynamicSlotMap.find( + dynTex->globalResourceId()) + : channel.dynamicSlotMap.end(); + if(dit != channel.dynamicSlotMap.end()) + ref = tex_ref_dynamic((uint32_t)dit->second); + } + else if(const auto* s = tref->source.get(); s) + { + for(std::size_t bi = 0; bi < channel.buckets.size(); ++bi) + { + auto bit = channel.buckets[bi].layerMap.find(s); + if(bit != channel.buckets[bi].layerMap.end()) + { + ref = tex_ref_static( + (uint32_t)bi, (uint32_t)bit->second); + break; + } + } + } + } + if(ch == ChannelOcclusion) + packed.occlusion_textureRef = ref; + else + packed.textureRefs[chi] = ref; + } + m_registry->updateSlot( + res, it->second, &packed, sizeof(MaterialGPU)); + } + } + } + + // Ensure the scene-wide SSBOs exist at a large-enough capacity. Only + // allocates / resizes when the count grew past the current cap; the + // common steady-state case is a no-op. + // + // Both `scene_materials_ext` and `scene_material_uv_xforms` are + // indexed by Material ARENA SLOT in the shader (shader does + // `entries[pd.material_index]` where pd.material_index is the + // arena slot, parallel to `scene_materials` which IS the arena). + // Their CPU side must therefore be sized + filled by arena slot + // too, NOT by fs.materials position. See the freshMaterialUVTransforms + // build below for the same arena-slot-indexed pattern. 
+ uint32_t maxArenaSlot = 0; + if(this->scene.state && this->scene.state->materials) + { + for(const auto& m : *this->scene.state->materials) + { + if(!m) + continue; + maxArenaSlot + = std::max(maxArenaSlot, arenaSlotForMaterial(m.get())); + } + } + // Instancer / loader prototype materials are NOT in + // scene.state->materials but DO get an arena slot via + // m_loaderMaterialSlots (registered above), and their slot is what + // arenaSlotForMaterial() — hence PerDrawGPU.material_index — resolves + // to for those draws. If such a slot exceeds the scene-material max, + // the shader's `scene_materials_ext[material_index]` / + // `uv_xforms[material_index]` would read past the bound aux range + // (threedim#11). Fold those slots into the extent so the aux buffers + // are sized to cover every reachable material_index. + for(const auto& [mat, slot] : m_loaderMaterialSlots) + { + if(slot.valid()) + maxArenaSlot = std::max(maxArenaSlot, slot.slot_index); + } + const std::size_t arenaSlotEntries + = (std::size_t)maxArenaSlot + 1; + const int64_t matsExtBytes + = std::max( + 16, + (int64_t)arenaSlotEntries * sizeof(MaterialExtensionsGPU)); + auto& rhi = *renderer.state.rhi; + // Track buffer-pointer churn: when grow reallocates any aux buffer we + // MUST republish m_outputSpec.meshes so downstream's SRB rebinds to + // the new pointer. Otherwise the sink keeps its old aux.buffer + // (released via RenderList::releaseBuffer) and reads undefined memory. + // Channel-array reallocation also counts as an aux change for the + // purposes of bumping the mesh identity downstream — see the + // rebuildChannel call above. + bool auxBuffersChanged = channelReallocated; + // Returns true on (re)allocation. Same prefix-staleness invariant + // as the static growBuf above: callers MUST clear the matching + // diffUpload mirror on `true` so the new (uninitialised) buffer + // gets the full fresh contents instead of just the appended tail. 
+ // Also zero-fills the freshly allocated buffer (Vulkan does NOT + // zero VkBuffers on creation — sparse-uploaded SSBOs would + // otherwise read garbage from device-memory pages). + auto grow = [&](QRhiBuffer*& buf, int64_t& cap, int64_t need, const char* nm) { + if(buf && cap >= need) return false; + int64_t newCap = cap > 0 ? cap : 16; + while(newCap < need) newCap *= 2; + if(buf) renderer.releaseBuffer(buf); + buf = rhi.newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, newCap); + buf->setName(nm); + buf->create(); + // Zero-fill via the thread-local zero pool (see RhiClearBuffer.hpp). + RhiClearBuffer::clearBuffer(rhi, res, buf, 0, (quint32)newCap); + cap = newCap; + auxBuffersChanged = true; + return true; + }; + // scene_lights now points at the RawLight arena (fixed capacity) + // and scene_materials points at the Material arena — no grow here + // for either. + // Realloc → clear the diffUpload mirror (lines 4740 / 4742) so the + // freshly-allocated GPU buffer's prefix isn't left as garbage. + // Same prefix-staleness invariant as growBuf — see its comment. + if(grow(m_materialsExtBuffer, m_materialsExtCap, matsExtBytes, + "ScenePreprocessor::materials_ext")) + m_cachedMaterialExt.clear(); + + // Per-material UV transforms (KHR_texture_transform). Sized by + // arena-slot count (see comment above scene_materials_ext); the + // freshMaterialUVTransforms vector built below uses the same + // indexing. + const int64_t uvXformBytes + = std::max( + 16, + (int64_t)arenaSlotEntries * sizeof(MaterialUVTransformGPU)); + if(grow(m_materialUVTransformsBuffer, m_materialUVTransformsCap, uvXformBytes, + "ScenePreprocessor::material_uv_xforms")) + m_cachedMaterialUVTransforms.clear(); + // scene_light_indices: compact uint array of arena slot indices. + // Count the lights with valid arena slots (filter out 0xFFFFFFFF + // sentinels from producer-less lights). 
+ std::vector freshLightIndices; + freshLightIndices.reserve(fs.lightArenaSlots.size()); + for(uint32_t s : fs.lightArenaSlots) + if(s != 0xFFFFFFFFu) + freshLightIndices.push_back(s); + // 16 KiB floor (= 4096 light index slots) so override CSFs like + // pack_lights_from_points / wander_lights_inline / grid_lights_inline + // can publish up to 4k procedural lights without OOB-clamping + // themselves to the scene-graph-derived size. RawLight arena + // (GpuResourceRegistry::Arena::RawLight, currently 4096 slots) is + // the matching ceiling — keep the two values consistent: this + // floor must equal arena_slot_count * 4 bytes. If you bump one + // without the other, either (a) procedural CSFs hit the lower + // bound and clamp early, or (b) scene_light_indices references + // slot indices past the arena size and rasterizers read garbage. + const int64_t lightIdxBytes + = std::max(16384, (int64_t)freshLightIndices.size() * 4); + if(grow(m_lightIndicesBuffer, m_lightIndicesCap, lightIdxBytes, + "ScenePreprocessor::light_indices")) + m_cachedLightIndices.clear(); + + // Allocate the scene_counts buffer once (16 bytes, never grows). + // + // Usage: Static + StorageBuffer (SSBO-only). + // + // Historical context: this buffer used to be allocated as + // UniformBuffer | StorageBuffer to satisfy a dual-bind contract — + // rasterizers declared `scene_counts` with TYPE: "uniform" (UBO + // bind) while override CSFs (pack_lights_from_points etc.) + // declared the same name with ACCESS: "read_write" (SSBO bind). + // QRhi forbids Dynamic + StorageBuffer, so the buffer had to be + // Static. But D3D11 / GLES don't support NonDynamicUniformBuffers + // — `Static + UniformBuffer` fails create() silently there, and + // the override-CSF write pattern was unreachable on every desktop + // backend except Vulkan / Metal / D3D12. + // + // Resolution: drop the UBO half entirely. 
All bundled shaders + // (presets/rasterizers/*.frag, presets/filters/*.csf, + // presets/lighting/*.csf, presets/volumetric/*.csf) declare + // `scene_counts` as a storage buffer. Rasterizers (top-level + // INPUTS) declare it with `TYPE: "storage", ACCESS: "read_only"` + // → parser emits `layout(std430) readonly buffer scene_counts_buf + // { ... } scene_counts;`. Filters / lighting / volumetric (nested + // AUXILIARY, where SSBO is the default kind) just need + // `ACCESS: "read_only"` to get the readonly qualifier on the + // emitted block. Override-CSFs that write the buffer keep their + // `ACCESS: "read_write"` declaration as-is. + // + // The shader-side access pattern `scene_counts.light_count` is + // identical against UBO or SSBO declarations; std140 vs std430 + // layouts agree on a 4-uint struct (16 bytes, no padding either + // way). + // + // Advanced users writing their own shaders MAY still declare + // `TYPE: "uniform"` for `scene_counts` — the parser supports it + // — but they're responsible for ensuring the target backend + // supports the resulting non-dynamic UBO bind. Bundled shaders + // avoid it so they work on every backend. + if(!m_sceneCountsBuffer) + { + m_sceneCountsBuffer = rhi.newBuffer( + QRhiBuffer::Static, QRhiBuffer::StorageBuffer, + sizeof(SceneCountsUBO)); + m_sceneCountsBuffer->setName("ScenePreprocessor::scene_counts"); + m_sceneCountsBuffer->create(); + // Zero-fill: Vulkan doesn't initialise VkBuffer memory. Until + // the first scene_counts upload (gated below on actual count + // changes), shaders reading scene_counts.light_count etc. would + // see device-memory garbage — wildly different per resize as the + // freshly allocated buffer lands on a different memory page. + // SceneCountsUBO is a POD-of-uint32 — the all-zeros pattern + // matches its default-constructed state. 
+ RhiClearBuffer::clearBuffer( + rhi, res, m_sceneCountsBuffer, 0, sizeof(SceneCountsUBO)); + } + + // Allocate the shadow_cascades UBO once (544 B, never grows). Lazy: + // only materialise the buffer when a scene actually authors cascades + // — the vast majority of scenes without shadow-receiving rasterizers + // pay zero GPU memory for this path. + if(!m_shadowCascadesBuffer) + { + m_shadowCascadesBuffer = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, + sizeof(ShadowCascadesUBO)); + m_shadowCascadesBuffer->setName("ScenePreprocessor::shadow_cascades"); + m_shadowCascadesBuffer->create(); + // Zero-fill so a no-shadow-cascade scene reads cascade_count=0 + // (the shader's "skip shadow sampling" sentinel) instead of + // device-memory garbage on the first frame after a fresh + // RenderList. RhiClearBuffer auto-routes Dynamic UBOs through + // chunked updateDynamicBuffer (cap 65535 B per call); 560 B + // here fits in a single chunk. + RhiClearBuffer::clearBuffer( + rhi, res, m_shadowCascadesBuffer, 0, sizeof(ShadowCascadesUBO)); + } + + // Camera UBO upload moved to the top of update() so it runs every + // frame, decoupled from the mesh-rebuild gate (motion vectors need + // per-frame camera_prev refresh; see comment at the head of + // update()). The QRhiBuffer pointer is allocated on first call + // there, so by the time rebuildMDI runs below, m_camerasBuffer is + // non-null and ready to be attached as an aux on the emitted + // geometry — same contract as before. + + // Pack the MERGED scene_environment into our own Env arena slot. + // merge_scenes composes contributions from every EnvironmentLoader + // / CubemapLoader / future IBL-precompute producer field-by-field + // via the `params_set` bitmask, so this->scene.state->environment + // holds the final composed state. 
Individual producer Env slots + // still get written by those producers (they're POSTing their + // own contribution for any future consumer wanting per-producer + // data), but the scene_environment binding goes to our slot. + if(m_registry && m_envSlot.valid() && this->scene.state) + { + const auto& env = this->scene.state->environment; + EnvParamsUBO gpu{}; + gpu.ambient[0] = env.ambient_color[0]; + gpu.ambient[1] = env.ambient_color[1]; + gpu.ambient[2] = env.ambient_color[2]; + gpu.ambient[3] = env.ambient_intensity; + gpu.fog_color_density[0] = env.fog.color[0]; + gpu.fog_color_density[1] = env.fog.color[1]; + gpu.fog_color_density[2] = env.fog.color[2]; + gpu.fog_color_density[3] = env.fog.density; + gpu.fog_range[0] = env.fog.start; + gpu.fog_range[1] = env.fog.end; + gpu.fog_range[2] = float(env.fog.mode); + gpu.fog_range[3] = env.fog.enabled ? 1.f : 0.f; + gpu.exposure_gamma[0] = env.exposure; + gpu.exposure_gamma[1] = env.gamma; + gpu.exposure_gamma[2] = 0.f; + gpu.exposure_gamma[3] = 0.f; + if(!m_envSlotSeeded + || std::memcmp(&gpu, &m_lastEnvUpload, sizeof(EnvParamsUBO)) != 0) + { + m_registry->updateSlot(res, m_envSlot, &gpu, sizeof(gpu)); + m_lastEnvUpload = gpu; + m_envSlotSeeded = true; + } + } + + // Upload this preprocessor's private world-transforms buffer. + // Per-preprocessor (not a shared registry arena) because two + // preprocessors consuming different filtered views of the same + // source scene legitimately compute different world matrices + // for the same scene_transform — a shared arena would have them + // stomp. Layout: indexed by the RawTransform arena slot index + // (not walk order). Consumer shaders / compute passes read + // `world_transforms.data[slot_index]` for any light / particle / + // effect that needs slot-addressable world-space composition. + { + auto& rhi = *renderer.state.rhi; + // Size to the full RawTransform arena capacity — sparse, but + // bounded (16384 slots × 64 B = 1 MiB). 
Slot-indexed lookup + // gives O(1) addressing without a per-frame translation table. + const uint32_t xform_slot_count + = renderer.registry().arenaSlotCount( + GpuResourceRegistry::Arena::RawTransform); + const int64_t want_bytes + = (int64_t)xform_slot_count * (int64_t)sizeof(WorldTransformMat4); + if(!m_worldTransformsBuffer || m_worldTransformsCap < want_bytes) + { + if(m_worldTransformsBuffer) + renderer.releaseBuffer(m_worldTransformsBuffer); + if(m_worldTransformsPrevBuffer) + renderer.releaseBuffer(m_worldTransformsPrevBuffer); + // QRhi forbids Dynamic + StorageBuffer — the SSBO path is + // host-coherent differently from a Dynamic UBO's per-frame + // rotation. Static + uploadStaticBuffer is the correct pair. + m_worldTransformsBuffer = rhi.newBuffer( + QRhiBuffer::Static, QRhiBuffer::StorageBuffer, (quint32)want_bytes); + m_worldTransformsBuffer->setName("ScenePreprocessor::world_transforms"); + m_worldTransformsBuffer->create(); + // Prev buffer: same shape as current, sampled alongside it + // as the `world_transforms_prev` aux for motion-vector / + // TAA / reprojection shaders. Populated each frame by a + // single GPU-side copyBuffer in runInitialPasses — see + // m_worldTransformsPrevBuffer doc for the deferred-write + // ordering that keeps the copy reading frame-N-1 data. + m_worldTransformsPrevBuffer = rhi.newBuffer( + QRhiBuffer::Static, QRhiBuffer::StorageBuffer, (quint32)want_bytes); + m_worldTransformsPrevBuffer->setName( + "ScenePreprocessor::world_transforms_prev"); + m_worldTransformsPrevBuffer->create(); + // Zero-fill both buffers. world_transforms is sparse — + // only slots used by actual scene_transforms get written, + // unused arena slots stay at their initial value. After a + // fresh RenderList (resize), Vulkan hands us a VkBuffer with + // device-memory garbage; any consumer indexing + // world_transforms.data[L.transform_slot] for a slot the + // producer hasn't populated reads garbage. 
Lights end up + // with non-deterministic world positions per resize → the + // user's "wildly different lighting on every resize" + // symptom. + // + // _prev: the runInitialPasses copyBuffer(current → prev) on + // the first post-resize frame would otherwise propagate the + // current buffer's garbage into prev for any shader sampling + // world_transforms_prev. + // + // RhiClearBuffer's batch variant pulls from the thread-local + // zero pool — both 1 MiB clears reuse the same backing + // vector (no per-buffer allocation). + RhiClearBuffer::clearBuffer( + rhi, res, m_worldTransformsBuffer, 0, (quint32)want_bytes); + RhiClearBuffer::clearBuffer( + rhi, res, m_worldTransformsPrevBuffer, 0, (quint32)want_bytes); + m_worldTransformsCap = want_bytes; + } + // Sparse upload: one small write per scene_transform. Typical + // scene has 1-50 transforms, so this is cheaper than packing + // into a contiguous staging buffer. The arena-slot offsets + // naturally cluster at the low indices (free-list LIFO stack + // pops 0, 1, 2, … first) so uploads are cache-friendly. + // + // The actual uploadStaticBuffer is DEFERRED to runInitialPasses + // so the prev-snapshot copyBuffer (which runs ahead of the + // submitted writes) reads frame N-1 contents of current. Here + // we just stash (slot, matrix) pairs; runInitialPasses drains + // the list into the post-snapshot resource batch. + m_pendingWorldXformWrites.clear(); + m_pendingWorldXformWrites.reserve(fs.worldTransforms.size()); + for(const auto& wt : fs.worldTransforms) + { + WorldTransformMat4 m; + writeMat4(m.m, wt.world); + m_pendingWorldXformWrites.emplace_back(wt.transform_slot, m); + } + } + + // Pack per-draw data once (cheap — just struct copy per draw). + // `pd.material_index` is the Material-arena slot index (task 28a) + // resolved by arenaSlotForMaterial(); shaders read + // `scene_materials.entries[material_index]` directly against the + // registry's Material arena. 
rebuildMDI() uses the same helper + // on the full-rebuild path so the encoding is consistent. + // + // `pd.transform_slot` + `pd.skeleton_offset` + per_draw_bounds are + // packed in lockstep with the other fields; fast path stays cheap + // (one struct copy + one aabb copy per draw) and keeps the per_draw_bounds + // sidecar in sync with per_draws for downstream culling CSFs. + std::vector fastSkinJointOffsets; + fastSkinJointOffsets.reserve(fs.skins.size()); + { + uint32_t running = 0; + for(const auto& sk : fs.skins) + { + fastSkinJointOffsets.push_back(running); + running += (uint32_t)sk.joint_matrices.size(); + } + } + + std::vector freshPerDraws; + std::vector freshPerDrawBounds; + freshPerDraws.reserve(fs.draws.size()); + freshPerDrawBounds.reserve(fs.draws.size()); + for(const auto& dc : fs.draws) + { + // Mirror emitDraw's skip predicate exactly (threedim#3): a draw with + // no usable positions, or with GPU-backed indices, is dropped by + // rebuildMDI and therefore occupies NO per_draws slot. Filtering the + // fast-path mirror only by `vertices > 0` would keep such draws and + // shift every following slot, so diffUpload would write a draw's + // model matrix into its neighbour's GPU slot. + if(!dc.mesh || dc.mesh->vertices <= 0 || !m_registry) + continue; + if(!meshEmitsDraw(*dc.mesh)) + continue; + PerDrawGPU pd{}; + writeMat4(pd.model, dc.worldTransform); + QMatrix4x4 nm = dc.worldTransform.inverted().transposed(); + nm.setColumn(3, QVector4D(0, 0, 0, 1)); + nm.setRow(3, QVector4D(0, 0, 0, 1)); + writeMat4(pd.normal, nm); + pd.material_index = arenaSlotForMaterial(dc.material.get()); + // tag_hash still keyed on the scene-material index (CPU-only + // per-pass filter — not shader-visible as material identity). + pd.tag_hash + = (dc.materialIndex >= 0 + && (std::size_t)dc.materialIndex < materialTagHashes.size()) + ? 
materialTagHashes[dc.materialIndex] + : 0u; + pd.transform_slot = dc.transform_slot; + pd.skeleton_offset + = (dc.skinIndex >= 0 + && (std::size_t)dc.skinIndex < fastSkinJointOffsets.size()) + ? fastSkinJointOffsets[dc.skinIndex] + : 0xFFFFFFFFu; + freshPerDraws.push_back(pd); + freshPerDrawBounds.push_back(packBounds(dc.local_bounds)); + } + + // Mesh fingerprint: the sequence of DrawCall::stable_id's — the + // addresses of the source mesh_primitives (or legacy ossia::geometry + // entries) that back each draw. Those addresses are invariant across + // frames as long as the mesh_component shared_ptrs and their + // primitives vectors don't change; walking the same scene tree twice + // thus produces identical fingerprints and we can skip the full + // vertex/index rebuild. (Contrast: `dc.mesh` is a fresh + // primitiveToGeometry() wrapper pointer that differs every frame.) + // + // We also mix in the upstream GPU-resident attribute buffer handles + // (positions/normals/texcoords/tangents). `m_pendingGpuCopies` holds + // raw QRhiBuffer* captured in queueSlabCopy at rebuildMDI time and + // re-issued every frame from runInitialPasses; if an upstream node + // rebuilds its QRhiBuffer (CSF compute pipeline rebuild, Instancer + // prototype swap, GPU mesh-handle pool churn) while the source + // mesh_primitive address stays identical, the fast path would skip + // rebuildMDI and the queue would re-issue copies from a freed + // QRhiBuffer*. Including the upstream buffer pointers here makes any + // such swap force a full rebuild → fresh op.src in the queue. + std::vector freshMeshFingerprint; + freshMeshFingerprint.reserve(fs.draws.size() * 5); + for(const auto& dc : fs.draws) + { + if(dc.mesh && dc.mesh->vertices > 0 && dc.stable_id) + { + freshMeshFingerprint.push_back(dc.stable_id); + // Mix one entry per attribute: upstream QRhiBuffer* identity (or + // 0 when the attribute is CPU-sourced / missing). 
A swap from + // CPU→GPU sourcing or a buffer pointer change → fingerprint + // mismatch → rebuildMDI repopulates m_pendingGpuCopies. + auto bufId = [&](ossia::attribute_semantic sem) -> uint64_t { + const auto v = extractGpuAttribute(*dc.mesh, sem); + return reinterpret_cast(v.buf); + }; + freshMeshFingerprint.push_back( + bufId(ossia::attribute_semantic::position)); + freshMeshFingerprint.push_back( + bufId(ossia::attribute_semantic::normal)); + freshMeshFingerprint.push_back( + bufId(ossia::attribute_semantic::texcoord0)); + freshMeshFingerprint.push_back( + bufId(ossia::attribute_semantic::tangent)); + } + } + + // Cloud fingerprint (threedim#2): rebuildPrimitiveClouds is only + // invoked on the full-rebuild branch, so any change to the primitive + // cloud set must mismatch this fingerprint to force that branch. We + // hash the same fields the function's internal per-bucket fingerprint + // and bucket geometry depend on — raw_data identity + content version, + // primitive_count, transform_slot, the world matrix (drives + // CloudMetaGPU.model + AABBs), and the bucket key derived from + // format_id — so added / removed / moved / re-uploaded clouds all flip + // it. Count is mixed first so a pure add/remove is always detected. + uint64_t freshCloudFingerprint = 0; + ossia::hash_combine( + freshCloudFingerprint, (uint64_t)fs.primitive_clouds.size()); + for(const auto& d : fs.primitive_clouds) + { + if(!d.cloud) + { + ossia::hash_combine(freshCloudFingerprint, (uint64_t)0); + continue; + } + // Bucket key (mirrors rebuildPrimitiveClouds): hash(format_id), or + // the cloud pointer when format_id is empty. + const uint64_t bucket_key + = !d.cloud->format_id.empty() + ? 
(uint64_t)(uint32_t)ossia::hash_string(d.cloud->format_id) + : (uint64_t)(uintptr_t)d.cloud.get(); + ossia::hash_combine(freshCloudFingerprint, bucket_key); + + const auto* raw = d.cloud->raw_data.get(); + ossia::hash_combine(freshCloudFingerprint, (uint64_t)(uintptr_t)raw); + const uint64_t content_id + = raw ? (raw->content_hash != 0 ? raw->content_hash + : (uint64_t)raw->dirty_index) + : 0u; + ossia::hash_combine(freshCloudFingerprint, content_id); + ossia::hash_combine( + freshCloudFingerprint, (uint64_t)d.cloud->primitive_count); + ossia::hash_combine( + freshCloudFingerprint, (uint64_t)d.transform_slot); + ossia::hash_combine( + freshCloudFingerprint, + ossia::hash_bytes(d.worldTransform.constData(), 64)); + } + + // Pack per-material UV transforms (KHR_texture_transform) and + // material extensions. Both buffers are read by the shader as + // `entries[pd.material_index]` where pd.material_index is the + // Material ARENA SLOT INDEX (parallel to `scene_materials`, + // which IS the registry's Material arena). The buffers therefore + // must also be arena-slot-indexed, not fs.materials-indexed — + // otherwise a 1-material scene whose loader-material lands at + // arena slot 1 reads entries[1] which is OUT OF BOUNDS, returning + // zeros, collapsing every UV transform to (0,0) scale → all + // textures sample pixel (0,0) → uniform color (the "solid gray + // DamagedHelmet" symptom). 
+ std::vector freshMaterialUVTransforms( + arenaSlotEntries); + std::vector freshMaterialExtensions( + arenaSlotEntries); + if(this->scene.state && this->scene.state->materials) + { + const auto& mats = *this->scene.state->materials; + auto pack_xform = [](float* dst_offset_scale, float* dst_rot, + const ossia::texture_ref& tr) { + dst_offset_scale[0] = tr.uv_transform.offset[0]; + dst_offset_scale[1] = tr.uv_transform.offset[1]; + dst_offset_scale[2] = tr.uv_transform.scale[0]; + dst_offset_scale[3] = tr.uv_transform.scale[1]; + *dst_rot = tr.uv_transform.rotation; + }; + for(std::size_t i = 0; i < mats.size(); ++i) + { + if(!mats[i]) + continue; + const uint32_t slot = arenaSlotForMaterial(mats[i].get()); + if(slot >= arenaSlotEntries) + continue; + auto& g = freshMaterialUVTransforms[slot]; + pack_xform(g.bc_offset_scale, &g.rotations0[0], mats[i]->base_color_texture); + pack_xform(g.mr_offset_scale, &g.rotations0[1], mats[i]->metallic_roughness_texture); + pack_xform(g.normal_offset_scale, &g.rotations0[2], mats[i]->normal_texture); + pack_xform(g.em_offset_scale, &g.rotations0[3], mats[i]->emissive_texture); + pack_xform(g.occ_offset_scale, &g.rotations1[0], mats[i]->occlusion_texture); + + // Material extensions are already packed by flattenScene at + // fs.material_extensions[i]; copy into the arena-slot index. + if(i < fs.material_extensions.size()) + freshMaterialExtensions[slot] = fs.material_extensions[i]; + } + } + + const bool meshesUnchanged + = (freshMeshFingerprint == m_cachedMeshFingerprint) + && m_outputSpec.meshes + // If any aux buffer was just reallocated we need to republish + // the output geometry so downstream picks up the new pointers. + // rebuildMDI does this cleanly by building a fresh geometry + // with wrapGpu() wrappers over the current buffer pointers. 
+ && !auxBuffersChanged + // Cloud set unchanged (threedim#2): rebuildPrimitiveClouds only + // runs on the full-rebuild branch and re-appends its bucket + // geometries onto the freshly rebuilt mesh list, so any cloud + // add / remove / move / re-upload must drop us off the fast path. + && (freshCloudFingerprint == m_cachedCloudFingerprint) + // The fast path's freshPerDraws / freshMeshFingerprint cover + // fs.draws ONLY. fs.instances cmds (their world transforms, + // instance counts, prototype identities, per-instance + // GPU-buffer copies) are processed exclusively inside + // rebuildMDI(); skipping it means Instancer control changes + // and per-particle-data updates from upstream CSF compute + // pipelines never reach the GPU. Force the full rebuild + // path whenever any instance group is present. + && fs.instances.empty(); + + if(meshesUnchanged) + { + // Fast path: only diff-upload the small scene-level SSBOs. The + // big vertex/index/indirect buffers are left alone, and + // m_outputSpec.meshes is kept as the same shared_ptr (so + // NodeRenderer::process on the downstream side sees + // `this->geometry == v` and doesn't even flag geometryChanged). + // scene_lights is the RawLight arena; producers keep it fresh + // in their own update() hooks. Only the compact indices list + // needs a diff upload. + diffUpload(res, m_lightIndicesBuffer, m_cachedLightIndices, + freshLightIndices); + // scene_materials: producer + loader-material upload pass + // above already pushed MaterialGPU bytes into the Material + // arena. Nothing to diff-upload here. 
+ diffUpload(res, m_materialsExtBuffer, m_cachedMaterialExt, + freshMaterialExtensions); + diffUpload(res, m_materialUVTransformsBuffer, + m_cachedMaterialUVTransforms, freshMaterialUVTransforms); + diffUpload(res, m_mdi.per_draws, m_cachedPerDraws, freshPerDraws); + // per_draw_bounds is static across a frame (local-space AABB, + // never changes per-frame for the same topology) — on the fast + // path the mirror and fresh arrays match element-for-element and + // diffUpload short-circuits to zero uploads. Kept in the fast + // path for robustness (e.g. a material-swap flow that re-picks + // a primitive variant with different bounds under the hood). + diffUpload(res, m_mdi.per_draw_bounds, m_cachedPerDrawBounds, + freshPerDrawBounds); + } + else + { + // Something structural changed (meshes added/removed/reordered). + // Fall back to the full rebuild path. scene_lights arena bytes + // are maintained by each Light producer's update() hook — we + // only push the compacted indices list here. + if(!freshLightIndices.empty()) + res.uploadStaticBuffer( + m_lightIndicesBuffer, 0, + freshLightIndices.size() * sizeof(uint32_t), + freshLightIndices.data()); + // scene_materials: arena upload already happened above (see + // the "loader-material arena slot upload" block). + if(!freshMaterialExtensions.empty()) + res.uploadStaticBuffer( + m_materialsExtBuffer, 0, + freshMaterialExtensions.size() * sizeof(MaterialExtensionsGPU), + freshMaterialExtensions.data()); + if(!freshMaterialUVTransforms.empty()) + res.uploadStaticBuffer( + m_materialUVTransformsBuffer, 0, + freshMaterialUVTransforms.size() * sizeof(MaterialUVTransformGPU), + freshMaterialUVTransforms.data()); + + rebuildMDI(renderer, res, fs, materialTagHashes); + rebuildPrimitiveClouds(renderer, res, fs); + + // Seed the CPU mirrors from the fresh data so subsequent frames + // can take the fast path via diffUpload. 
+ m_cachedMeshFingerprint = std::move(freshMeshFingerprint); + m_cachedCloudFingerprint = freshCloudFingerprint; + m_cachedLightIndices = std::move(freshLightIndices); + m_cachedMaterialExt = std::move(freshMaterialExtensions); + m_cachedMaterialUVTransforms = std::move(freshMaterialUVTransforms); + // m_cachedPerDraws / m_cachedPerDrawBounds are NOT seeded here: + // rebuildMDI() already assigned them from acc.perDraws (the + // actually-emitted set, after emitDraw's skip predicate), so the + // mirror matches the GPU per_draws layout slot-for-slot. Seeding + // from freshPerDraws (filtered only by vertices>0) would reintroduce + // the threedim#3 divergence whenever a draw was skipped. + } + + // Camera + Env UBOs are packed above, before rebuildMDI, so that the + // geometry's auxiliary entries reference valid buffer pointers. The + // pre-sized capacity keeps those pointers stable across parameter + // changes on the fast path (no re-rebuild needed). + + // scene_counts SSBO: tell shaders the authoritative N for each + // SSBO (so they don't rely on `.length()` which reports buffer + // capacity and includes zeroed tail slots when counts shrank). + // Uploaded only when a count actually changed. + // light_count is the arena-addressable subset (matches + // m_cachedLightIndices / scene_light_indices). Post 28b-shader + // flip: shaders iterate via the indices buffer, so this count + // drives that loop. + SceneCountsUBO sc{ + (uint32_t)m_cachedLightIndices.size(), + (uint32_t)fs.materials.size(), + (uint32_t)m_mdi.drawCount, + 0u}; + if(std::memcmp(&sc, &m_cachedSceneCounts, sizeof(sc)) != 0) + { + // Allocation is Static + StorageBuffer on every backend, so the + // upload always goes through uploadStaticBuffer — at 16 bytes + // the difference vs updateDynamicBuffer is negligible anyway. 
+ res.uploadStaticBuffer(m_sceneCountsBuffer, 0, sizeof(sc), &sc); + m_cachedSceneCounts = sc; + } + + // shadow_cascades UBO: populated from scene_state.shadow_cascades + // (authored upstream by Threedim::ShadowCascadeSetup). Straight + // struct copy — the CPU-side shadow_cascades_info layout mirrors + // the GPU ShadowCascadesUBO field-for-field: light_view_proj[8] + // (column-major mat4 array), split_view_depths[9] compacted into + // cascade_split_distances[8], cascade_count (uint32). Diff-uploaded + // against the cached snapshot so frames without topology / camera + // changes cost zero UBO bytes. + // + // When no upstream authored cascades (the field defaults to + // cascade_count=0), we still publish the UBO with zero count so + // downstream shaders that declare `shadow_cascades` as INPUT have + // a valid binding and fall through their own "cascade_count == 0 + // → skip shadow sampling" guard. + ShadowCascadesUBO sh{}; + if(this->scene.state) + { + const auto& src = this->scene.state->shadow_cascades; + sh.cascade_count + = std::min(src.cascade_count, + ossia::shadow_cascades_info::max_cascades); + std::memcpy( + sh.light_view_proj, src.light_view_proj, + sizeof(sh.light_view_proj)); + // Shaders sample cascade_split_distances[k] for cascade picks; + // slot k is the far-plane Z of cascade k (view-space). + // CPU-side stores count+1 entries in split_view_depths[]; copy + // up to max_cascades slots so UBO and source stay symmetric. + // For k >= count we emit 0 — the shader's pickCascade() clamps + // against cascade_count first, so trailing zeros are never read. + const uint32_t kLayoutSlots = ossia::shadow_cascades_info::max_cascades; // 8 + for(uint32_t k = 0; k < kLayoutSlots; ++k) + { + // split_view_depths[] has (count+1) entries; slot k is the far + // plane of cascade k. Guard with <= cascade_count (not <) so + // the sentinel entry at index cascade_count is also copied. + sh.cascade_split_distances[k] + = (k <= sh.cascade_count) + ? 
src.split_view_depths[k] + : 0.f; + } + } + if(!m_shadowCascadesSeeded + || std::memcmp(&sh, &m_cachedShadowCascades, + sizeof(ShadowCascadesUBO)) != 0) + { + res.updateDynamicBuffer( + m_shadowCascadesBuffer, 0, sizeof(sh), &sh); + m_cachedShadowCascades = sh; + m_shadowCascadesSeeded = true; + } + + // Instance components are now handled directly inside rebuildMDI + // (above) — every fs.instances entry rides through the same + // unified indirect-cmd batch as fs.draws. No separate sub-mesh + // emission step is needed. + } + + m_cachedSceneState = this->scene.state.get(); + m_cachedVersion = this->scene.state ? this->scene.state->version : -1; + this->sceneChanged = false; + + // Skybox + texture-channel changes propagate through the geometry's + // auxiliary_texture entries on Geometry Out — consumer shaders + // re-resolve pointers per frame via try_bind_texture_from_geometry. + // Phase 4 also bumps mesh identity on channel-array realloc so + // downstream's update() reruns without missing a rebind. + } + + // Resolve an MDI attribute enum to the matching arena stream buffer + // (Plan 09 S4 — streams moved from MDIState to the registry). + // Returns nullptr when m_registry is null or when `a` matches none of + // the four cases handled by the switch. + QRhiBuffer* mdiBufferFor(MdiAttr a) const noexcept + { + if(!m_registry) + return nullptr; + using Stream = GpuResourceRegistry::MeshStream; + switch(a) + { + case MdiAttr::Positions: return m_registry->meshStreamBuffer(Stream::Positions); + case MdiAttr::Normals: return m_registry->meshStreamBuffer(Stream::Normals); + case MdiAttr::Texcoords: return m_registry->meshStreamBuffer(Stream::Texcoords); + case MdiAttr::Tangents: return m_registry->meshStreamBuffer(Stream::Tangents); + } + return nullptr; + } + + // Issue every pending GPU→GPU copy queued during update().
Called every + // frame in runInitialPasses regardless of whether update() rebuilt the + // accumulator — upstream GPU buffer CONTENTS change every frame (CSF + // compute writes) while the buffer HANDLES + MDI offsets stay stable as + // long as no draw-topology change occurred. The queue is rebuilt (via + // clear + repopulate at the top of the accumulator loop) only when the + // scene actually changed; otherwise the same ops fire with fresh data. + // + // Stride-equal-to-element copies collapse to a single copyBuffer; + // vec4→vec3-style strided copies build one copy region per vertex and + // submit them through a single copyBufferRegions call per op + // (acceptable for typical CSF point clouds of a few thousand + // vertices). + void issuePendingGpuCopies(RenderList& renderer, QRhiCommandBuffer& cb) + { + if(m_pendingGpuCopies.empty()) + return; + auto* rhi = renderer.state.rhi; + if(!rhi) + return; + cb.beginExternal(); + // One compute→transfer barrier for the whole batch instead of one per + // copy call — eliminates N−1 redundant pipeline stalls on Vulkan. + score::gfx::beginBufferCopyBarrier(*rhi, cb); + // Scratch reused across ops — avoids reallocating for each strided op. + std::vector regions; + for(const auto& op : m_pendingGpuCopies) + { + // Explicit dst wins over the mesh-stream lookup — used by the + // unified-MDI per-instance concat copies (translations / colors) + // which target preprocessor-owned buffers, not arena streams. + QRhiBuffer* dst = op.dst ? op.dst : mdiBufferFor(op.attr); + if(!op.src || !dst) + continue; + if(op.src_stride == 0 || op.src_stride == op.element_size) + { + // Tight source layout — one copy, no per-call barrier (batched). + score::gfx::copyBuffer( + *rhi, cb, op.src, dst, + op.vertex_count * op.element_size, + op.src_offset, op.dst_offset, + score::gfx::BufferCopyBarrier::None); + } + else + { + // Strided source — src slot size differs from MDI slot size.
+ // Per-vertex copy of min(src_stride, element_size) bytes: the + // overlap between the two layouts (e.g. tight vec3 src (12 B) → + // padded-vec4 MDI slot (16 B) → copy the 12 B of real data into + // each slot's low bytes; zero-fill from uploadStaticBuffer covers + // the trailing padding). + const int per_vertex + = std::min(op.src_stride, op.element_size); + regions.clear(); + regions.reserve(op.vertex_count); + for(int v = 0; v < op.vertex_count; ++v) + { + regions.push_back( + {op.src_offset + v * op.src_stride, + op.dst_offset + v * op.element_size, + per_vertex}); + } + score::gfx::copyBufferRegions( + *rhi, cb, op.src, dst, regions.data(), (int)regions.size(), + score::gfx::BufferCopyBarrier::None); + } + } + score::gfx::endBufferCopyBarrier(*rhi, cb); + cb.endExternal(); + // Intentionally NOT clearing m_pendingGpuCopies here — the list is + // owned by the accumulator and persists across cache-hit frames so + // updates to upstream buffer contents keep flowing through. + } + + // Push the produced geometry_spec to the downstream renderer's input port. + // Before the hand-off it also issues the pending GPU→GPU copies and the + // world_transforms → world_transforms_prev snapshot, each gated to at + // most once per renderer.frame. + void runInitialPasses( + RenderList& renderer, QRhiCommandBuffer& commands, + QRhiResourceUpdateBatch*& res, Edge& edge) override + { + // Plan 09 S6: debug marker for capture-tool readability. + commands.debugMarkBegin(QByteArrayLiteral("ScenePreprocessor")); + struct MarkEnd + { + QRhiCommandBuffer* c; + ~MarkEnd() { c->debugMarkEnd(); } + } _me{&commands}; + + // GPU→GPU copies run before the geometry_spec hand-off so the + // destination MDI buffers are populated by the time the downstream + // rasterizer starts reading them. Frame-gated (threedim#13) — the + // copies target shared MDI buffers, so one batch per frame serves every + // consumer; without the gate a node feeding K downstreams issues K + // identical copy batches. Same renderer.frame token discipline as the + // world-transforms snapshot below.
+ if(m_lastGpuCopiesFrame != renderer.frame) + { + issuePendingGpuCopies(renderer, commands); + m_lastGpuCopiesFrame = renderer.frame; + } + + // Snapshot last frame's world_transforms into the prev buffer via + // a pure GPU copy, then apply this frame's per-slot writes via the + // (post-snapshot) resource-update batch. The ordering invariant is: + // + // commands stream : ... [updateBatch_N applied] [copyBuffer current→prev] ... + // res (next batch) : [uploadStaticBuffer per slot] + // RenderList submits : ^ next iteration + // + // So the copy reads m_worldTransformsBuffer at its frame-N-1 + // contents (no frame-N writes have hit it yet — those are queued + // in `*res`, applied AFTER this function returns), and the next + // beginPass sees current = frame N + prev = frame N-1. + // + // Gate on renderer.frame because runInitialPasses fires once per + // outgoing edge: without the guard a node feeding K downstreams + // would queue K back-to-back current→prev copies (the second-and- + // later seeing prev = current = frame N) and would re-upload the + // pending writes K times. Within one frame renderer.frame is + // stable; across frames it advances monotonically, so the + // mismatch correctly discriminates "first call this frame". + // + // Fire EVERY frame (not gated on pending non-empty): for a static + // scene the per-frame copy is what KEEPS prev == current, so + // motion vectors stay zero. A previous attempt to skip when pending + // was empty froze prev at the value from the last animated frame + // and produced ghost motion on idle scenes. + // + // The previous CB-pointer discriminator was broken: every QRhi + // backend's QRhiSwapChain::currentFrameCommandBuffer returns the + // address of a single by-value cbWrapper member, so the pointer is + // constant across frames and the gate fired exactly once per + // swapchain lifetime — freezing world_transforms / _prev at frame + // 0 (motion vectors / TAA / reprojection silently broken).
+ // + // Frame 0 sees prev=zeroes → first-frame MV is large; consumer + // shaders handle that via frame-index / temporal accumulation. + // Auto barrier covers the compute↔transfer hazards around the copy. + if(m_worldTransformsBuffer && m_worldTransformsPrevBuffer + && m_worldTransformsCap > 0 + && m_lastSnapshotFrame != renderer.frame) + { + commands.beginExternal(); + copyBuffer( + *renderer.state.rhi, commands, + m_worldTransformsBuffer, m_worldTransformsPrevBuffer, + (int)m_worldTransformsCap); + commands.endExternal(); + + // Drain deferred per-slot writes into the next resource batch + // (`res` — distinct from the batch already submitted in + // RenderList::renderInternal before this function ran). The + // batch is submitted later, AFTER the copy above has executed. + // Each write lands at slot * sizeof(WorldTransformMat4) bytes. + // NOTE(review): when `res` is null the writes stay queued, yet + // m_lastSnapshotFrame is still advanced below, so they are not + // retried within this frame — confirm that is intended. + if(res && !m_pendingWorldXformWrites.empty()) + { + for(const auto& [slot, m] : m_pendingWorldXformWrites) + { + const uint32_t byte_offset + = slot * (uint32_t)sizeof(WorldTransformMat4); + res->uploadStaticBuffer( + m_worldTransformsBuffer, byte_offset, + (quint32)sizeof(WorldTransformMat4), &m); + } + m_pendingWorldXformWrites.clear(); + } + + m_lastSnapshotFrame = renderer.frame; + } + + // Position of edge.source within its node's output list, or -1 when + // the source port or its node is null. + auto* src = edge.source; + const int src_port_idx = src && src->node + ? int(std::find(src->node->output.begin(), src->node->output.end(), src) + - src->node->output.begin()) + : -1; + + // Only the Geometry output (port 0) pushes a geometry_spec — it's + // the sole remaining output. Guard kept for robustness in case the + // port layout is extended again.
+ if(src_port_idx != 0) + return; + if(!m_outputSpec.meshes) + return; + + auto* sink = edge.sink; + if(!sink || !sink->node) + return; + + auto rn_it = sink->node->renderedNodes.find(&renderer); + if(rn_it == sink->node->renderedNodes.end()) + return; + + auto it = std::find(sink->node->input.begin(), sink->node->input.end(), sink); + if(it == sink->node->input.end()) + return; + + int port_idx = (int)(it - sink->node->input.begin()); + BUFTRACE() << "ScenePreprocessor → sink_node=" << sink->node->nodeId + << " port=" << port_idx + << " mdi_indices=" + << (void*)(m_registry ? m_registry->meshStreamBuffer( + GpuResourceRegistry::MeshStream::Indices) : nullptr) + << " mdi_positions=" + << (void*)(m_registry ? m_registry->meshStreamBuffer( + GpuResourceRegistry::MeshStream::Positions) : nullptr) + << " mdi_drawCmds=" << (void*)m_mdi.indirect_draw_cmds + << " mdi_drawCount=" << (quint32)m_mdi.drawCount; + rn_it->second->process(port_idx, m_outputSpec, edge.source); + } + + void runRenderPass(RenderList&, QRhiCommandBuffer&, Edge&) override { } + + // Data-only renderer — no per-edge GPU pass state to release. All GPU + // resources live on the renderer itself (buffers, textures) and are + // dropped in releaseState; nothing is keyed by output edge. + void removeOutputPass(RenderList&, Edge&) override { } +}; + +ScenePreprocessorNode::ScenePreprocessorNode() +{ + // Port 0: Scene input (carries scene_spec — carries EVERYTHING, + // including the environment and its skybox/IBL textures). + input.push_back(new Port{this, {}, Types::Scene, {}}); + + // Single outlet: geometry (concatenated MDI geometry). Scene-wide + // UBOs/SSBOs (per_draws, indirect_draw_cmds, scene_lights, + // scene_materials, scene_counts, camera, env) ride along as + // auxiliary_buffer entries; per-channel material texture arrays + // (base_color_array, metal_rough_array, normal_array, emissive_array) + // and the environment skybox ride along as auxiliary_texture entries.
+ // Consumer shaders bind them all by name via + // try_bind_from_geometry / try_bind_texture_from_geometry. + output.push_back(new Port{this, {}, Types::Geometry, {}}); +} + +ScenePreprocessorNode::~ScenePreprocessorNode() = default; + +NodeRenderer* ScenePreprocessorNode::createRenderer(RenderList& /*r*/) const noexcept +{ + return new RenderedScenePreprocessorNode{*this}; +} + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.hpp new file mode 100644 index 0000000000..c8cdfc5388 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ScenePreprocessorNode.hpp @@ -0,0 +1,54 @@ +#pragma once +#include + +namespace score::gfx +{ + +/** + * @brief Bridge from `scene_spec` (hierarchical, CPU) to `geometry_spec` + * (flat, GPU-resident). + * + * Receives a `scene_spec` on its input port, walks the hierarchy, and emits + * a `geometry_spec` on its output port containing one geometry per scene + * mesh primitive. 
Each output geometry carries a set of well-known + * auxiliary buffers: + * + * - `scene_lights` : LightGPU[] (per scene light_component) + * - `scene_materials` : MaterialGPU[] (per scene material) + * - `scene_materials_ext` : MaterialExtGPU[] (extended material data) + * - `per_draws` : PerDrawGPU[] (one per draw: model/normal mat, + * material/transform/skeleton slots) + * - `indirect_draw_cmds` : IndirectCmd[] (MDI command buffer; one per draw) + * - `scene_counts` : SceneCountsUBO (draw/light/material counts) + * - `camera` : CameraUBO (current-frame camera matrices) + * - `camera_prev` : CameraUBO (previous-frame camera matrices) + * - `env` : EnvUBO (environment/fog parameters) + * - `world_transforms` : mat4[] (current frame, slot-indexed) + * - `world_transforms_prev` : mat4[] (previous frame, for TAA/motion) + * - `scene_light_indices` : uint[] (light culling index list) + * + * Conditionally emitted (when present in the scene): + * - `scene_material_uv_xforms` : mat3[] (per-material UV transforms) + * - `per_draw_bounds` : AABB[] (per-draw world-space bounds) + * - `shadow_cascades` : CascadeUBO[] (shadow cascade matrices) + * + * Per-draw indexing in shaders uses the MDI `firstInstance` / `gl_DrawID` + * mechanism. Shaders read `per_draws[gl_DrawID]` to recover model/normal + * matrices and slot indices into the shared tables. 
+ * + * Inputs: + * - Port 0: Scene (Types::Scene) + * + * Outputs: + * - Port 0: Geometry (Types::Geometry) — flattened scene + */ +class SCORE_PLUGIN_GFX_EXPORT ScenePreprocessorNode : public ProcessNode +{ +public: + ScenePreprocessorNode(); + ~ScenePreprocessorNode() override; + + score::gfx::NodeRenderer* createRenderer(RenderList& r) const noexcept override; +}; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp index 8683367fbc..43b28cd5ea 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/ScreenNode.cpp @@ -7,6 +7,7 @@ #include #include +#include #include @@ -42,12 +43,95 @@ #include #endif +#include +#include +#include #include #include +#include +#include #include +#include + namespace score::gfx { +namespace +{ +// Persistent pipeline cache. Saved on QRhi destruction, loaded right after +// QRhi creation. Keyed per backend so different APIs don't overwrite each +// other's cache. Gated on QRhi::Feature::PipelineCacheDataLoadSave. 
+static QString pipelineCacheFilePath(GraphicsApi api) +{ + QString root = QStandardPaths::writableLocation(QStandardPaths::CacheLocation); + if(root.isEmpty()) + root = QDir::tempPath(); + QDir().mkpath(root + QStringLiteral("/ossia-score/pipeline-cache")); + const char* apiName = "unknown"; + switch(api) + { + case Null: apiName = "null"; break; + case OpenGL: apiName = "gl"; break; + case Vulkan: apiName = "vk"; break; + case D3D11: apiName = "d3d11"; break; + case D3D12: apiName = "d3d12"; break; + case Metal: apiName = "metal"; break; + } + return QStringLiteral("%1/ossia-score/pipeline-cache/%2.bin") + .arg(root) + .arg(QString::fromLatin1(apiName)); +} + +static void tryLoadPipelineCache(QRhi* rhi, GraphicsApi api) +{ + if(!rhi || !rhi->isFeatureSupported(QRhi::PipelineCacheDataLoadSave)) + return; + QFile f(pipelineCacheFilePath(api)); + if(!f.open(QIODevice::ReadOnly)) + return; + rhi->setPipelineCacheData(f.readAll()); +} + +// Pure disk I/O — no QRhi access, so it is safe to run off the render thread. +static void writePipelineCacheToDisk(QByteArray data, GraphicsApi api) +{ + if(data.isEmpty()) + return; + QFile f(pipelineCacheFilePath(api)); + if(!f.open(QIODevice::WriteOnly | QIODevice::Truncate)) + return; + f.write(data); +} + +// Synchronous store: grabs the cache bytes from the QRhi (must be on the +// render thread) and writes them inline. Used on shutdown (preRhiDestroy) +// where the QRhi is about to be destroyed and we must finish before it goes. +static void tryStorePipelineCache(QRhi* rhi, GraphicsApi api) +{ + if(!rhi || !rhi->isFeatureSupported(QRhi::PipelineCacheDataLoadSave)) + return; + writePipelineCacheToDisk(rhi->pipelineCacheData(), api); +} + +// Mid-session store: grabs the cache bytes on the render thread (QRhi access), +// then offloads the blocking file write to a worker thread so the render +// thread doesn't stall on disk I/O right after a PSO-compile burst. 
The +// QByteArray is copied into the task (implicitly shared, cheap) and outlives +// the QRhi-independent write. +static void tryStorePipelineCacheAsync(QRhi* rhi, GraphicsApi api) +{ + if(!rhi || !rhi->isFeatureSupported(QRhi::PipelineCacheDataLoadSave)) + return; + QByteArray data = rhi->pipelineCacheData(); + if(data.isEmpty()) + return; + QThreadPool::globalInstance()->start( + [data = std::move(data), api]() mutable { + writePipelineCacheToDisk(std::move(data), api); + }); +} +} + std::shared_ptr createRenderState(GraphicsApi graphicsApi, QSize sz, QWindow* window) { @@ -58,14 +142,29 @@ createRenderState(GraphicsApi graphicsApi, QSize sz, QWindow* window) const auto& settings = score::AppContext().settings(); state.samples = settings.resolveSamples(graphicsApi); - auto populateCaps = [](RenderState& s) { -#if QT_VERSION >= QT_VERSION_CHECK(6, 12, 0) + auto populateCaps = [graphicsApi](RenderState& s) { + // Load persisted pipeline cache (if any) and set up a save-on-destroy + // hook that writes it back before QRhi is deleted. if(s.rhi) { - s.caps.drawIndirect = s.rhi->isFeatureSupported(QRhi::DrawIndirect); - s.caps.drawIndirectMulti = s.rhi->isFeatureSupported(QRhi::DrawIndirectMulti); + tryLoadPipelineCache(s.rhi, graphicsApi); + QRhi* rhiPtr = s.rhi; + s.preRhiDestroy = [rhiPtr, graphicsApi]() { + tryStorePipelineCache(rhiPtr, graphicsApi); + }; + // Plan 09 S6: mid-session flush for crash-resilient cache + // persistence. RenderList::render throttles this after PSO + // stalls; the QRhi read happens here on the render thread but the + // blocking file write is offloaded to a worker so the render + // thread isn't stalled on disk right after a PSO-compile burst. + s.savePipelineCache = [rhiPtr, graphicsApi]() { + tryStorePipelineCacheAsync(rhiPtr, graphicsApi); + }; + } + if(s.rhi) + { + s.caps.populate(*s.rhi); } -#endif // Clamp the requested sample count against what the hardware actually // supports. Without this, asking for e.g. 
16x MSAA on a card that only // does 8x silently mismatches between the value stored in @@ -109,6 +208,17 @@ createRenderState(GraphicsApi graphicsApi, QSize sz, QWindow* window) #ifndef NDEBUG flags |= QRhi::EnableDebugMarkers; #endif + // Let the RHI save per-backend pipeline binary cache so subsequent runs + // skip the initial pipeline compilation cost (big win for Vulkan/D3D12). + flags |= QRhi::EnablePipelineCacheDataSave; + + // Enable per-command-buffer GPU timestamps. Required for the per-pass + // GPU timing panel (Plan 09 S6) — without this flag, + // QRhiCommandBuffer::lastCompletedGpuTime() returns 0 on Vulkan/D3D12/Metal. + // Negligible overhead when no timer instance is active. +#if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) + flags |= QRhi::EnableTimestamps; +#endif #ifndef QT_NO_OPENGL if(graphicsApi == OpenGL) @@ -289,13 +399,18 @@ ScreenNode::~ScreenNode() { if(m_swapChain) { - m_swapChain->deleteLater(); - + // Order matters: clear the alias + flag on the Window BEFORE releasing + // the QRhiSwapChain. A queued QExposeEvent landing between the deferred + // delete and the nullings would otherwise observe the inconsistent + // state (m_hasSwapChain == true && m_swapChain still aliasing freed + // memory). See diagnostic 047. if(m_window) { - m_window->m_swapChain = nullptr; m_window->m_hasSwapChain = false; + m_window->m_swapChain = nullptr; } + + m_swapChain->deleteLater(); } if(m_window && m_window->state) @@ -360,8 +475,8 @@ void ScreenNode::onRendererChange() return; } } + m_window->m_canRender = false; } - m_window->m_canRender = false; } void ScreenNode::stopRendering() @@ -380,7 +495,13 @@ void ScreenNode::stopRendering() void ScreenNode::setRenderer(std::shared_ptr r) { - m_window->state->renderer = r; + // m_window can be null after destroyOutput() (which calls m_window.reset()). + // Reachable from Graph::createOutputRenderList paths after a graphics-API + // switch / sample-count change / output-disable cycle. 
Sibling guards + // already exist in stopRendering and onRendererChange below; this one + // was missed when those were patched. + if(m_window && m_window->state) + m_window->state->renderer = r; } RenderList* ScreenNode::renderer() const @@ -425,12 +546,28 @@ void ScreenNode::setConfiguration(Configuration conf) void ScreenNode::setSwapchainFlag(Gfx::SwapchainFlag flag) { + if(m_swapchainFlag == flag) + return; m_swapchainFlag = flag; + // Live flag change (sRGB toggle) requires the swapchain to be recreated + // with the new flag bits — setFlags happens in createOutput at line ~667. + // destroyOutput tears down; Graph::createOutputRenderList rebuilds on + // next reconcile (same pattern updateGraphicsAPI uses for sample-count). + if(m_window) + destroyOutput(); } void ScreenNode::setSwapchainFormat(Gfx::SwapchainFormat format) { + if(m_swapchainFormat == format) + return; m_swapchainFormat = format; + // Same rebuild rationale as setSwapchainFlag above. setFormat happens at + // line ~650 inside createOutput; without the rebuild the field stayed + // updated but the live swapchain kept its prior format (HDR↔SDR toggle + // was silently inert). + if(m_window) + destroyOutput(); } void ScreenNode::setSize(QSize sz) @@ -628,6 +765,35 @@ void ScreenNode::destroyOutput() if(!m_window) return; + // Drain the GPU before tearing anything down. Without this, queued frames + // can still reference the swapchain / RPD / depth-stencil while we're + // freeing them — and worse, when setSwapchainFormat / setSwapchainFlag + // call destroyOutput synchronously (commit e2afe7874), the host window's + // last beginFrame may still hold an unfinished cbWrapper referenced by + // ScenePreprocessor's per-frame copyBuffer (commit fe146c8de). 
The next + // runInitialPasses then records vkCmdCopyBuffer / vkCmdPipelineBarrier + // into a CB whose underlying VkCommandBuffer was already vkEndCommandBuffer'd + // (VUID-vkCmdCopyBuffer-commandBuffer-recording / VUID-vkCmdPipelineBarrier- + // commandBuffer-recording), often followed by a device loss. + // + // MultiWindowNode::destroyOutput already does this at line ~1068; mirror it. + if(m_window->state && m_window->state->rhi) + { + // Pre-condition: destroyOutput must not be called inside a frame + // (between beginFrame and endFrame). If this fires, some upstream + // path triggered a teardown mid-render — the cascade would be + // worse than just deferring to next frame. + SCORE_ASSERT(!m_window->state->rhi->isRecordingFrame()); + m_window->state->rhi->finish(); + } + + // Persist-across-rebuild contract: the registry survives RL teardown + // so we must explicitly release its QRhi resources here, BEFORE + // RenderState::destroy() (called below via m_window->state->destroy()) + // frees the device. destroyOwned() `delete`s the buffer / texture / + // sampler wrappers directly while the QRhi is still alive. + releaseRegistry(); + delete m_depthStencil; m_depthStencil = nullptr; @@ -643,14 +809,19 @@ void ScreenNode::destroyOutput() //delete s.renderBuffer; //s.renderBuffer = nullptr; - delete m_swapChain; - m_swapChain = nullptr; - + // Order matters: clear the alias + flag on the Window BEFORE deleting + // the QRhiSwapChain (see diagnostic 047). A queued event reaching + // Window::exposeEvent between the delete and the nulling would + // otherwise observe (m_hasSwapChain == true && m_swapChain dangling). 
if(m_window) { + m_window->m_hasSwapChain = false; m_window->m_swapChain = nullptr; } + delete m_swapChain; + m_swapChain = nullptr; + if(m_window) { if(auto s = m_window->state) @@ -742,6 +913,13 @@ score::gfx::OutputNodeRenderer* ScreenNode::createRenderer(RenderList& r) const score::gfx::TextureRenderTarget rt; rt.renderTarget = m_swapChain->currentFrameRenderTarget(); rt.renderPass = r.state.renderPassDescriptor; + // No depth attachment exposed here on purpose: ScaledRenderer is a + // fullscreen-quad blit that samples the upstream color texture and does + // not run depth test. All precision-critical 3D rendering happens + // upstream into an intermediate D32F offscreen render target allocated + // by createRenderTarget(...) in Utils.cpp. The swap chain's D24S8 + // DepthStencil buffer is only attached at the QRhi level for the final + // blit pass — irrelevant to 3D depth precision. // FIXME why doesn't it work? // return new BasicRenderer{rt, r.state, *this}; return new Gfx::ScaledRenderer{rt, r.state, *this}; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp index 85da1f6bf3..3c504bb19f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include @@ -8,13 +10,48 @@ namespace score::gfx { +static const constexpr auto blit_vs = R"_(#version 450 +layout(location = 0) in vec2 position; +layout(location = 1) in vec2 texcoord; +layout(location = 0) out vec2 v_texcoord; + +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; + +out gl_PerVertex { vec4 gl_Position; }; + +void main() +{ + v_texcoord = texcoord; + gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.); +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + gl_Position.y = - gl_Position.y; +#endif 
+} +)_"; + +static const constexpr auto blit_fs = R"_(#version 450 +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 renderSize; +} renderer; + +layout(binding = 3) uniform sampler2D blitTexture; +layout(location = 0) in vec2 v_texcoord; +layout(location = 0) out vec4 fragColor; + +void main() { fragColor = texture(blitTexture, v_texcoord); } +)_"; + SimpleRenderedISFNode::SimpleRenderedISFNode(const ISFNode& node) noexcept : score::gfx::NodeRenderer{node} , n{const_cast(node)} { } -void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex) +void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex) { int sampler_idx = 0; for(auto* p : node.input) @@ -22,7 +59,12 @@ void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* t if(p == &input) break; if(p->type == Types::Image) + { sampler_idx++; + // Skip the depth sampler that follows ports with SamplableDepth + if((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) + sampler_idx++; + } } if(sampler_idx < (int)m_inputSamplers.size()) @@ -35,6 +77,57 @@ void SimpleRenderedISFNode::updateInputTexture(const Port& input, QRhiTexture* t if(pass.p.srb) score::gfx::replaceTexture(*pass.p.srb, sampl.sampler, tex); } + + // Update the depth sampler if the port has SamplableDepth + if(depthTex + && (input.flags & Flag::SamplableDepth) == Flag::SamplableDepth + && sampler_idx + 1 < (int)m_inputSamplers.size()) + { + auto& depthSampl = m_inputSamplers[sampler_idx + 1]; + if(depthSampl.texture != depthTex) + { + depthSampl.texture = depthTex; + for(auto& [e, pass] : m_passes) + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, depthSampl.sampler, depthTex); + } + } + } +} + +void SimpleRenderedISFNode::updateInputSamplerFilter( + const Port& input, const RenderTargetSpecs& spec) +{ + int sampler_idx = 0; + for(auto* p : node.input) + { + if(p == &input) + break; + if(p->type == 
Types::Image) + sampler_idx += ((p->flags & Flag::SamplableDepth) == Flag::SamplableDepth) ? 2 : 1; // a SamplableDepth port owns two consecutive samplers (color, then depth) — keep the index in step with updateInputTexture + } + + if(sampler_idx < (int)m_inputSamplers.size()) + { + auto* sampler = m_inputSamplers[sampler_idx].sampler; + if(sampler->magFilter() == spec.mag_filter + && sampler->minFilter() == spec.min_filter + && sampler->mipmapMode() == spec.mipmap_mode + && sampler->addressU() == spec.address_u + && sampler->addressV() == spec.address_v + && sampler->addressW() == spec.address_w) + { + // See RenderedISFNode::updateInputSamplerFilter — skip the + // sampler->create() when nothing actually needs updating. + return; + } + sampler->setMagFilter(spec.mag_filter); + sampler->setMinFilter(spec.min_filter); + sampler->setMipmapMode(spec.mipmap_mode); + sampler->setAddressU(spec.address_u); + sampler->setAddressV(spec.address_v); + sampler->setAddressW(spec.address_w); + sampler->create(); } } @@ -79,7 +172,8 @@ std::vector SimpleRenderedISFNode::allSamplers() const noexcept } void SimpleRenderedISFNode::initPass( - const TextureRenderTarget& renderTarget, RenderList& renderer, Edge& edge) + const TextureRenderTarget& renderTarget, RenderList& renderer, Edge& edge, + QRhiResourceUpdateBatch& res) { auto& model_passes = n.descriptor().passes; SCORE_ASSERT(model_passes.size() == 1); @@ -92,12 +186,53 @@ void SimpleRenderedISFNode::initPass( pubo->setName("SimpleRenderedISFNode::initPass::pubo"); pubo->create(); + // Allocate storage resources (SSBOs + images) declared in the shader. + // Reuse the caller's `res` batch rather than allocating a fresh one — + // the earlier `rhi.nextResourceUpdateBatch()` here was never released + // or submitted (the "tmp gets merged at next endFrame" comment was + // wrong: QRhi does NOT auto-reclaim unreleased batches). That leaked + // one pool slot per addOutputPass call, which exhausts the 64-slot + // pool after ~60 resize cycles under X11 async resize where each + // resize tick rebuilds the RenderList (and thus re-inits every ISF + // renderer's passes) without any intervening frame.
+ ensureStorageResources( + rhi, res, renderer, n.descriptor(), m_storage, renderer.state.renderSize); + bindUpstreamBuffers(renderer, n.input, m_storage); + + // Build the extra-binding list (storage + multiview UBO). + auto extraRhiBindings = buildExtraBindings(m_storage); + if(m_multiViewUBO) + { + // Multiview UBO binds right after storage resources. + int mvBinding = m_firstStorageBinding; + for(const auto& e : m_storage.ssbos) + { + if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1); + if(e.prev_binding >= 0) mvBinding = std::max(mvBinding, e.prev_binding + 1); + } + for(const auto& e : m_storage.images) + if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1); + + extraRhiBindings.append(QRhiShaderResourceBinding::uniformBuffer( + mvBinding, + QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage, + m_multiViewUBO)); + } + + // Compute effective pipeline state: global default + per-pass override. + auto eff_state = mergeState( + n.descriptor().default_state, model_passes[0].override_state); + // Create the main pass try { auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS); - auto pip = score::gfx::buildPipeline( - renderer, *m_mesh, v, s, renderTarget, pubo, m_materialUBO, allSamplers()); + auto pip = score::gfx::buildPipelineWithState( + renderer, *m_mesh, v, s, renderTarget, pubo, m_materialUBO, allSamplers(), + std::span( + extraRhiBindings.data(), (std::size_t)extraRhiBindings.size()), + eff_state, + n.descriptor().multiview_count); if(pip.pipeline) { m_passes.emplace_back(&edge, Pass{renderTarget, pip, pubo}); @@ -119,6 +254,36 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate const auto& outputs = n.descriptor().outputs; QSize sz = renderer.state.renderSize; + // Detect layered / multiview rendering needs. 
+ int maxLayers = 1; + for(const auto& out : outputs) + if(out.layers > maxLayers) + maxLayers = out.layers; + const int mvCount = n.descriptor().multiview_count; + const bool wantMultiview + = mvCount >= 2 && renderer.state.caps.multiview; + if(wantMultiview && mvCount > maxLayers) + maxLayers = mvCount; + + // Per-OUTPUT sample count: MSAA must be uniform across all colour + // attachments of a render pass, so pick the highest SAMPLES requested by + // any OUTPUT and use it as the render pass's sample count. Clamped later + // against QRhi::supportedSampleCounts() in createRenderTarget. + // + // IMPORTANT: the textures we allocate below stay SINGLE-SAMPLE — they + // are the RESOLVE TARGETS. createRenderTarget(mrtSamples) allocates + // multi-sample colorRenderBuffer attachments internally and wires each + // of these textures as its resolve destination (Vulkan contract: a + // resolve target must be single-sample). Downstream shaders sample the + // already-resolved single-sample textures, so there's no MSAA stride + // mismatch. (Previous code called setSampleCount(mrtSamples) on these + // textures, which produced MSAA storage sampled as if it were + // single-sample — visible as evenly-spaced horizontal stripes + // proportional to the sample count.) 
+ int mrtSamples = std::max(renderer.samples(), 1); + for(const auto& out : outputs) + mrtSamples = std::max(mrtSamples, out.samples); + // Create color and depth textures based on OUTPUTS declarations std::vector colorTextures; QRhiTexture* depthTex = nullptr; @@ -127,32 +292,81 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate { if(out.type == "depth") { - depthTex = rhi.newTexture( - QRhiTexture::D32F, sz, 1, - QRhiTexture::RenderTarget); + auto depthFmt = parseOutputFormat(out.format, QRhiTexture::D32F); + QRhiTexture::Flags dflags = QRhiTexture::RenderTarget; + if(maxLayers > 1) + { + dflags |= QRhiTexture::TextureArray; + depthTex = rhi.newTextureArray(depthFmt, maxLayers, sz, 1, dflags); + } + else + { + depthTex = rhi.newTexture(depthFmt, sz, 1, dflags); + } depthTex->setName(("SimpleRenderedISFNode::MRT::depth::" + out.name).c_str()); SCORE_ASSERT(depthTex->create()); } else { - auto* tex = rhi.newTexture( - QRhiTexture::RGBA8, sz, 1, - QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore); + auto fmt = parseOutputFormat(out.format, QRhiTexture::RGBA8); + QRhiTexture::Flags flags = QRhiTexture::RenderTarget | QRhiTexture::UsedWithLoadStore; + const int layers = std::max({1, out.layers, (wantMultiview ? mvCount : 1)}); + QRhiTexture* tex = nullptr; + if(layers > 1) + { + flags |= QRhiTexture::TextureArray; + tex = rhi.newTextureArray(fmt, layers, sz, 1, flags); + } + else + { + tex = rhi.newTexture(fmt, sz, 1, flags); + } tex->setName(("SimpleRenderedISFNode::MRT::color::" + out.name).c_str()); SCORE_ASSERT(tex->create()); colorTextures.push_back(tex); } } - if(colorTextures.empty()) + // Depth-only shader: the only output is depth. + if(colorTextures.empty() && depthTex) + { + // Build the RT AROUND the node-owned depth texture (which may be a + // TextureArray when maxLayers > 1). 
The previous code asked + // createDepthOnlyRenderTarget to allocate its own depth texture and then + // deleted it — but the render pass still referenced it (use-after-free), + // and textureForOutput() returned a texture that was never rendered to. + m_mrtRenderTarget = createDepthOnlyRenderTarget( + renderer.state, depthTex, mrtSamples, /*samplableDepth=*/true); + } + else if(wantMultiview && !colorTextures.empty()) + { + // Attach ALL color textures so attachments == pipeline blend targets. + m_mrtRenderTarget = createMultiViewRenderTarget( + renderer.state, + std::span{colorTextures.data(), colorTextures.size()}, + mvCount, depthTex, mrtSamples); + } + else if(maxLayers > 1 && !colorTextures.empty()) + { + // Pick layer 0 by default; per-pass LAYER is handled by the pass loop. + // Attach ALL color textures so attachments == pipeline blend targets. + m_mrtRenderTarget = createLayeredRenderTarget( + renderer.state, + std::span{colorTextures.data(), colorTextures.size()}, + 0, depthTex, mrtSamples); + } + else if(!colorTextures.empty()) + { + m_mrtRenderTarget = createRenderTarget( + renderer.state, + std::span{colorTextures.data(), colorTextures.size()}, + depthTex, + mrtSamples); + } + else + { return; - - // Create the multi-attachment render target - m_mrtRenderTarget = createRenderTarget( - renderer.state, - std::span{colorTextures.data(), colorTextures.size()}, - depthTex, - renderer.samples()); + } // Create the pipeline and pass using this render target QRhiBuffer* pubo = rhi.newBuffer( @@ -160,11 +374,39 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate pubo->setName("SimpleRenderedISFNode::initMRTPass::pubo"); pubo->create(); + // Extra bindings: storage + multiview UBO (same as initPass). 
+ auto extraRhiBindings = buildExtraBindings(m_storage); + if(m_multiViewUBO) + { + int mvBinding = m_firstStorageBinding; + for(const auto& e : m_storage.ssbos) + { + if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1); + if(e.prev_binding >= 0) mvBinding = std::max(mvBinding, e.prev_binding + 1); + } + for(const auto& e : m_storage.images) + if(e.binding >= 0) mvBinding = std::max(mvBinding, e.binding + 1); + + extraRhiBindings.append(QRhiShaderResourceBinding::uniformBuffer( + mvBinding, + QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage, + m_multiViewUBO)); + } + + const auto& passes = n.descriptor().passes; + auto eff_state = mergeState( + n.descriptor().default_state, + passes.empty() ? isf::pipeline_state{} : passes[0].override_state); + try { auto [v, s] = score::gfx::makeShaders(renderer.state, n.m_vertexS, n.m_fragmentS); - auto pip = score::gfx::buildPipeline( - renderer, *m_mesh, v, s, m_mrtRenderTarget, pubo, m_materialUBO, allSamplers()); + auto pip = score::gfx::buildPipelineWithState( + renderer, *m_mesh, v, s, m_mrtRenderTarget, pubo, m_materialUBO, allSamplers(), + std::span( + extraRhiBindings.data(), (std::size_t)extraRhiBindings.size()), + eff_state, + wantMultiview ? 
mvCount : 0); if(pip.pipeline) { // Use nullptr edge — MRT passes are shared across all output edges @@ -181,82 +423,53 @@ void SimpleRenderedISFNode::initMRTPass(RenderList& renderer, QRhiResourceUpdate } } -void SimpleRenderedISFNode::initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res) +void SimpleRenderedISFNode::initMRTBlitPass(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge) { - static const constexpr auto blit_vs = R"_(#version 450 -layout(location = 0) in vec2 position; -layout(location = 1) in vec2 texcoord; -layout(location = 0) out vec2 v_texcoord; - -layout(std140, binding = 0) uniform renderer_t { - mat4 clipSpaceCorrMatrix; - vec2 renderSize; -} renderer; - -out gl_PerVertex { vec4 gl_Position; }; + QRhiTexture* srcTex = textureForOutput(*edge.source); + if(!srcTex) + return; -void main() -{ - v_texcoord = texcoord; - gl_Position = renderer.clipSpaceCorrMatrix * vec4(position.xy, 0.0, 1.); -#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) - gl_Position.y = - gl_Position.y; -#endif -} -)_"; + auto rt = renderer.renderTargetForOutput(edge); + if(!rt.renderTarget) + return; - static const constexpr auto blit_fs = R"_(#version 450 -layout(std140, binding = 0) uniform renderer_t { - mat4 clipSpaceCorrMatrix; - vec2 renderSize; -} renderer; + auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, blit_vs, blit_fs); -layout(binding = 3) uniform sampler2D blitTexture; -layout(location = 0) in vec2 v_texcoord; -layout(location = 0) out vec4 fragColor; + QRhiSampler* sampler = renderer.state.rhi->newSampler( + QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, + QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); + sampler->setName("SimpleRenderedISFNode::MRT::blitSampler"); + sampler->create(); + m_blitSamplersByEdge[&edge] = sampler; -void main() { fragColor = texture(blitTexture, v_texcoord); } -)_"; + auto pip = score::gfx::buildPipeline( + renderer, *m_mesh, vertexS, fragmentS, rt, nullptr, nullptr, 
+ std::array{Sampler{sampler, srcTex}}); - auto [vertexS, fragmentS] = score::gfx::makeShaders(renderer.state, blit_vs, blit_fs); + if(pip.pipeline) + { + m_passes.emplace_back(&edge, Pass{rt, pip, nullptr}); + } + else + { + m_blitSamplersByEdge.erase(&edge); + delete sampler; + } +} +void SimpleRenderedISFNode::initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ // For each output port, create a blit pass for each downstream edge for(auto* output_port : n.output) { - QRhiTexture* srcTex = textureForOutput(*output_port); - if(!srcTex) - continue; - for(Edge* edge : output_port->edges) { - auto rt = renderer.renderTargetForOutput(*edge); - if(!rt.renderTarget) - continue; - - QRhiSampler* sampler = renderer.state.rhi->newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); - sampler->setName("SimpleRenderedISFNode::MRT::blitSampler"); - sampler->create(); - m_blitSamplers.push_back(sampler); - - auto pip = score::gfx::buildPipeline( - renderer, *m_mesh, vertexS, fragmentS, rt, nullptr, nullptr, - std::array{Sampler{sampler, srcTex}}); - - if(pip.pipeline) - { - m_passes.emplace_back(edge, Pass{rt, pip, nullptr}); - } - else - { - delete sampler; - } + initMRTBlitPass(renderer, res, *edge); } } } -void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +void SimpleRenderedISFNode::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { QRhi& rhi = *renderer.state.rhi; @@ -272,7 +485,7 @@ void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& } } - // Create the material UBO + // Create the material UBO and upload initial data m_materialSize = n.m_materialSize; if(m_materialSize > 0) { @@ -280,6 +493,8 @@ void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, m_materialSize); 
m_materialUBO->setName("SimpleRenderedISFNode::init::m_materialUBO"); SCORE_ASSERT(m_materialUBO->create()); + if(n.m_material_data) + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, n.m_material_data.get()); } // Create the samplers @@ -287,110 +502,140 @@ void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& SCORE_ASSERT(m_inputSamplers.empty()); SCORE_ASSERT(m_audioSamplers.empty()); - m_inputSamplers = initInputSamplers(this->n, renderer, n.input); + m_inputSamplers = initInputSamplers(this->n, renderer, n.input, &n.descriptor()); m_audioSamplers = initAudioTextures(renderer, n.m_audio_textures); - // Create the passes + // Collect graphics-visible storage buffers and images declared in the + // shader (storage_input with visibility=fragment/vertex/both, or + // csf_image_input with non-compute visibility). Bindings start right + // after the sampler bindings. + { + const int firstStorageBinding + = 3 + (int)m_inputSamplers.size() + (int)m_audioSamplers.size(); + m_firstStorageBinding = firstStorageBinding; + collectGraphicsStorageResources(n.descriptor(), firstStorageBinding, m_storage); + } + + // Allocate the multiview UBO when MULTIVIEW >= 2 is declared. 
+ if(n.descriptor().multiview_count >= 2) + { + const int mvCount = n.descriptor().multiview_count; + m_multiViewUBO = rhi.newBuffer( + QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, + sizeof(float[16]) * mvCount); + m_multiViewUBO->setName("SimpleRenderedISFNode::multiview_ubo"); + SCORE_ASSERT(m_multiViewUBO->create()); + } + // Count outputs to determine if we need MRT { const auto& outputs = n.descriptor().outputs; int colorCount = 0; bool hasDepth = false; + bool hasLayered = false; for(const auto& out : outputs) { if(out.type == "depth") hasDepth = true; else colorCount++; + if(out.layers > 1) + hasLayered = true; } - // MRT is only needed for multiple color attachments or depth output - m_hasMRT = colorCount > 1 || hasDepth; + // MRT is needed for multiple color attachments, depth output, or layered + // output (TextureArray). Multiview also requires the MRT path. + m_hasMRT = colorCount > 1 || hasDepth || hasLayered + || n.descriptor().multiview_count >= 2; } + m_initialized = true; +} + +void SimpleRenderedISFNode::addOutputPass(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ if(m_hasMRT) { - // MRT: create internal render target, render in runInitialPasses, - // then blit to downstream render targets in runRenderPass - initMRTPass(renderer, res); + // Create the shared MRT internal render target on first output edge + if(m_mrtRenderTarget.texture == nullptr) + { + initMRTPass(renderer, res); + } - // Create blit passes for each downstream edge across all output ports - initMRTBlitPasses(renderer, res); + // Create the blit pass for this single edge + initMRTBlitPass(renderer, res, edge); } else { - // Default single-output path (also handles OUTPUTS with a single color) - if(n.output[0]->edges.empty()) - qDebug(" WTF EMPTY"); - for(Edge* edge : n.output[0]->edges) + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) - { - initPass(rt, 
renderer, *edge); - } - else - { - qDebug("WTF NO RT"); - } + initPass(rt, renderer, edge, res); } } } -void SimpleRenderedISFNode::update( - RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) +void SimpleRenderedISFNode::removeOutputPass(RenderList& renderer, Edge& edge) { - m_mrtRenderedThisFrame = false; - - n.standardUBO.passIndex = 0; - n.standardUBO.frameIndex++; - auto sz = renderer.renderSize(edge); - n.standardUBO.renderSize[0] = sz.width(); - n.standardUBO.renderSize[1] = sz.height(); - - // Update audio textures - if(!n.m_audio_textures.empty() && !m_audioTex) + // Find and erase the pass for this edge + auto it = ossia::find_if(m_passes, [&](auto& p) { return p.first == &edge; }); + if(it != m_passes.end()) { - m_audioTex.emplace(); + it->second.p.release(); + if(it->second.processUBO) + it->second.processUBO->deleteLater(); + m_passes.erase(it); } - bool audioChanged = false; - for(auto& audio : n.m_audio_textures) + if(m_hasMRT) { - if(std::optional sampl - = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res)) + // Release the blit sampler for this edge + auto sit = m_blitSamplersByEdge.find(&edge); + if(sit != m_blitSamplersByEdge.end()) { - // Texture changed -> material changed - audioChanged = true; + delete sit->second; + m_blitSamplersByEdge.erase(sit); + } - auto& [rhiSampler, tex] = *sampl; - for(auto& [e, pass] : m_passes) + // If no more blit passes remain (only the shared MRT pass with nullptr edge), + // release MRT resources + bool hasBlitPasses = false; + for(auto& [e, pass] : m_passes) + { + if(e != nullptr) { - score::gfx::replaceTexture( - *pass.p.srb, rhiSampler, tex ? 
tex : &renderer.emptyTexture()); + hasBlitPasses = true; + break; } } + if(!hasBlitPasses) + { + // Remove the shared MRT pass + auto mrtIt = ossia::find_if(m_passes, [](auto& p) { return p.first == nullptr; }); + if(mrtIt != m_passes.end()) + { + mrtIt->second.p.release(); + if(mrtIt->second.processUBO) + mrtIt->second.processUBO->deleteLater(); + m_passes.erase(mrtIt); + } + m_mrtRenderTarget.release(); + } } +} - // Update material - if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged)) - { - char* data = n.m_material_data.get(); - res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); - } - - // Update all the process UBOs - for(auto& [e, pass] : m_passes) - { - if(pass.processUBO) - res.updateDynamicBuffer( - pass.processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO); - } +bool SimpleRenderedISFNode::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_passes, [&](const auto& p) { return p.first == &edge; }) + != m_passes.end(); } -void SimpleRenderedISFNode::release(RenderList& r) +void SimpleRenderedISFNode::releaseState(RenderList& r) { - // customRelease + if(!m_initialized) + return; + + // Release all remaining passes { for(auto& texture : n.m_audio_textures) { @@ -430,11 +675,11 @@ void SimpleRenderedISFNode::release(RenderList& r) // texture is deleted elsewhere } m_audioSamplers.clear(); - for(auto sampler : m_blitSamplers) + for(auto& [edge, sampler] : m_blitSamplersByEdge) { delete sampler; } - m_blitSamplers.clear(); + m_blitSamplersByEdge.clear(); delete m_materialUBO; m_materialUBO = nullptr; @@ -447,6 +692,145 @@ void SimpleRenderedISFNode::release(RenderList& r) m_mrtRenderTarget.release(); m_hasMRT = false; } + + // Release storage resources (owned SSBOs + storage images). 
+ m_storage.release(); + + if(m_multiViewUBO) + { + m_multiViewUBO->deleteLater(); + m_multiViewUBO = nullptr; + } + + m_initialized = false; +} + +void SimpleRenderedISFNode::addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(edge.sink->type == Types::Image) + { + // Find upstream texture + if(auto it = edge.source->node->renderedNodes.find(&renderer); + it != edge.source->node->renderedNodes.end()) + { + if(auto* tex = it->second->textureForOutput(*edge.source)) + { + auto rt = renderer.renderTargetForInputPort(*edge.sink); + updateInputTexture(*edge.sink, tex, rt.depthTexture); + } + } + } +} + +void SimpleRenderedISFNode::removeInputEdge(RenderList& renderer, Edge& edge) +{ + if(edge.sink->type == Types::Image) + { + // Ports declared with DEPTH: true have a second sampler binding for the + // `_depth` companion. When the cable is removed, the upstream renderer + // is often released immediately after — so the depth sampler's cached + // QRhiTexture* becomes a dangling pointer. Pass an empty-texture + // placeholder for the depth side too so the SRB never holds a freed + // VkImageView. Without this, vkUpdateDescriptorSets / end-of-frame + // pipeline barrier both crash on the stale handle. + const bool hasDepthCompanion + = (edge.sink->flags & Flag::SamplableDepth) == Flag::SamplableDepth; + QRhiTexture* depthFallback + = hasDepthCompanion ? 
&renderer.emptyTexture() : nullptr; + updateInputTexture(*edge.sink, &renderer.emptyTexture(), depthFallback); + } +} + +void SimpleRenderedISFNode::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + for(auto* out_port : n.output) + for(auto* edge : out_port->edges) + addOutputPass(renderer, *edge, res); +} + +void SimpleRenderedISFNode::update( + RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) +{ + m_mrtRenderedThisFrame = false; + + n.standardUBO.passIndex = 0; + n.standardUBO.frameIndex++; + auto sz = renderer.renderSize(edge); + n.standardUBO.renderSize[0] = sz.width(); + n.standardUBO.renderSize[1] = sz.height(); + + // Update audio textures + if(!n.m_audio_textures.empty() && !m_audioTex) + { + m_audioTex.emplace(); + } + + bool audioChanged = false; + std::size_t audio_idx = 0; + for(auto& audio : n.m_audio_textures) + { + if(std::optional sampl + = m_audioTex->updateAudioTexture(audio, renderer, n.m_material_data.get(), res)) + { + // Texture changed -> material changed + audioChanged = true; + + auto& [rhiSampler, tex, fb_] = *sampl; + // Keep m_audioSamplers[i].texture in sync with the live GPU texture so + // any later pipeline rebuild (e.g. rt_changed path in RenderList::render + // triggering removeOutputPass + addOutputPass) uses the live binding + // instead of the placeholder empty texture. + if(audio_idx < m_audioSamplers.size()) + m_audioSamplers[audio_idx].texture = tex; + + for(auto& [e, pass] : m_passes) + { + score::gfx::replaceTexture( + *pass.p.srb, rhiSampler, tex ? tex : &renderer.emptyTexture()); + } + } + ++audio_idx; + } + + // Update material + if(m_materialUBO && m_materialSize > 0 && (materialChanged || audioChanged)) + { + char* data = n.m_material_data.get(); + res.updateDynamicBuffer(m_materialUBO, 0, m_materialSize, data); + } + materialChanged = false; + + // Reset event ports now that the UBO has captured their pulse value. 
+ // If anything fired, force next frame's upload so the reset-to-zero + // propagates out through the normally-gated upload path. + if(n.resetEventPortsAfterFrame()) + materialChanged = true; + + // Re-bind upstream buffers (UBOs / read-only SSBOs sourced from upstream + // ports). Cables can be added or replaced after init, so this must run + // every frame. We pass each pass's SRB so that buffer swaps patch the + // descriptor set in place; without this, uniform_input cables connected + // post-init never reach the shader and the placeholder UBO stays bound + // (zero-filled → degenerate matrices on the GPU). + for(auto& [e, pass] : m_passes) + { + bindUpstreamBuffers(renderer, n.input, m_storage, pass.p.srb); + } + + // Update all the process UBOs + for(auto& [e, pass] : m_passes) + { + if(pass.processUBO) + res.updateDynamicBuffer( + pass.processUBO, 0, sizeof(ProcessUBO), &this->n.standardUBO); + } +} + +void SimpleRenderedISFNode::release(RenderList& r) +{ + releaseState(r); } void SimpleRenderedISFNode::runInitialPasses( @@ -469,19 +853,27 @@ void SimpleRenderedISFNode::runInitialPasses( SCORE_ASSERT(pass.p.srb); cb.beginPass( - pass.renderTarget.renderTarget, Qt::transparent, {1.0f, 0}, updateBatch); + pass.renderTarget.renderTarget, Qt::transparent, {0.0f, 0}, updateBatch); updateBatch = nullptr; cb.setGraphicsPipeline(pass.p.pipeline); cb.setShaderResources(pass.p.srb); - auto* tex = pass.renderTarget.texture; - cb.setViewport(QRhiViewport( - 0, 0, tex->pixelSize().width(), tex->pixelSize().height())); + auto* tex = pass.renderTarget.texture ? pass.renderTarget.texture + : pass.renderTarget.depthTexture; + if(tex) + { + cb.setViewport(QRhiViewport( + 0, 0, tex->pixelSize().width(), tex->pixelSize().height())); + } - m_mesh->draw(this->m_meshBuffer, cb); + drawMeshWithOptionalIndirect(*m_mesh, this->m_meshBuffer, cb); cb.endPass(); + + // Persistent SSBO ping-pong: swap current and previous for next frame. 
+ if(pass.p.srb) + swapPersistentSSBOs(m_storage, *pass.p.srb); } void SimpleRenderedISFNode::runRenderPass( @@ -523,10 +915,7 @@ void SimpleRenderedISFNode::runRenderPass( auto it = ossia::find_if(this->m_passes, [&](auto& p) { return p.first == &edge; }); // Maybe the shader could not be created if(it == this->m_passes.end()) - { - qDebug(" NO PASS FOUND"); return; - } auto& pass = it->second; @@ -545,11 +934,17 @@ void SimpleRenderedISFNode::runRenderPass( { cb.setGraphicsPipeline(pipeline); cb.setShaderResources(srb); - cb.setViewport(QRhiViewport( - 0, 0, texture->pixelSize().width(), texture->pixelSize().height())); + if(texture) + { + cb.setViewport(QRhiViewport( + 0, 0, texture->pixelSize().width(), texture->pixelSize().height())); + } - m_mesh->draw(this->m_meshBuffer, cb); + drawMeshWithOptionalIndirect(*m_mesh, this->m_meshBuffer, cb); } + + // Persistent SSBO ping-pong for next frame. + swapPersistentSSBOs(m_storage, *srb); } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp index 1a832c3280..539827e3db 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/SimpleRenderedISFNode.hpp @@ -1,9 +1,12 @@ #pragma once #include +#include #include #include +#include + namespace score::gfx { // Used for the simple case of a single, non-persistent pass (the most common case) @@ -14,13 +17,22 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer virtual ~SimpleRenderedISFNode(); - void updateInputTexture(const Port& input, QRhiTexture* tex) override; + void updateInputTexture(const Port& input, QRhiTexture* tex, QRhiTexture* depthTex = nullptr) override; + void updateInputSamplerFilter(const Port& input, const RenderTargetSpecs& spec) override; QRhiTexture* textureForOutput(const Port& output) override; void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; void update(RenderList& 
renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& r) override; + void addOutputPass(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + void addInputEdge(RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeInputEdge(RenderList& renderer, Edge& edge) override; + void runInitialPasses( RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, Edge& edge) override; @@ -28,9 +40,12 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer void runRenderPass(RenderList&, QRhiCommandBuffer& commands, Edge& edge) override; private: - void initPass(const TextureRenderTarget& rt, RenderList& renderer, Edge& edge); + void initPass( + const TextureRenderTarget& rt, RenderList& renderer, Edge& edge, + QRhiResourceUpdateBatch& res); void initMRTPass(RenderList& renderer, QRhiResourceUpdateBatch& res); void initMRTBlitPasses(RenderList& renderer, QRhiResourceUpdateBatch& res); + void initMRTBlitPass(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge& edge); std::vector allSamplers() const noexcept; @@ -40,7 +55,7 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer std::vector m_inputSamplers; std::vector m_audioSamplers; - std::vector m_blitSamplers; + ossia::small_flat_map m_blitSamplersByEdge; const Mesh* m_mesh{}; MeshBuffers m_meshBuffer{}; @@ -54,5 +69,15 @@ struct SimpleRenderedISFNode : score::gfx::NodeRenderer TextureRenderTarget m_mrtRenderTarget; bool m_hasMRT{false}; bool m_mrtRenderedThisFrame{false}; + + // Graphics-visible storage buffers / images (see IsfBindingsBuilder). + GraphicsStorageResources m_storage; + + // Multiview UBO: N × mat4 view-projection matrices uploaded per frame. 
+ QRhiBuffer* m_multiViewUBO{}; + + // Cached index of the first storage-resource binding (recorded in + // initState so that runtime buffer rebinds can reuse the same layout). + int m_firstStorageBinding{-1}; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp index 08dbb3d1bf..57841faad9 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TexgenNode.hpp @@ -67,10 +67,10 @@ struct TexgenNode : NodeModel ~Rendered() { } QRhiTexture* texture{}; - void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override { - const auto& mesh = renderer.defaultTriangle(); - defaultMeshInit(renderer, mesh, res); + m_mesh = &renderer.defaultTriangle(); + defaultMeshInit(renderer, *m_mesh, res); processUBOInit(renderer); m_material.init(renderer, node.input, m_samplers); std::tie(m_vertexS, m_fragmentS) @@ -93,7 +93,8 @@ struct TexgenNode : NodeModel sampler->create(); m_samplers.push_back({sampler, texture}); } - defaultPassesInit(renderer, mesh); + + m_initialized = true; } void update( @@ -116,8 +117,8 @@ struct TexgenNode : NodeModel QRhiTexture::RGBA8, sz, 1, QRhiTexture::Flag{}); newtex->create(); for(auto& [edge, pass] : this->m_p) - if(pass.srb) - score::gfx::replaceTexture(*pass.srb, m_samplers[0].sampler, newtex); + if(pass.p.srb) + score::gfx::replaceTexture(*pass.p.srb, m_samplers[0].sampler, newtex); texture = newtex; if(oldtex && oldtex != &renderer.emptyTexture()) @@ -139,12 +140,15 @@ struct TexgenNode : NodeModel } } - void release(RenderList& r) override + void releaseState(RenderList& r) override { - texture->deleteLater(); - texture = nullptr; + if(texture) + { + texture->deleteLater(); + texture = nullptr; + } - defaultRelease(r); + GenericNodeRenderer::releaseState(r); } int t = 0; diff --git
a/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp index b0c5c7f7ed..0b52aae4bc 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TextNode.cpp @@ -112,11 +112,11 @@ class TextNode::Renderer : public GenericNodeRenderer m_uploaded = false; } - void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override { rerender(); - const auto& mesh = renderer.defaultQuad(); - defaultMeshInit(renderer, mesh, res); + m_mesh = &renderer.defaultQuad(); + defaultMeshInit(renderer, *m_mesh, res); processUBOInit(renderer); m_material.init(renderer, node.input, m_samplers); std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders( @@ -145,7 +145,7 @@ class TextNode::Renderer : public GenericNodeRenderer m_samplers.push_back({sampler, tex}); } - defaultPassesInit(renderer, mesh); + m_initialized = true; } void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override @@ -179,7 +179,7 @@ class TextNode::Renderer : public GenericNodeRenderer defaultRenderPass(renderer, mesh, cb, edge); } - void release(RenderList& r) override + void releaseState(RenderList& r) override { for(auto tex : m_textures) { @@ -187,7 +187,7 @@ class TextNode::Renderer : public GenericNodeRenderer } m_textures.clear(); - defaultRelease(r); + GenericNodeRenderer::releaseState(r); } QImage m_img; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.cpp new file mode 100644 index 0000000000..f6fb9a5249 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.cpp @@ -0,0 +1,190 @@ +#include +#include + +#include + +#include +#include +#include + +#include + +namespace score::gfx +{ + +// ----------------------------------------------------------------------------- +// CPU decode +// 
----------------------------------------------------------------------------- + +std::optional decodeImageFromPath(const QString& path) +{ + // Decode straight off disk. We previously reused Gfx::ImageCache here, but + // that cache is refcounted and TextureLoader never released its acquisition, + // so every unique path ever decoded leaked one QImage for the program + // lifetime (drag-drop reloads, library scans, image_input swaps all bled + // memory). The TextureCache below already de-duplicates per-renderer GPU + // uploads, and AssetTable handles cross-output dedup keyed on content hash, + // so the extra CPU-side cache layer wasn't pulling its weight. + QImage img(path); + if(img.isNull()) + return std::nullopt; + + DecodedImage out; + out.image = std::move(img); + // Canonical RGBA8888 layout so QRhi's RGBA8 textures sample correctly. + if(out.image.format() != QImage::Format_RGBA8888) + out.image.convertTo(QImage::Format_RGBA8888); + out.debug_name = path; + return out; +} + +std::optional decodeImageFromMemory( + const QByteArray& bytes, const QString& mime_hint) +{ + QImage img; + // QImage::loadFromData accepts a format hint as a const char* (e.g. "PNG"). + // Strip the "image/" prefix from the MIME type if present, then upper-case. + QByteArray fmt; + if(!mime_hint.isEmpty()) + { + QString s = mime_hint; + if(s.startsWith("image/")) + s = s.mid(6); + fmt = s.toUpper().toLatin1(); + } + if(!img.loadFromData(bytes, fmt.isEmpty() ? 
nullptr : fmt.constData())) + return std::nullopt; + + DecodedImage out; + out.image = std::move(img); + if(out.image.format() != QImage::Format_RGBA8888) + out.image.convertTo(QImage::Format_RGBA8888); + out.debug_name = QStringLiteral("blob:") + mime_hint; + return out; +} + +// ----------------------------------------------------------------------------- +// GPU upload +// ----------------------------------------------------------------------------- + +QRhiTexture* uploadImageToTexture( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QImage& img, bool srgb, + const QString& debug_name) +{ + if(img.isNull()) + return nullptr; + + // sRGB is a Flag bit (not a separate format) — Qt RHI allocates an RGBA8 + // texture with sRGB sampling semantics when the flag is present. + // MipMapped + UsedWithGenerateMips: required for the generateMips() call + // below. Without a mip chain, sampling a high-resolution material texture + // (Sponza floor at distance, etc.) point-samples the base level at sub- + // pixel rate → uniform noise / TV-static aliasing. + QRhiTexture::Flags flags + = QRhiTexture::MipMapped | QRhiTexture::UsedWithGenerateMips; + if(srgb) + flags |= QRhiTexture::sRGB; + // sampleCount=1 (no MSAA on a sampled material texture). The mip count + // itself is implicit — set by the MipMapped flag and floor(log2(max(w,h)))+1. + auto* tex = rhi.newTexture(QRhiTexture::RGBA8, img.size(), 1, flags); + if(!tex) + return nullptr; + if(!debug_name.isEmpty()) + tex->setName(debug_name.toUtf8()); + if(!tex->create()) + { + delete tex; + return nullptr; + } + // QRhi accepts QImage directly; format conversion is handled internally. + batch.uploadTexture(tex, img); + // Filter the base level into the mip chain. Cheap (one-shot, on first + // upload) and unblocks min-filter-linear-mipmap-linear sampling on the + // material samplers — kills the floor-noise aliasing. 
+ batch.generateMips(tex); + return tex; +} + +// ----------------------------------------------------------------------------- +// One-shot helpers +// ----------------------------------------------------------------------------- + +QRhiTexture* loadAndUploadTexture( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb) +{ + auto decoded = decodeImageFromPath(path); + if(!decoded) + return nullptr; + return uploadImageToTexture( + rhi, batch, decoded->image, srgb, decoded->debug_name); +} + +QRhiTexture* loadAndUploadTexture( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes, + const QString& mime_hint, bool srgb) +{ + auto decoded = decodeImageFromMemory(bytes, mime_hint); + if(!decoded) + return nullptr; + return uploadImageToTexture( + rhi, batch, decoded->image, srgb, decoded->debug_name); +} + +// ----------------------------------------------------------------------------- +// TextureCache +// ----------------------------------------------------------------------------- + +std::size_t TextureCache::KeyHash::operator()(const Key& k) const noexcept +{ + std::size_t seed = hash_qstring(k.origin); + ossia::hash_combine(seed, (uint8_t)(k.srgb ? 1 : 0)); + return seed; +} + +TextureCache::~TextureCache() +{ + clear(); +} + +void TextureCache::clear() +{ + for(auto& [key, tex] : m_textures) + if(tex) + tex->deleteLater(); + m_textures.clear(); +} + +QRhiTexture* TextureCache::acquireFromPath( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb) +{ + if(path.isEmpty()) + return nullptr; + Key k{path, srgb}; + if(auto it = m_textures.find(k); it != m_textures.end()) + return it->second; + + auto* tex = loadAndUploadTexture(rhi, batch, path, srgb); + if(tex) + m_textures.emplace(std::move(k), tex); + // Decode failures are not cached — let the next call retry. Caller + // handles the nullptr return as the "missing texture" fallback. 
+ return tex; +} + +QRhiTexture* TextureCache::acquireFromMemory( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes, + const QString& mime_hint, uint64_t content_hash, bool srgb) +{ + Key k{ + QStringLiteral("blob:") + QString::number(content_hash, 16), + srgb}; + if(auto it = m_textures.find(k); it != m_textures.end()) + return it->second; + + auto* tex = loadAndUploadTexture(rhi, batch, bytes, mime_hint, srgb); + if(tex) + m_textures.emplace(std::move(k), tex); + return tex; +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.hpp new file mode 100644 index 0000000000..e670b5e80c --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/TextureLoader.hpp @@ -0,0 +1,132 @@ +#pragma once +#include + +#include +#include + +#include +#include +#include + +class QRhi; +class QRhiTexture; +class QRhiResourceUpdateBatch; +class QByteArray; + +namespace score::gfx +{ + +// ============================================================================= +// CPU-side decode result. +// +// The default implementation produces RGBA8888 data via QImage. The `srgb` +// argument of the upload helpers is metadata only — it does NOT alter the +// pixel bytes, it just records whether the caller intends those bytes to be +// interpreted as sRGB when the texture is sampled (the upload adds the +// QRhiTexture::sRGB flag to the RGBA8 texture accordingly). +// +// Future swap-in candidates: OIIO (HDR/EXR), KTX2 (transcoded BCn), AVIF. +// ============================================================================= +struct DecodedImage +{ + QImage image; // QImage::Format_RGBA8888 (no premul) + QString debug_name; // For QRhiTexture::setName() +}; + +// ============================================================================= +// Decode helpers — synchronous, called on the render thread.
+// +// Both variants decode directly with QImage; cross-output dedup is handled at +// the TextureCache (per-renderer GPU side) and AssetTable (content-hash +// keyed) layers. We don't share a CPU-side cache here — the previous reuse +// of Gfx::ImageCache leaked every decoded path for the program lifetime. +// ============================================================================= + +SCORE_PLUGIN_GFX_EXPORT +std::optional decodeImageFromPath(const QString& path); + +SCORE_PLUGIN_GFX_EXPORT +std::optional decodeImageFromMemory( + const QByteArray& bytes, const QString& mime_hint); + +// ============================================================================= +// GPU upload — pure RHI, no I/O. Allocates a freshly-sized QRhiTexture +// (RGBA8, with the QRhiTexture::sRGB flag when `srgb` is set), records the +// upload into `batch`. Caller owns the returned pointer (delete via deleteLater()). +// +// Returns nullptr for a null image or on QRhi allocation failure. +// ============================================================================= + +SCORE_PLUGIN_GFX_EXPORT +QRhiTexture* uploadImageToTexture( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QImage& img, bool srgb, + const QString& debug_name = {}); + +// ============================================================================= +// One-shot decode + upload helpers. Convenience for callers that don't need +// to reuse the decoded CPU bytes. +// ============================================================================= + +SCORE_PLUGIN_GFX_EXPORT +QRhiTexture* loadAndUploadTexture( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb); + +SCORE_PLUGIN_GFX_EXPORT +QRhiTexture* loadAndUploadTexture( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes, + const QString& mime_hint, bool srgb); + +// ============================================================================= +// Per-renderer GPU texture cache.
+// +// QRhiTexture* is bound to one QRhi instance, so this cache MUST live on the +// render-side node (e.g. ScenePreprocessorNode), not as a global singleton. Owns +// the textures it returns; clear() (also runs in the dtor) schedules each via +// deleteLater(). +// +// Keys: a file path OR a stable content hash (for embedded glTF/FBX blobs). +// Two entries with the same origin but different sRGB flags coexist. +// ============================================================================= +class SCORE_PLUGIN_GFX_EXPORT TextureCache +{ +public: + TextureCache() = default; + ~TextureCache(); + + TextureCache(const TextureCache&) = delete; + TextureCache& operator=(const TextureCache&) = delete; + TextureCache(TextureCache&&) noexcept = default; + TextureCache& operator=(TextureCache&&) noexcept = default; + + // First call decodes + uploads via `batch`; later calls hit the cache. + // Returns nullptr if the file can't be decoded. + QRhiTexture* acquireFromPath( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QString& path, bool srgb); + + // Same, for embedded blobs. `content_hash` is supplied by the caller and is + // the sole cache key (with `srgb`); `bytes` is never compared against it. + QRhiTexture* acquireFromMemory( + QRhi& rhi, QRhiResourceUpdateBatch& batch, const QByteArray& bytes, + const QString& mime_hint, uint64_t content_hash, bool srgb); + + // Schedule deleteLater() on every owned texture and drop the map.
+ void clear(); + + std::size_t size() const noexcept { return m_textures.size(); } + +private: + struct Key + { + QString origin; // file path, or "blob:" for memory blobs + bool srgb{}; + bool operator==(const Key&) const noexcept = default; + }; + struct KeyHash + { + std::size_t operator()(const Key& k) const noexcept; + }; + + std::unordered_map m_textures; +}; + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp index 74000ee39b..f8c5cb4090 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Uniforms.hpp @@ -20,6 +20,7 @@ enum class Types : int8_t Camera, Geometry, Buffer, + Scene, }; enum class Flag : uint32_t @@ -27,7 +28,32 @@ enum class Flag : uint32_t // Grabs texture at the source instead of // asking it to render. Used for instance to get cubemap textures. GrabsFromSource = (1 << 0), - SamplableDepth = (1 << 1) + SamplableDepth = (1 << 1), + + // Sink expects a sampler2DArray (texture carries multiple layers). + TextureArray = (1 << 2), + + // Sink expects imageLoad/imageStore (storage image) rather than sampledTexture. + StorageImage = (1 << 3), + + // Buffer port carries indirect-draw arguments (QRhiDrawIndirectCommand[]). + IndirectDraw = (1 << 4), + + // Image port is a multiview texture array (one layer per view). + MultiView = (1 << 5), + + // Output port produces only depth (no color attachment). + DepthOnly = (1 << 6), + + // Buffer port is bound as a uniform buffer (UBO, std140) rather than as a + // storage buffer (SSBO, std430). Used for `uniform_input` from upstream. + UniformBuffer = (1 << 7), + + // Sink expects a sampler3D (texture is a 3D volume). + ThreeDimensional = (1 << 8), + + // Sink expects a samplerCube. 
+ Cubemap = (1 << 9), }; static constexpr inline Flag operator&(Flag lhs, Flag rhs) diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp index c103e2e47d..e92268c89f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.cpp @@ -1,10 +1,17 @@ #include #include +#include #include #include +#include +#include + +#include #include +#include + namespace score::gfx { TextureRenderTarget @@ -27,7 +34,7 @@ createRenderTarget(const RenderState& state, QRhiTexture* tex, int samples, bool bool useDepthResolve = false; if(samplableDepth && samples > 1) { -#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil); #endif if(!useDepthResolve) @@ -67,7 +74,7 @@ createRenderTarget(const RenderState& state, QRhiTexture* tex, int samples, bool if(useDepthResolve) { -#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) // Multisample depth attachment used during rendering; resolves into // ret.depthTexture at endPass(). Owned via ret.msDepthTexture so it // is released alongside the rest of the RT. @@ -88,12 +95,18 @@ createRenderTarget(const RenderState& state, QRhiTexture* tex, int samples, bool } else if(depth) { - ret.depthRenderBuffer = state.rhi->newRenderBuffer( - QRhiRenderBuffer::DepthStencil, tex->pixelSize(), effectiveSamples); - ret.depthRenderBuffer->setName("createRenderTarget::ret.depthRenderBuffer"); - SCORE_ASSERT(ret.depthRenderBuffer->create()); + // Reverse-Z project rule: intermediate 3D render targets always use + // D32F float depth. D24 fixed-point combined with reverse-Z yields + // strictly worse precision than standard-Z would, so renderbuffer + // depth is no longer an option here. Stencil is dropped (no shader in + // the codebase currently uses it — revisit via D32FS8 if needed). 
+ ret.depthTexture = state.rhi->newTexture( + QRhiTexture::D32F, tex->pixelSize(), effectiveSamples, + QRhiTexture::RenderTarget); + ret.depthTexture->setName("createRenderTarget::depthTexture (D32F, non-samplable)"); + SCORE_ASSERT(ret.depthTexture->create()); - desc.setDepthStencilBuffer(ret.depthRenderBuffer); + desc.setDepthTexture(ret.depthTexture); } auto renderTarget = state.rhi->newTextureRenderTarget(desc); @@ -147,7 +160,7 @@ TextureRenderTarget createRenderTarget( bool useDepthResolve = false; if(depthTex && samples > 1) { -#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil); #endif if(!useDepthResolve) @@ -188,7 +201,7 @@ TextureRenderTarget createRenderTarget( #if QT_VERSION >= QT_VERSION_CHECK(6, 6, 0) if(useDepthResolve) { -#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) // Multisample depth attachment used during rendering, resolves into // the caller-supplied depthTex on endPass(). We own msDepthTexture. ret.msDepthTexture = state.rhi->newTexture( @@ -233,6 +246,16 @@ TextureRenderTarget createRenderTarget( return ret; } +// NOTE on the reinterpret_cast below (and in +// replaceSampler / replaceTexture / etc.): QRhiShaderResourceBinding stores its +// payload in a private nested ::Data whose only public accessor is the const +// data() method — there is no public mutator. We rebind buffers/samplers/ +// textures in-place by casting the binding to its layout-compatible private +// Data. This relies on QRhiShaderResourceBinding being a thin wrapper whose +// first (and only) data member IS that Data struct; that layout has been stable +// across Qt 6.4..dev, but it is NOT a guaranteed/forward-compatible ABI. If a +// future Qt reorders QRhiShaderResourceBinding's members this will silently +// corrupt bindings — revisit if QRhi ever exposes a public mutating accessor. 
void replaceBuffer( std::vector& tmp, int binding, QRhiBuffer* newBuffer) { @@ -278,6 +301,16 @@ void replaceSampler( void replaceTexture( std::vector& tmp, int binding, QRhiTexture* newTexture) { + // Defensive null-guard — writing a null texture into a + // sampledTexture / ImageLoad binding crashes the next + // vkUpdateDescriptorSets. Callers that genuinely want to "detach" a + // texture should call replaceTexture with an empty-fallback from the + // RenderList (renderer.emptyTexture() / …Array() / …Cube() / …3D()) + // that matches the sampler's kind. When this is reached with null, + // leave the existing binding in place so the pass keeps working + // with whatever it had last. + if(!newTexture) + return; for(QRhiShaderResourceBinding& b : tmp) { auto d = reinterpret_cast(&b); @@ -300,6 +333,17 @@ void replaceTexture( } } +// The replace*() overloads on QRhiShaderResourceBindings only ever rewrite +// the *resources* inside an existing layout (buffer/texture/sampler pointer +// in the same binding slot). That is the textbook case for QRhi's +// updateResources() fast path: reuse the native descriptor set layout and +// pool slot, bump the generation, let the backend rewrite only the changed +// descriptors. The previous destroy()+create() pattern instead freed the +// pool slot on every live edit — which is what caused the 64-slot batch +// pool to blow up under heavy graph churn. +// +// See qrhivulkan.cpp:8707 (QVkShaderResourceBindings::updateResources). +// All five backends (Vulkan/D3D11/D3D12/Metal/GL) implement the virtual. 
void replaceBuffer(QRhiShaderResourceBindings& srb, int binding, QRhiBuffer* newBuffer) { std::vector tmp; @@ -307,9 +351,8 @@ void replaceBuffer(QRhiShaderResourceBindings& srb, int binding, QRhiBuffer* new replaceBuffer(tmp, binding, newBuffer); - srb.destroy(); srb.setBindings(tmp.begin(), tmp.end()); - srb.create(); + srb.updateResources(); } void replaceSampler( @@ -320,9 +363,8 @@ void replaceSampler( replaceSampler(tmp, binding, newSampler); - srb.destroy(); srb.setBindings(tmp.begin(), tmp.end()); - srb.create(); + srb.updateResources(); } void replaceTexture( @@ -333,9 +375,8 @@ void replaceTexture( replaceTexture(tmp, binding, newTexture); - srb.destroy(); srb.setBindings(tmp.begin(), tmp.end()); - srb.create(); + srb.updateResources(); } void replaceSampler( @@ -356,9 +397,8 @@ void replaceSampler( } } - srb.destroy(); srb.setBindings(tmp.begin(), tmp.end()); - srb.create(); + srb.updateResources(); } void replaceSamplerAndTexture( @@ -381,16 +421,21 @@ void replaceSamplerAndTexture( } } - srb.destroy(); srb.setBindings(tmp.begin(), tmp.end()); - srb.create(); + srb.updateResources(); } void replaceTexture( QRhiShaderResourceBindings& srb, QRhiSampler* sampler, QRhiTexture* newTexture) { + // Defensive null-guard: see the other replaceTexture overload. Null + // leaves the current binding intact so subsequent setShaderResources + // calls don't hit vkUpdateDescriptorSets with VK_NULL_HANDLE. 
+ if(!newTexture) + return; std::vector tmp; tmp.assign(srb.cbeginBindings(), srb.cendBindings()); + int matches = 0; for(QRhiShaderResourceBinding& b : tmp) { auto d = reinterpret_cast(&b); @@ -400,13 +445,15 @@ void replaceTexture( if(d->u.stex.texSamplers[0].sampler == sampler) { d->u.stex.texSamplers[0].tex = newTexture; + matches++; } } } + if(matches == 0) + return; - srb.destroy(); srb.setBindings(tmp.begin(), tmp.end()); - srb.create(); + srb.updateResources(); } void replaceTexture( @@ -428,9 +475,46 @@ void replaceTexture( } } } - srb.destroy(); srb.setBindings(bindings.begin(), bindings.end()); - srb.create(); + srb.updateResources(); +} + +// Unified geometry-attribute lookup, used by raw raster and CSF alike. +// Matches the request (name + optional semantic key) to an upstream +// ossia::geometry::attribute via a 3-stage cascade: +// +// stage 1 — resolve `semantic_key` (defaults to `name`) via +// name_to_semantic. If it maps to a known semantic, look that +// up on the geometry. +// stage 2 — fall back to a custom-attribute lookup by `name`. +// stage 3 — display_name match. Catches the case where the user said +// { NAME: "position", SEMANTIC: "custom" } but only the real +// position attribute (semantic=position) exists upstream — we +// still want to bind to it instead of failing. 
+const ossia::geometry::attribute* findGeometryAttribute( + const ossia::geometry& geom, std::string_view name, std::string_view semantic_key) +{ + if(semantic_key.empty()) + semantic_key = name; + const auto sem = ossia::name_to_semantic(semantic_key); + + const ossia::geometry::attribute* match = nullptr; + if(sem != ossia::attribute_semantic::custom) + match = geom.find(sem); + if(!match) + match = geom.find(name); + if(!match) + { + for(const auto& a : geom.attributes) + { + if(ossia::geometry::display_name(a) == name) + { + match = &a; + break; + } + } + } + return match; } bool remapPipelineVertexInputs( @@ -445,28 +529,12 @@ bool remapPipelineVertexInputs( for(const auto& shader_var : shader_inputs) { - // Resolve shader variable name to semantic const std::string_view var_name(shader_var.name.constData(), shader_var.name.size()); - auto sem = ossia::name_to_semantic(var_name); - - // Find matching geometry attribute: by semantic, then custom name, then display name - const ossia::geometry::attribute* match = nullptr; - if(sem != ossia::attribute_semantic::custom) - match = geom.find(sem); - if(!match) - match = geom.find(var_name); - if(!match) - { - // Fallback: match shader variable name against attribute display names - for(const auto& a : geom.attributes) - { - if(ossia::geometry::display_name(a) == var_name) - { - match = &a; - break; - } - } - } + // Same lookup CSF uses — the explicit-SEMANTIC override is plumbed + // separately by callers that have access to the descriptor (see the + // overload below). Here, only the GLSL var name is available, so the + // semantic key defaults to it. 
+ const auto* match = findGeometryAttribute(geom, var_name, var_name); if(!match) return false; @@ -487,6 +555,202 @@ bool remapPipelineVertexInputs( return true; } +bool remapPipelineVertexInputs( + QRhiGraphicsPipeline& pip, const QShader& vertexShader, + const ossia::geometry& geom, const isf::descriptor& desc) +{ + const auto& shader_inputs = vertexShader.description().inputVariables(); + if(shader_inputs.empty()) + return true; + + // Build a fast NAME → SEMANTIC override map from the descriptor's + // VERTEX_INPUTS so we honour explicit user intent. Anything not in the + // map falls through to name-as-semantic-key behaviour. + ossia::small_flat_map overrides; + for(const auto& vi : desc.vertex_inputs) + if(!vi.semantic.empty()) + overrides[vi.name] = vi.semantic; + + QVarLengthArray remappedAttrs; + for(const auto& shader_var : shader_inputs) + { + const std::string_view var_name(shader_var.name.constData(), shader_var.name.size()); + std::string_view sem_key = var_name; + if(auto it = overrides.find(var_name); it != overrides.end()) + sem_key = it->second; + + const auto* match = findGeometryAttribute(geom, var_name, sem_key); + if(!match) + return false; + + remappedAttrs.append(QRhiVertexInputAttribute( + match->binding, shader_var.location, + static_cast(match->format), + match->byte_offset)); + } + + QRhiVertexInputLayout inputLayout; + const auto& prevLayout = pip.vertexInputLayout(); + inputLayout.setBindings(prevLayout.cbeginBindings(), prevLayout.cendBindings()); + inputLayout.setAttributes(remappedAttrs.begin(), remappedAttrs.end()); + pip.setVertexInputLayout(inputLayout); + return true; +} + +namespace +{ + +// Convert the parser's attribute_type enumerator to the lowercase GLSL +// type name the VertexFallbackDefaults resolver expects. 
Only the +// fallback-eligible scalar / vec2 / vec3 / vec4 entries map to a +// non-empty string; everything else (mat*, integer / sampler / image +// types) returns empty, which the caller treats as "REQUIRED:false on +// unsupported type" and fails pipeline-build. +std::string_view declTypeFromAttributeType(isf::attribute_type t) noexcept +{ + switch(t) + { + case isf::attribute_type::Float: return "float"; + case isf::attribute_type::Vec2: return "vec2"; + case isf::attribute_type::Vec3: return "vec3"; + case isf::attribute_type::Vec4: return "vec4"; + default: return {}; + } +} + +} // namespace + +bool remapPipelineVertexInputs( + QRhiGraphicsPipeline& pip, const QShader& vertexShader, + const ossia::geometry& geom, const isf::descriptor& desc, + QRhi& rhi, VertexFallbackPool& pool, QRhiResourceUpdateBatch& batch, + FallbackBindingPlan& outPlan) +{ + outPlan.clear(); + + const auto& shader_inputs = vertexShader.description().inputVariables(); + if(shader_inputs.empty()) + return true; + + // Build a fast NAME → descriptor-entry map so every shader input can + // cheaply look up its REQUIRED / DEFAULT / SEMANTIC metadata. Shader + // reflection order is driver-dependent; we don't rely on it matching + // descriptor declaration order. + ossia::small_flat_map descByName; + for(const auto& vi : desc.vertex_inputs) + descByName[vi.name] = &vi; + + // Start from whatever bindings the pipeline already has (the mesh's + // per-vertex + per-instance buffers). Fallback slots get appended at + // the end; their binding_index in the extended vector is the index + // the draw-path then binds the fallback buffer at. 
+ QVarLengthArray bindings; + { + const auto& prev = pip.vertexInputLayout(); + for(auto it = prev.cbeginBindings(); it != prev.cendBindings(); ++it) + bindings.append(*it); + } + + QVarLengthArray remappedAttrs; + for(const auto& shader_var : shader_inputs) + { + const std::string_view var_name( + shader_var.name.constData(), shader_var.name.size()); + + // Resolve the semantic key the same way the 3-arg overload does — + // SEMANTIC field wins when set, else NAME is used. + std::string_view sem_key = var_name; + auto descIt = descByName.find(var_name); + const isf::vertex_input* descEntry + = (descIt != descByName.end()) ? descIt->second : nullptr; + if(descEntry && !descEntry->semantic.empty()) + sem_key = descEntry->semantic; + + if(const auto* match = findGeometryAttribute(geom, var_name, sem_key)) + { + remappedAttrs.append(QRhiVertexInputAttribute( + match->binding, shader_var.location, + static_cast(match->format), + match->byte_offset)); + continue; + } + + // Miss. Strict mode (no descriptor entry or REQUIRED=true) fails. + if(!descEntry || descEntry->required) + { + qDebug() << "remapPipelineVertexInputs: required VERTEX_INPUT '" + << QString::fromUtf8(var_name.data(), (int)var_name.size()) + << "' has no matching attribute on upstream geometry"; + return false; + } + + // Optional path — synthesise a fallback buffer. 
Two failure modes + // still reject the pipeline build: + // - declared GLSL TYPE is unsupported (mat4 / integer / sampler) + // - the semantic has no whitelist neutral AND the shader did not + // supply DEFAULT in its JSON header + const std::string_view decl_type = declTypeFromAttributeType(descEntry->type); + if(decl_type.empty()) + { + qDebug() << "remapPipelineVertexInputs: optional VERTEX_INPUT '" + << QString::fromUtf8(var_name.data(), (int)var_name.size()) + << "' uses a type (mat4 / integer / sampler) that is not" + " supported by the v1 fallback path; bind a real" + " attribute or declare it REQUIRED: true"; + return false; + } + + const auto sem = ossia::name_to_semantic(sem_key); + auto spec = resolveVertexFallback(sem, decl_type, descEntry->default_val); + if(!spec) + { + qDebug() << "remapPipelineVertexInputs: optional VERTEX_INPUT '" + << QString::fromUtf8(var_name.data(), (int)var_name.size()) + << "' (semantic '" + << QString::fromUtf8(sem_key.data(), (int)sem_key.size()) + << "') has no whitelist default and no explicit DEFAULT" + " was provided in the JSON header"; + return false; + } + + const auto fallbackEntry = pool.acquire(rhi, batch, *spec); + if(!fallbackEntry.buffer) + { + qDebug() << "remapPipelineVertexInputs: failed to allocate fallback" + " buffer for VERTEX_INPUT '" + << QString::fromUtf8(var_name.data(), (int)var_name.size()) + << "'"; + return false; + } + + // Append a PerInstance step_rate=1 binding to the layout, pointing + // at a fresh binding index. Semantically: "one instance's worth of + // this attribute is packed into a single-element buffer, broadcast + // to every vertex and every instance of the draw". 
+ const int new_binding_index = bindings.size(); + bindings.append(QRhiVertexInputBinding( + fallbackEntry.stride, + QRhiVertexInputBinding::PerInstance, + /*stepRate=*/1)); + + remappedAttrs.append(QRhiVertexInputAttribute( + new_binding_index, shader_var.location, + static_cast(fallbackEntry.format), + /*offset=*/0)); + + outPlan.slots.push_back( + FallbackBindingPlan::Slot{ + .binding_index = new_binding_index, + .buffer = fallbackEntry.buffer}); + } + + QRhiVertexInputLayout inputLayout; + inputLayout.setBindings(bindings.begin(), bindings.end()); + inputLayout.setAttributes(remappedAttrs.begin(), remappedAttrs.end()); + pip.setVertexInputLayout(inputLayout); + return true; +} + Pipeline buildPipeline( const RenderList& renderer, const Mesh& mesh, const QShader& vertexS, const QShader& fragmentS, const TextureRenderTarget& rt, @@ -603,23 +867,40 @@ QRhiShaderResourceBindings* createDefaultBindings( bindings.push_back(materialBinding); } - // Bind samplers + // Bind samplers. Null texture sources → substitute with the view-type-matched + // empty texture carried by `Sampler::fallback` (2D / Array / Cube / 3D). + // This keeps the SRB valid so the pipeline does not crash during + // vkUpdateDescriptorSets when an optional shader input has no upstream + // producer — the pass will simply sample the default fallback and render a + // neutral value (opaque black / transparent) for that slot. Required inputs + // that truly need content are the shader author's responsibility; the + // invariant here is "missing ⇒ render something safe, never crash". + // + // If `sampler.fallback` is null, the slot intent is assumed sampler2D + // (the 99 % case) and we use `RenderList::emptyTexture()`. Call sites + // that create Samplers for sampler3D / samplerCube / sampler2DArray + // slots MUST populate `fallback` with the typed empty texture — otherwise + // Vulkan will still reject the binding with a view-type mismatch when + // the 2D fallback kicks in. 
int binding = 3; for(auto sampler : samplers) { - assert(sampler.texture); auto actual_texture = sampler.texture; - // For cases where we do multi-pass rendering, set "this pass"'s input texture - // to an empty texture instead as we can't output to an input texture - if(actual_texture == rt.texture) - actual_texture = &renderer.emptyTexture(); + // Multi-pass feedback short: can't sample the RT we're writing to. + if(actual_texture && actual_texture == rt.texture) + actual_texture = nullptr; + + if(!actual_texture) + actual_texture = sampler.fallback ? sampler.fallback + : &renderer.emptyTexture(); - bindings.push_back(QRhiShaderResourceBinding::sampledTexture( - binding, - QRhiShaderResourceBinding::VertexStage - | QRhiShaderResourceBinding::FragmentStage, - actual_texture, sampler.sampler)); + bindings.push_back( + QRhiShaderResourceBinding::sampledTexture( + binding, + QRhiShaderResourceBinding::VertexStage + | QRhiShaderResourceBinding::FragmentStage, + actual_texture, sampler.sampler)); binding++; } @@ -644,27 +925,136 @@ Pipeline buildPipeline( return buildPipeline(renderer, mesh, vertexS, fragmentS, rt, bindings); } +Pipeline buildPipelineWithState( + const RenderList& renderer, const Mesh& mesh, const QShader& vertexS, + const QShader& fragmentS, const TextureRenderTarget& rt, QRhiBuffer* processUBO, + QRhiBuffer* materialUBO, std::span samplers, + std::span extraBindings, + const isf::pipeline_state& state, + int multiViewCount, + bool useShadingRate) +{ + auto& rhi = *renderer.state.rhi; + auto srb = createDefaultBindings( + renderer, rt, processUBO, materialUBO, samplers, extraBindings); + + auto ps = rhi.newGraphicsPipeline(); + ps->setName("buildPipelineWithState::ps"); + SCORE_ASSERT(ps); + + // Plan 09 S6: VRS opt-in. Only applies when the backend supports + // variable-rate shading (cap set in ScreenNode::populateCaps). 
The + // actual shading-rate map or per-draw rate is set on the render + // target / command buffer; the pipeline just needs the flag. +#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0) + if(useShadingRate && renderer.state.caps.variableRateShading) + { + ps->setFlags(ps->flags() | QRhiGraphicsPipeline::UsesShadingRate); + } +#endif + + const bool depthAvailable + = (rt.depthTexture != nullptr) || (rt.depthRenderBuffer != nullptr) + || (rt.msDepthTexture != nullptr); + const bool wantsDepthByDefault = renderer.anyNodeRequiresDepth(); + + // Sample count handling (same as buildPipeline()). + const int rtSamplesQueried = rt.sampleCount(); + const int pipelineSamples + = (rtSamplesQueried > 0) ? rtSamplesQueried : renderer.samples(); + ps->setSampleCount(pipelineSamples); + + mesh.preparePipeline(*ps); + + // Seed legacy premul-alpha blend on every color attachment so that shaders + // which declare a partial PIPELINE_STATE (e.g. only DEPTH_TEST) don't + // silently lose the historical default blend mode. applyPipelineState + // overrides per-attachment blends only when the shader sets BLEND. + { + QRhiGraphicsPipeline::TargetBlend premulAlphaBlend; + premulAlphaBlend.enable = true; + premulAlphaBlend.srcColor = QRhiGraphicsPipeline::BlendFactor::SrcAlpha; + premulAlphaBlend.dstColor = QRhiGraphicsPipeline::BlendFactor::OneMinusSrcAlpha; + premulAlphaBlend.srcAlpha = QRhiGraphicsPipeline::BlendFactor::SrcAlpha; + premulAlphaBlend.dstAlpha = QRhiGraphicsPipeline::BlendFactor::OneMinusSrcAlpha; + const int n = std::max(1, rt.colorAttachmentCount()); + QVarLengthArray blends; + blends.reserve(n); + for(int i = 0; i < n; ++i) + blends.push_back(premulAlphaBlend); + ps->setTargetBlends(blends.begin(), blends.end()); + } + + // Apply pipeline_state: depth, cull, front-face, blend (per-attachment), + // stencil, polygon mode, line width. Only fields explicitly set in `state` + // override the seeded defaults above + mesh.preparePipeline()'s setup. 
+ applyPipelineState( + *ps, state, rt.colorAttachmentCount(), depthAvailable, wantsDepthByDefault); + + // Semantic vertex input remapping (same as buildPipeline()). + if(auto* geom = mesh.semanticGeometry()) + { + if(!remapPipelineVertexInputs(*ps, vertexS, *geom)) + { + qDebug() << "Warning! Shader requires attributes not present in mesh"; + delete ps; + return {nullptr, srb}; + } + } + + ps->setShaderStages( + {{QRhiShaderStage::Vertex, vertexS}, {QRhiShaderStage::Fragment, fragmentS}}); + ps->setShaderResourceBindings(srb); + + SCORE_ASSERT(rt.renderPass); + ps->setRenderPassDescriptor(rt.renderPass); + + // Multiview: on Vulkan/GL the multiViewCount is picked up from the render + // pass descriptor's color attachment (see createMultiViewRenderTarget), but + // D3D12 ViewInstancing and Metal vertex amplification read it from the + // pipeline itself via QRhiGraphicsPipeline::multiViewCount(). So we must set + // it explicitly here for those backends to produce correct multiview output. +#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + if(multiViewCount > 1 && renderer.state.caps.multiview) + ps->setMultiViewCount(multiViewCount); +#else + (void)multiViewCount; +#endif + + if(!ps->create()) + { + qDebug() << "Warning! 
Pipeline not created"; + delete ps; + ps = nullptr; + } + return {ps, srb}; +} + std::pair makeShaders(const RenderState& v, QString vert, QString frag) { auto [vertexS, vertexError] = ShaderCache::get(v, vert.toUtf8(), QShader::VertexStage); if(!vertexError.isEmpty()) { - qDebug() << vertexError; - qDebug() << vert.toStdString().data(); + qWarning() << "Vertex shader bake failed:" << vertexError; + qWarning().noquote() << vert; } auto [fragmentS, fragmentError] = ShaderCache::get(v, frag.toUtf8(), QShader::FragmentStage); if(!fragmentError.isEmpty()) { - qDebug() << fragmentError; - qDebug() << frag.toStdString().data(); + qWarning() << "Fragment shader bake failed:" << fragmentError; + qWarning().noquote() << frag; } - // qDebug().noquote() << vert.toUtf8().constData(); - if(!vertexS.isValid()) + // QShaderBaker is configured with setPerTargetCompilation(true), so a + // failure on the only requested target leaves errorMessage() non-empty + // even when the QShader itself is "valid" via some intermediate variant. + // Treat any non-empty error as fatal so backend-specific bake failures + // (e.g. SPIRV-Cross HLSL refusing gl_NumWorkGroups) are not silent. 
+ if(!vertexError.isEmpty() || !vertexS.isValid()) throw std::runtime_error("invalid vertex shader"); - if(!fragmentS.isValid()) + if(!fragmentError.isEmpty() || !fragmentS.isValid()) throw std::runtime_error("invalid fragment shader"); return {vertexS, fragmentS}; @@ -676,9 +1066,12 @@ QShader makeCompute(const RenderState& v, QString compute) auto [computeS, computeError] = ShaderCache::get(v, compute.toUtf8(), QShader::ComputeStage); if(!computeError.isEmpty()) - qDebug() << computeError; + { + qWarning() << "Compute shader bake failed:" << computeError; + qWarning().noquote() << compute; + } - if(!computeS.isValid()) + if(!computeError.isEmpty() || !computeS.isValid()) throw std::runtime_error("invalid compute shader"); return computeS; } @@ -901,11 +1294,33 @@ computeScaleForTexcoordSizing(ScaleMode mode, QSizeF renderSize, QSizeF textureS } std::vector initInputSamplers( - const score::gfx::Node& node, RenderList& renderer, const std::vector& ports) + const score::gfx::Node& node, RenderList& renderer, const std::vector& ports, + const isf::descriptor* desc) { std::vector samplers; QRhi& rhi = *renderer.state.rhi; + // Per-port sampler-config lookup. The descriptor's `inputs` list is in + // 1:1 order with the Port array constructed by ISFNode's visitor, so + // we can walk it in lockstep and capture each image_input's + // sampler_config. Used by the GrabsFromSource branch below to honor + // shader-declared WRAP/FILTER on array / 3D textures (without this, + // those hardcoded to ClampToEdge — which broke any glTF whose UVs + // went outside [0,1]). 
+ std::vector port_sampler_cfg(ports.size(), nullptr); + if(desc) + { + const std::size_t N = std::min(ports.size(), desc->inputs.size()); + for(std::size_t i = 0; i < N; ++i) + { + const auto& inp = desc->inputs[i]; + if(auto* im = ossia::get_if(&inp.data)) + port_sampler_cfg[i] = &im->sampler; + else if(auto* cm = ossia::get_if(&inp.data)) + port_sampler_cfg[i] = &cm->sampler; + } + } + int cur_port = 0; for(Port* in : ports) { @@ -935,22 +1350,65 @@ std::vector initInputSamplers( } } + // Pick a view-type-compatible placeholder when the upstream hasn't + // produced a texture yet. Binding a 2D view to a sampler3D / + // samplerCube / sampler2DArray shader input triggers + // VUID-vkCmdDraw-viewType-07752 at every draw until a real texture + // flows in (and forever if no edge ever connects). + QRhiTexture* fallback = nullptr; + if((in->flags & Flag::Cubemap) == Flag::Cubemap) + fallback = &renderer.emptyTextureCube(); + else if((in->flags & Flag::ThreeDimensional) == Flag::ThreeDimensional) + fallback = &renderer.emptyTexture3D(); + else if((in->flags & Flag::TextureArray) == Flag::TextureArray) + fallback = &renderer.emptyTextureArray(); + else + fallback = &renderer.emptyTexture(); if(!srcTex) - srcTex = &renderer.emptyTexture(); - - auto sampler = rhi.newSampler( - QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::Linear, - QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); - sampler->setName("initInputSamplers::cubemap_sampler"); - SCORE_ASSERT(sampler->create()); + srcTex = fallback; + + // Honour the shader-declared sampler config when present + // (WRAP / FILTER / MIPMAP_MODE / COMPARE / …). Falls back to + // the historical Linear+ClampToEdge sampler when the + // descriptor wasn't passed or the input had no sampler block. 
+ QRhiSampler* sampler = nullptr; + if(cur_port < (int)port_sampler_cfg.size() && port_sampler_cfg[cur_port]) + { + sampler = score::gfx::makeSampler(rhi, *port_sampler_cfg[cur_port]); + } + else + { + sampler = rhi.newSampler( + QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::Linear, + QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge); + SCORE_ASSERT(sampler->create()); + } + sampler->setName("initInputSamplers::grabs_sampler"); - samplers.push_back({sampler, srcTex}); + samplers.push_back({sampler, srcTex, fallback}); } else { // Look up the pre-created render target from the RenderList auto rt = renderer.renderTargetForInputPort(*in); - auto* texture = rt.texture ? rt.texture : &renderer.emptyTexture(); + // View-type-matched fallback when the render target has no + // texture yet (no upstream producer wired). Same reasoning as + // the GrabsFromSource branch above: binding a sampler2D view + // into a sampler2DArray / samplerCube / sampler3D shader slot + // triggers Vulkan validation errors (VUID-…-viewType-07752) + // every frame and in some drivers crashes outright. Pick the + // empty texture whose view kind matches the shader's + // declared sampler type. + QRhiTexture* fallback = nullptr; + if((in->flags & Flag::Cubemap) == Flag::Cubemap) + fallback = &renderer.emptyTextureCube(); + else if((in->flags & Flag::ThreeDimensional) == Flag::ThreeDimensional) + fallback = &renderer.emptyTexture3D(); + else if((in->flags & Flag::TextureArray) == Flag::TextureArray) + fallback = &renderer.emptyTextureArray(); + else + fallback = &renderer.emptyTexture(); + QRhiTexture* texture = rt.texture ? 
rt.texture : fallback; auto spec = node.resolveRenderTargetSpecs(cur_port, renderer); auto sampler = rhi.newSampler( @@ -959,7 +1417,7 @@ std::vector initInputSamplers( sampler->setName("initInputSamplers::sampler"); SCORE_ASSERT(sampler->create()); - samplers.push_back({sampler, texture}); + samplers.push_back({sampler, texture, fallback}); // If this port has sampleable depth, add depth sampler if((in->flags & Flag::SamplableDepth) == Flag::SamplableDepth) @@ -971,7 +1429,7 @@ std::vector initInputSamplers( SCORE_ASSERT(depthSampler->create()); auto* depthTex = rt.depthTexture ? rt.depthTexture : &renderer.emptyTexture(); - samplers.push_back({depthSampler, depthTex}); + samplers.push_back({depthSampler, depthTex, &renderer.emptyTexture()}); } } break; @@ -984,4 +1442,502 @@ std::vector initInputSamplers( } return samplers; } + +// --------------------------------------------------------------------------- +// New render-target overloads (depth-only, layered, multiview) +// --------------------------------------------------------------------------- + +TextureRenderTarget createDepthOnlyRenderTarget( + const RenderState& state, QSize sz, int samples, bool samplableDepth, + QRhiTexture::Format depthFmt) +{ + TextureRenderTarget ret; + ret.texture = nullptr; + ret.arrayLayers = 1; + + // Depth resolve for MSAA sampleable depth — matches the main overload. + int effectiveSamples = samples; + bool useDepthResolve = false; + if(samplableDepth && samples > 1) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil); +#endif + if(!useDepthResolve) + { + qWarning() << "createDepthOnlyRenderTarget: samplable depth + samples=" + << samples + << "unsupported on this backend; degrading to samples=1."; + effectiveSamples = 1; + } + } + + // Allocate the sampleable depth texture (what downstream shaders sample). 
+ if(samplableDepth) + { + ret.depthTexture = state.rhi->newTexture( + depthFmt, sz, 1, QRhiTexture::RenderTarget); + ret.depthTexture->setName("createDepthOnlyRenderTarget::depthTexture"); + SCORE_ASSERT(ret.depthTexture->create()); + + if(useDepthResolve) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + ret.msDepthTexture = state.rhi->newTexture( + depthFmt, sz, effectiveSamples, QRhiTexture::RenderTarget); + ret.msDepthTexture->setName("createDepthOnlyRenderTarget::msDepthTexture"); + SCORE_ASSERT(ret.msDepthTexture->create()); +#endif + } + } + else + { + ret.depthRenderBuffer = state.rhi->newRenderBuffer( + QRhiRenderBuffer::DepthStencil, sz, effectiveSamples); + ret.depthRenderBuffer->setName("createDepthOnlyRenderTarget::depthRB"); + SCORE_ASSERT(ret.depthRenderBuffer->create()); + } + + // Some backends (notably GL ES) REQUIRE a color attachment — allocate a + // 1×1 dummy color texture that never gets written to. The depth-only RT + // stores it in dummyColorTexture (owned, released with the RT). + // + // On desktop Vulkan/Metal/D3D a depth-only RT is usually accepted without + // a color attachment. We always allocate the dummy for portability — + // the memory cost (4 bytes) is negligible. 
+ ret.dummyColorTexture = state.rhi->newTexture( + QRhiTexture::RGBA8, QSize(1, 1), effectiveSamples, QRhiTexture::RenderTarget); + ret.dummyColorTexture->setName("createDepthOnlyRenderTarget::dummyColor"); + SCORE_ASSERT(ret.dummyColorTexture->create()); + + QRhiTextureRenderTargetDescription desc; + { + QRhiColorAttachment color0(ret.dummyColorTexture); + desc.setColorAttachments({color0}); + } + + if(samplableDepth) + { + if(useDepthResolve) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + desc.setDepthTexture(ret.msDepthTexture); + desc.setDepthResolveTexture(ret.depthTexture); +#else + desc.setDepthTexture(ret.depthTexture); +#endif + } + else + { + desc.setDepthTexture(ret.depthTexture); + } + } + else + { + desc.setDepthStencilBuffer(ret.depthRenderBuffer); + } + + auto* renderTarget = state.rhi->newTextureRenderTarget(desc); + renderTarget->setName("createDepthOnlyRenderTarget::rt"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("createDepthOnlyRenderTarget::rp"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + ret.renderTarget = renderTarget; + ret.renderPass = renderPass; + return ret; +} + +TextureRenderTarget createLayeredRenderTarget( + const RenderState& state, QRhiTexture* colorTextureArray, int renderLayer, + QRhiTexture* depthTex, int samples) +{ + TextureRenderTarget ret; + SCORE_ASSERT(colorTextureArray); + SCORE_ASSERT(renderLayer >= 0); + + ret.texture = colorTextureArray; + ret.arrayLayers = std::max(colorTextureArray->arraySize(), 1); + ret.renderLayer = renderLayer; + + QRhiTextureRenderTargetDescription desc; + { + QRhiColorAttachment color0(colorTextureArray); + color0.setLayer(renderLayer); + desc.setColorAttachments({color0}); + } + + if(depthTex) + { + ret.depthTexture = depthTex; + // For layered rendering with a depth *array* texture, we'd need to set + // the layer too. 
We expect a single shared 2D depth texture in most + // cases, which is fine. + desc.setDepthTexture(depthTex); + } + + auto* renderTarget = state.rhi->newTextureRenderTarget(desc); + renderTarget->setName("createLayeredRenderTarget::rt"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("createLayeredRenderTarget::rp"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + ret.renderTarget = renderTarget; + ret.renderPass = renderPass; + (void)samples; + return ret; +} + +TextureRenderTarget createMultiViewRenderTarget( + const RenderState& state, QRhiTexture* colorTextureArray, int multiViewCount, + QRhiTexture* depthTextureArray, int samples) +{ + TextureRenderTarget ret; + SCORE_ASSERT(colorTextureArray); + SCORE_ASSERT(multiViewCount >= 2); + + ret.texture = colorTextureArray; + ret.arrayLayers = std::max(colorTextureArray->arraySize(), multiViewCount); + ret.multiViewCount = multiViewCount; + + QRhiTextureRenderTargetDescription desc; + { + QRhiColorAttachment color0(colorTextureArray); + // Render to layers [0..multiViewCount-1] via gl_ViewIndex. 
+ color0.setLayer(0); +#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + color0.setMultiViewCount(multiViewCount); +#endif + desc.setColorAttachments({color0}); + } + + if(depthTextureArray) + { + ret.depthTexture = depthTextureArray; + desc.setDepthTexture(depthTextureArray); + } + + auto* renderTarget = state.rhi->newTextureRenderTarget(desc); + renderTarget->setName("createMultiViewRenderTarget::rt"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("createMultiViewRenderTarget::rp"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + ret.renderTarget = renderTarget; + ret.renderPass = renderPass; + (void)samples; + return ret; +} + +TextureRenderTarget createDepthOnlyRenderTarget( + const RenderState& state, QRhiTexture* externalDepthTexture, int samples, + bool samplableDepth) +{ + // Like createDepthOnlyRenderTarget(sz, ...) but builds the RT AROUND a + // caller-supplied depth texture instead of allocating (and the old buggy + // call site then immediately deleting) an internal one. The supplied + // texture may be a plain 2D depth texture or a TextureArray (layered / + // shadow-cascade depth) — in both cases QRhi attaches layer 0 by default + // for a depth-only pass, which is what we want here. + // + // Ownership: `externalDepthTexture` becomes `ret.depthTexture` and is + // released with the RT (TextureRenderTarget::release()), matching the + // ownership the previous (broken) code implied. + TextureRenderTarget ret; + SCORE_ASSERT(externalDepthTexture); + ret.texture = nullptr; + ret.arrayLayers = std::max(externalDepthTexture->arraySize(), 1); + + // Depth resolve for MSAA sampleable depth — matches the sz overload. 
+ int effectiveSamples = samples; + bool useDepthResolve = false; + if(samplableDepth && samples > 1) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + useDepthResolve = state.rhi->isFeatureSupported(QRhi::ResolveDepthStencil); +#endif + if(!useDepthResolve) + { + qWarning() << "createDepthOnlyRenderTarget(external): samplable depth + samples=" + << samples + << "unsupported on this backend; degrading to samples=1."; + effectiveSamples = 1; + } + } + + ret.depthTexture = externalDepthTexture; + + if(useDepthResolve) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + ret.msDepthTexture = state.rhi->newTexture( + externalDepthTexture->format(), externalDepthTexture->pixelSize(), + effectiveSamples, QRhiTexture::RenderTarget); + ret.msDepthTexture->setName( + "createDepthOnlyRenderTarget(external)::msDepthTexture"); + SCORE_ASSERT(ret.msDepthTexture->create()); +#endif + } + + // Some backends (notably GL ES) REQUIRE a color attachment — same dummy + // 1×1 color texture as the sz overload. 
+ ret.dummyColorTexture = state.rhi->newTexture( + QRhiTexture::RGBA8, QSize(1, 1), effectiveSamples, QRhiTexture::RenderTarget); + ret.dummyColorTexture->setName( + "createDepthOnlyRenderTarget(external)::dummyColor"); + SCORE_ASSERT(ret.dummyColorTexture->create()); + + QRhiTextureRenderTargetDescription desc; + { + QRhiColorAttachment color0(ret.dummyColorTexture); + desc.setColorAttachments({color0}); + } + + if(useDepthResolve) + { +#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0) + desc.setDepthTexture(ret.msDepthTexture); + desc.setDepthResolveTexture(ret.depthTexture); +#else + desc.setDepthTexture(ret.depthTexture); +#endif + } + else + { + desc.setDepthTexture(ret.depthTexture); + } + + auto* renderTarget = state.rhi->newTextureRenderTarget(desc); + renderTarget->setName("createDepthOnlyRenderTarget(external)::rt"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("createDepthOnlyRenderTarget(external)::rp"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + ret.renderTarget = renderTarget; + ret.renderPass = renderPass; + return ret; +} + +TextureRenderTarget createLayeredRenderTarget( + const RenderState& state, std::span colorTextures, + int renderLayer, QRhiTexture* depthTex, int samples) +{ + // Multi-attachment (MRT) layered variant: attaches ALL color textures to + // the render pass so the pipeline blend-state count (driven by + // rt.colorAttachmentCount()) agrees with the actual attachment count. + // Attaching only color[0] while the pipeline declares N blend targets is a + // Vulkan pipeline-create validation error AND silently drops outputs 1..N. 
+ TextureRenderTarget ret; + SCORE_ASSERT(!colorTextures.empty()); + SCORE_ASSERT(colorTextures[0]); + SCORE_ASSERT(renderLayer >= 0); + + ret.texture = colorTextures[0]; + for(std::size_t i = 1; i < colorTextures.size(); i++) + ret.additionalColorTextures.push_back(colorTextures[i]); + ret.arrayLayers = std::max(colorTextures[0]->arraySize(), 1); + ret.renderLayer = renderLayer; + + QList attachments; + for(auto* tex : colorTextures) + { + QRhiColorAttachment att(tex); + // Layered textures select the rendered layer; plain 2D color textures in + // a mixed MRT keep their (single) layer 0 and ignore this. + if(tex->arraySize() > 1) + att.setLayer(renderLayer); + attachments.append(att); + } + + QRhiTextureRenderTargetDescription desc; + desc.setColorAttachments(attachments.begin(), attachments.end()); + + if(depthTex) + { + ret.depthTexture = depthTex; + desc.setDepthTexture(depthTex); + } + + auto* renderTarget = state.rhi->newTextureRenderTarget(desc); + renderTarget->setName("createLayeredRenderTarget(MRT)::rt"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("createLayeredRenderTarget(MRT)::rp"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + ret.renderTarget = renderTarget; + ret.renderPass = renderPass; + (void)samples; + return ret; +} + +TextureRenderTarget createMultiViewRenderTarget( + const RenderState& state, std::span colorTextures, + int multiViewCount, QRhiTexture* depthTextureArray, int samples) +{ + // Multi-attachment (MRT) multiview variant: attaches ALL color textures + // (each a TextureArray with >= multiViewCount layers) with per-attachment + // setMultiViewCount, so attachments == pipeline blend targets. See the + // layered overload above for why attaching only color[0] is a bug. 
+ TextureRenderTarget ret; + SCORE_ASSERT(!colorTextures.empty()); + SCORE_ASSERT(colorTextures[0]); + SCORE_ASSERT(multiViewCount >= 2); + + ret.texture = colorTextures[0]; + for(std::size_t i = 1; i < colorTextures.size(); i++) + ret.additionalColorTextures.push_back(colorTextures[i]); + ret.arrayLayers = std::max(colorTextures[0]->arraySize(), multiViewCount); + ret.multiViewCount = multiViewCount; + + QList attachments; + for(auto* tex : colorTextures) + { + QRhiColorAttachment att(tex); + // Render to layers [0..multiViewCount-1] via gl_ViewIndex. + att.setLayer(0); +#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + att.setMultiViewCount(multiViewCount); +#endif + attachments.append(att); + } + + QRhiTextureRenderTargetDescription desc; + desc.setColorAttachments(attachments.begin(), attachments.end()); + + if(depthTextureArray) + { + ret.depthTexture = depthTextureArray; + desc.setDepthTexture(depthTextureArray); + } + + auto* renderTarget = state.rhi->newTextureRenderTarget(desc); + renderTarget->setName("createMultiViewRenderTarget(MRT)::rt"); + SCORE_ASSERT(renderTarget); + + auto* renderPass = renderTarget->newCompatibleRenderPassDescriptor(); + renderPass->setName("createMultiViewRenderTarget(MRT)::rp"); + SCORE_ASSERT(renderPass); + + renderTarget->setRenderPassDescriptor(renderPass); + SCORE_ASSERT(renderTarget->create()); + + ret.renderTarget = renderTarget; + ret.renderPass = renderPass; + (void)samples; + return ret; +} + +QRhiTexture::Format parseOutputFormat( + const std::string& fmt, QRhiTexture::Format fallback) noexcept +{ + std::string f = fmt; + for(auto& c : f) + c = (char)std::tolower((unsigned char)c); + if(f == "rgba8") return QRhiTexture::RGBA8; + if(f == "bgra8") return QRhiTexture::BGRA8; + if(f == "r8") return QRhiTexture::R8; + if(f == "rg8") return QRhiTexture::RG8; + if(f == "r16") return QRhiTexture::R16; + if(f == "rg16") return QRhiTexture::RG16; + if(f == "r16f") return QRhiTexture::R16F; + if(f == "r32f") return QRhiTexture::R32F; 
+ if(f == "rgba16f") return QRhiTexture::RGBA16F; + if(f == "rgba32f") return QRhiTexture::RGBA32F; + if(f == "d16") return QRhiTexture::D16; + if(f == "d24") return QRhiTexture::D24; + if(f == "d24s8") return QRhiTexture::D24S8; + if(f == "d32f") return QRhiTexture::D32F; + return fallback; +} + +// ---------------- makeSampler ----------------------------------------------- +namespace +{ +static QRhiSampler::Filter parseFilter(const std::string& s, QRhiSampler::Filter def) +{ + if(s.empty()) return def; + std::string v = s; + for(auto& c : v) c = (char)tolower(c); + if(v == "nearest") return QRhiSampler::Nearest; + if(v == "linear") return QRhiSampler::Linear; + if(v == "none") return QRhiSampler::None; + return def; +} +static QRhiSampler::AddressMode parseAddress(const std::string& s, QRhiSampler::AddressMode def) +{ + if(s.empty()) return def; + std::string v = s; + for(auto& c : v) c = (char)tolower(c); + for(auto& c : v) if(c == '-') c = '_'; + if(v == "repeat") return QRhiSampler::Repeat; + if(v == "clamp" || v == "clamp_to_edge") return QRhiSampler::ClampToEdge; + if(v == "mirror" || v == "mirrored_repeat") return QRhiSampler::Mirror; + //#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0) + // if(v == "mirror_once" || v == "mirror_clamp_to_edge") + // return QRhiSampler::MirrorOnce; + //#endif + return def; +} +static QRhiSampler::CompareOp parseCompare(const std::string& s) +{ + if(s.empty()) return QRhiSampler::Never; + std::string v = s; + for(auto& c : v) c = (char)tolower(c); + for(auto& c : v) if(c == '-') c = '_'; + if(v == "never") return QRhiSampler::Never; + if(v == "less") return QRhiSampler::Less; + if(v == "equal") return QRhiSampler::Equal; + if(v == "less_equal" || v == "lequal") return QRhiSampler::LessOrEqual; + if(v == "greater") return QRhiSampler::Greater; + if(v == "not_equal" || v == "neq") return QRhiSampler::NotEqual; + if(v == "greater_equal"|| v == "gequal") return QRhiSampler::GreaterOrEqual; + if(v == "always") return 
QRhiSampler::Always; + return QRhiSampler::Never; +} +} + +QRhiSampler* makeSampler(QRhi& rhi, const isf::sampler_config& cfg) +{ + const auto defaultLinear = QRhiSampler::Linear; + auto base = parseFilter(cfg.filter, defaultLinear); + auto minF = parseFilter(cfg.min_filter, base); + auto magF = parseFilter(cfg.mag_filter, base); + auto mipF = parseFilter(cfg.mipmap_mode, QRhiSampler::None); + + const auto defaultWrap = QRhiSampler::ClampToEdge; + auto baseWrap = parseAddress(cfg.wrap, defaultWrap); + auto wrapU = parseAddress(cfg.wrap_s, baseWrap); + auto wrapV = parseAddress(cfg.wrap_t, baseWrap); + auto wrapW = parseAddress(cfg.wrap_r, baseWrap); + + auto* s = rhi.newSampler(magF, minF, mipF, wrapU, wrapV, wrapW); + s->setTextureCompareOp(parseCompare(cfg.compare)); + s->create(); + return s; +} } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp index 55b43ec9e3..caa2770eff 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Utils.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -13,20 +14,37 @@ #include +namespace isf +{ +struct descriptor; +} + namespace score::gfx { class Node; class NodeModel; struct Port; +class VertexFallbackPool; struct Edge; class RenderList; /** * @brief Stores a sampler and the texture currently associated with it. + * + * `fallback` is the view-type-matched empty texture to bind when `texture` + * becomes null (no upstream, feedback-loop short, disconnect race). It MUST + * be one of `RenderList::emptyTexture() / emptyTexture3D() / emptyTextureCube() + * / emptyTextureArray()` so the bound view type matches the shader's + * sampler declaration. 
Leaving this null is only safe for plain sampler2D + * slots — a samplerCube / sampler3D / sampler2DArray slot with a null + * `fallback` will trip Vulkan viewType validation or, if the fallback + * path upstream also produced null, crash with a VK_NULL_HANDLE descriptor + * write. */ struct Sampler { QRhiSampler* sampler{}; QRhiTexture* texture{}; + QRhiTexture* fallback{}; }; /** @@ -45,6 +63,13 @@ struct AudioTexture FFT, Histogram } mode{}; + + // Optional sampler config. Empty strings keep legacy defaults + // (linear / clamp_to_edge). Populated by ISFNode from the parsed + // audio_input::sampler (FILTER / WRAP). Useful for FFT reads where + // NEAREST filtering avoids smearing adjacent bins. + std::string filter; + std::string wrap; }; /** @@ -107,10 +132,12 @@ struct Pipeline void release() { - delete pipeline; + if(pipeline) + pipeline->deleteLater(); pipeline = nullptr; - delete srb; + if(srb) + srb->deleteLater(); srb = nullptr; } }; @@ -120,67 +147,122 @@ struct Pipeline */ struct TextureRenderTarget { + // The first five members must keep this order: out-of-tree addons + // aggregate-initialize {texture, colorRenderBuffer, depthRenderBuffer, + // renderPass, renderTarget}. 
QRhiTexture* texture{}; // Primary color attachment (location 0) - std::vector additionalColorTextures; // MRT: locations 1..N QRhiRenderBuffer* colorRenderBuffer{}; QRhiRenderBuffer* depthRenderBuffer{}; - QRhiTexture* depthTexture{}; // Sampleable depth (alternative to depthRenderBuffer) - QRhiTexture* msDepthTexture{}; // MSAA depth attachment when depthTexture is the resolve target QRhiRenderPassDescriptor* renderPass{}; QRhiRenderTarget* renderTarget{}; - operator bool() const noexcept { return texture != nullptr; } + std::vector additionalColorTextures; // MRT: locations 1..N + QRhiTexture* depthTexture{}; // Sampleable depth (alternative to depthRenderBuffer) + QRhiTexture* msDepthTexture{}; // MSAA depth attachment when depthTexture is the resolve target + + // A 1×1 color texture allocated when the backend requires a color attachment + // but the user only wants depth-only rendering. Owned by this RT. + QRhiTexture* dummyColorTexture{}; + + // Number of array layers on `texture` (1 = non-layered, >1 = texture array). + int arrayLayers{1}; + + // Multiview view count (0/1 = disabled). + int multiViewCount{0}; + + // Which layer of `texture`/`additionalColorTextures` this RT renders to. + // -1 = not applicable (non-layered, or MultiView handles it automatically). + int renderLayer{-1}; + + operator bool() const noexcept { return texture != nullptr || dummyColorTexture != nullptr || depthTexture != nullptr; } int colorAttachmentCount() const noexcept { - return texture ? 1 + (int)additionalColorTextures.size() : 0; + if(texture) + return 1 + (int)additionalColorTextures.size(); + if(dummyColorTexture) + return 1; + return 0; } // Returns the actual MSAA sample count of this render target, or -1 if it - // cannot be determined from the stored fields (e.g. when only renderPass is - // set, as for placeholders that target a swap chain). Callers must treat - // -1 as "unknown — fall back to the renderlist's global sample count". 
- // This value is the authoritative input to QRhiGraphicsPipeline::setSampleCount() - // when known, since an RT may have been degraded (samplable-depth + MSAA - // without depth-resolve support). + // cannot be determined from the stored fields. Callers must treat -1 as + // "unknown — fall back to the renderlist's global sample count". + // + // Lookup priority: + // 1. colorRenderBuffer (owned MSAA attachment — always authoritative). + // 2. texture (single-sample resolve target OR non-MSAA render target). + // 3. depthTexture (depth-only RTs). + // 4. msDepthTexture (MSAA depth attachment when depth resolve is used). + // 5. renderTarget — BUT only when this RT genuinely owns its attachments + // (colorRenderBuffer/texture/depthTexture set). A "bare" RT that only + // carries renderTarget + renderPass (e.g. a swap-chain wrapper + // returned by QRhiSwapChain::currentFrameRenderTarget()) is NOT + // queried because swap-chain render-target objects lazily write + // their sampleCount only when createOrResize() runs — any read before + // that returns the default 1, which would silently mismatch a + // multi-sample renderPassDescriptor and produce + // VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853. + // 6. Otherwise return -1 so the caller uses RenderList::samples(), which + // IS authoritative for externally-managed swap-chain RTs (it drove + // the swap-chain sample count in the first place). int sampleCount() const noexcept { - if(renderTarget) - return renderTarget->sampleCount(); if(colorRenderBuffer) return colorRenderBuffer->sampleCount(); if(texture) return texture->sampleCount(); + if(msDepthTexture) + return msDepthTexture->sampleCount(); + if(depthTexture) + return depthTexture->sampleCount(); + // renderTarget alone without any owned attachment = swap-chain wrapper. + // Its sampleCount is unreliable pre-createOrResize; fall through. 
return -1; } void release() { - if(texture) + if(texture || dummyColorTexture || depthTexture) { - delete texture; + // Use deleteLater() for all GPU resources: Qt RHI commands are async + // and resources may still be referenced by in-flight frames until + // endFrame() completes. deleteLater() defers actual destruction to + // the next beginFrame(). + if(texture) + texture->deleteLater(); texture = nullptr; + if(dummyColorTexture) + dummyColorTexture->deleteLater(); + dummyColorTexture = nullptr; + for(auto* t : additionalColorTextures) - delete t; + t->deleteLater(); additionalColorTextures.clear(); - delete colorRenderBuffer; + if(colorRenderBuffer) + colorRenderBuffer->deleteLater(); colorRenderBuffer = nullptr; - delete depthRenderBuffer; + if(depthRenderBuffer) + depthRenderBuffer->deleteLater(); depthRenderBuffer = nullptr; - delete depthTexture; + if(depthTexture) + depthTexture->deleteLater(); depthTexture = nullptr; - delete msDepthTexture; + if(msDepthTexture) + msDepthTexture->deleteLater(); msDepthTexture = nullptr; - delete renderPass; + if(renderPass) + renderPass->deleteLater(); renderPass = nullptr; - delete renderTarget; + if(renderTarget) + renderTarget->deleteLater(); renderTarget = nullptr; } } @@ -225,6 +307,106 @@ TextureRenderTarget createRenderTarget( QRhiTexture* depthTexture, int samples); +/** + * @brief Create a depth-only render target. + * + * Allocates a sampleable depth texture (samplableDepth=true) or a depth + * renderbuffer. If the backend rejects color-less render targets, a 1x1 + * RGBA8 dummy color texture is allocated and stored in the + * TextureRenderTarget::dummyColorTexture field (owned by the RT). 
+ * + * The resulting TextureRenderTarget has: + * - `depthTexture` or `depthRenderBuffer` set (never both) + * - `texture` == nullptr (depth-only semantics) + * - `dummyColorTexture` may be non-null on some backends + */ +SCORE_PLUGIN_GFX_EXPORT +TextureRenderTarget createDepthOnlyRenderTarget( + const RenderState& state, QSize sz, int samples, bool samplableDepth = true, + QRhiTexture::Format depthFmt = QRhiTexture::D32F); + +/** + * @brief Create a depth-only render target around an EXTERNAL depth texture. + * + * Builds the RT around `externalDepthTexture` (caller-allocated, already + * created) instead of allocating its own. Use this when the depth texture is + * named/owned by the node (so textureForOutput() can return it) — it avoids + * the previous bug where the RT was built around an internal texture that was + * then immediately deleted while still referenced by the render pass. + * + * `externalDepthTexture` may be a plain 2D depth texture or a TextureArray + * (layered / shadow-cascade depth). It becomes `ret.depthTexture` and is + * released with the RT. + */ +SCORE_PLUGIN_GFX_EXPORT +TextureRenderTarget createDepthOnlyRenderTarget( + const RenderState& state, QRhiTexture* externalDepthTexture, int samples, + bool samplableDepth = true); + +/** + * @brief Create a render target that targets a single layer of a texture array. + * + * colorTextureArray must have been created with QRhiTexture::TextureArray + * and at least (renderLayer + 1) layers. + * + * depthTexture may be a regular 2D texture (shared across layers) or nullptr + * to skip depth (use a renderbuffer instead via createRenderTarget overloads). + */ +SCORE_PLUGIN_GFX_EXPORT +TextureRenderTarget createLayeredRenderTarget( + const RenderState& state, QRhiTexture* colorTextureArray, int renderLayer, + QRhiTexture* depthTexture, int samples); + +/** + * @brief Multi-attachment (MRT) layered render target. 
+ * + * Same as the single-texture overload but attaches ALL `colorTextures` to the + * render pass (locations 0..N-1), so the number of attachments matches the + * pipeline blend-state count (rt.colorAttachmentCount()). Each layered color + * texture renders to `renderLayer`; plain 2D textures keep layer 0. + */ +SCORE_PLUGIN_GFX_EXPORT +TextureRenderTarget createLayeredRenderTarget( + const RenderState& state, std::span colorTextures, + int renderLayer, QRhiTexture* depthTexture, int samples); + +/** + * @brief Create a multiview render target (single RT drawing N views at once). + * + * colorTextureArray must be a TextureArray with at least multiViewCount layers. + * depthTextureArray may be nullptr for no depth, or a TextureArray with the + * same layer count. + * + * Requires state.caps.multiview == true — caller must check. + */ +SCORE_PLUGIN_GFX_EXPORT +TextureRenderTarget createMultiViewRenderTarget( + const RenderState& state, QRhiTexture* colorTextureArray, int multiViewCount, + QRhiTexture* depthTextureArray, int samples); + +/** + * @brief Multi-attachment (MRT) multiview render target. + * + * Same as the single-texture overload but attaches ALL `colorTextures` (each a + * TextureArray with >= multiViewCount layers) with per-attachment multiview, so + * attachments == pipeline blend targets. Requires state.caps.multiview == true. + */ +SCORE_PLUGIN_GFX_EXPORT +TextureRenderTarget createMultiViewRenderTarget( + const RenderState& state, std::span colorTextures, + int multiViewCount, QRhiTexture* depthTextureArray, int samples); + +/** + * @brief Map an ISF/CSF FORMAT string to a QRhiTexture::Format. + * + * Supported: rgba8, bgra8, r8, rg8, r16, rg16, r16f, r32f, rgba16f, rgba32f, + * d16, d24, d24s8, d32f. Unknown / empty strings fall back to the caller's + * default. Lookup is case-insensitive. 
+ */ +SCORE_PLUGIN_GFX_EXPORT +QRhiTexture::Format parseOutputFormat( + const std::string& fmt, QRhiTexture::Format fallback) noexcept; + SCORE_PLUGIN_GFX_EXPORT void replaceBuffer(QRhiShaderResourceBindings&, int binding, QRhiBuffer* newBuffer); SCORE_PLUGIN_GFX_EXPORT @@ -279,19 +461,75 @@ QRhiShaderResourceBindings* createDefaultBindings( QRhiBuffer* materialUBO, std::span samplers, std::span additionalBindings = {}); +/** + * @brief Match a (name, semantic) request to an upstream geometry attribute. + * + * Three-stage cascade shared by all shader modes: + * 1. semantic_key → name_to_semantic → if known, geom.find(semantic). + * 2. Custom-attribute lookup by `name`. + * 3. display_name == name fallback (so { NAME: "position", SEMANTIC: + * "custom" } still finds the real position attribute when no custom + * one shadows it). + * If `semantic_key` is empty, `name` is used as the semantic key. + */ +SCORE_PLUGIN_GFX_EXPORT +const ossia::geometry::attribute* findGeometryAttribute( + const ossia::geometry& geom, std::string_view name, std::string_view semantic_key); + /** * @brief Remap a pipeline's vertex input layout using semantic matching. * - * For each shader input variable, resolves its name to an attribute semantic, - * finds the matching attribute in the geometry, then creates a vertex input - * attribute with binding/format/offset from the geometry and location from - * the shader. Returns true on success, false if a required attribute is missing. + * Reflects the compiled vertex shader to find each `in` variable, then for + * each one runs findGeometryAttribute(name, name) — useful when no isf + * descriptor is around (legacy callers). Returns true on success, false if + * a required attribute can't be matched. 
*/ SCORE_PLUGIN_GFX_EXPORT bool remapPipelineVertexInputs( QRhiGraphicsPipeline& pip, const QShader& vertexShader, const ossia::geometry& geom); +/** + * @brief Same as above, but honours explicit SEMANTIC on each VERTEX_INPUTS + * entry from the isf descriptor when present. + */ +SCORE_PLUGIN_GFX_EXPORT +bool remapPipelineVertexInputs( + QRhiGraphicsPipeline& pip, const QShader& vertexShader, + const ossia::geometry& geom, const isf::descriptor& desc); + +// FallbackBindingPlan now lives in its own header so both Utils.hpp and +// CustomMesh.hpp can depend on it without creating an include cycle +// (Utils.hpp depends on Mesh.hpp, which transitively reaches CustomMesh +// consumers). See . + +/** + * @brief Fallback-aware overload: the strict-matching behaviour of the + * overload above, extended so VERTEX_INPUTS entries with + * "REQUIRED": false silently resolve to a shared identity buffer + * from the pool when their semantic is absent upstream. + * + * @p pool per-RenderList shared fallback buffer pool + * @p batch any uploads for freshly-allocated fallback buffers are + * recorded here + * @p outPlan filled with the bindings the caller must merge into the + * draw's QRhiCommandBuffer::VertexInput array. Cleared on + * entry. + * + * Returns false (and logs which input failed) if: + * - a REQUIRED=true input has no matching upstream attribute, OR + * - a REQUIRED=false input has no matching upstream attribute AND the + * declared GLSL TYPE is unsupported (mat4 / integer / sampler) OR + * the resolved semantic is not in the whitelist AND no explicit + * DEFAULT was supplied. + */ +SCORE_PLUGIN_GFX_EXPORT +bool remapPipelineVertexInputs( + QRhiGraphicsPipeline& pip, const QShader& vertexShader, + const ossia::geometry& geom, const isf::descriptor& desc, + QRhi& rhi, VertexFallbackPool& pool, QRhiResourceUpdateBatch& batch, + FallbackBindingPlan& outPlan); + /** * @brief Create a render pipeline following the score conventions for shaders and materials. 
*/ @@ -302,6 +540,86 @@ Pipeline buildPipeline( QRhiBuffer* materialUBO, std::span samplers, std::span additionalBindings = {}); +/** + * @brief Lower-level buildPipeline variant: bring your own SRB. + * + * The returned Pipeline::srb equals the srb you passed — no ownership + * transfer. Useful when the caller wants to share a pipeline across + * multiple Passes that each have their own SRB (layout-compatible with + * this one per QRhi contract); the pipeline's stored SRB is only used + * for layout extraction at create() time and never dereferenced at draw + * time. + */ +SCORE_PLUGIN_GFX_EXPORT +Pipeline buildPipeline( + const RenderList& renderer, const Mesh& mesh, const QShader& vertexS, + const QShader& fragmentS, const TextureRenderTarget& rt, + QRhiShaderResourceBindings* srb); + +// Forward declarations — definitions in PipelineStateHelpers.hpp, IsfBindingsBuilder.hpp +} // namespace score::gfx + +namespace isf +{ +struct sampler_config; +} + +namespace score::gfx +{ +/** + * @brief Build a QRhiSampler from an isf::sampler_config. + * + * Fields left empty/unset in the config are filled with ossia defaults + * (linear filtering, no mipmaps, clamp-to-edge). When the config sets a + * comparison op other than "never", the returned sampler is a shadow + * comparison sampler. + * + * The returned sampler is created (create() was called) and has no name + * assigned; callers should setName() before or after create() as needed. + * Ownership follows the standard QRhi convention — callers delete it. + */ +SCORE_PLUGIN_GFX_EXPORT +QRhiSampler* makeSampler(QRhi& rhi, const isf::sampler_config& cfg); +} // namespace score::gfx + +namespace isf +{ +struct pipeline_state; +} + +namespace score::gfx +{ +struct GraphicsStorageResources; + +/** + * @brief Create a render pipeline applying pipeline_state from an ISF descriptor. 
+ * + * This overload replaces the legacy hardcoded `setDepthTest(true)/setDepthWrite(true)` + * on RawRaster and the `anyNodeRequiresDepth()` fallback on ISF with a unified + * path driven by `state`. When `state` is empty (all fields nullopt), behaviour + * matches the legacy variant exactly for backwards compatibility. + * + * `extraBindings` is typically the result of IsfBindingsBuilder::buildExtraBindings(). + * `multiViewCount` >= 2 activates multiview rendering (requires state.caps.multiview). + * + * Plan 09 S6: when `useShadingRate == true` AND + * `renderer.state.caps.variableRateShading == true`, the pipeline + * gets `QRhiGraphicsPipeline::UsesShadingRate`. The shading-rate + * texture / per-draw rate itself is supplied elsewhere (via the + * render-target attachment's `setShadingRateMap` or the command- + * buffer's `setShadingRate`). Presets opt in; silent no-op when the + * backend doesn't support VRS. + */ +SCORE_PLUGIN_GFX_EXPORT +Pipeline buildPipelineWithState( + const RenderList& renderer, const Mesh& mesh, const QShader& vertexS, + const QShader& fragmentS, const TextureRenderTarget& rt, QRhiBuffer* processUBO, + QRhiBuffer* materialUBO, std::span samplers, + std::span extraBindings, + const isf::pipeline_state& state, + int multiViewCount = 0, + bool useShadingRate = false); + /** * @brief Get a pair of compiled vertex / fragment shaders from GLSL 4.5 sources. 
// Number of float components declared by a GLSL TYPE string.
//
// Accepts exactly the lowercase spellings "float", "vec2", "vec3" and
// "vec4" and returns 1..4 respectively. Every other spelling (mat4,
// integer types, samplers, empty string, different casing, ...) yields 0,
// which callers treat as "unsupported by the v1 fallback path".
//
// constexpr so the table can be validated with static_assert and folded
// at compile time when the argument is a literal; runtime behaviour is
// identical to the previous if-chain.
constexpr int float_components_of(std::string_view decl_type) noexcept
{
  // Index + 1 is the component count, so the order of this table matters.
  constexpr std::string_view supported[] = {"float", "vec2", "vec3", "vec4"};
  constexpr int supported_count
      = (int)(sizeof(supported) / sizeof(supported[0]));

  for(int idx = 0; idx < supported_count; ++idx)
  {
    if(decl_type == supported[idx])
      return idx + 1;
  }
  return 0;
}
+void pack_floats(VertexFallbackSpec& spec, int n, + std::initializer_list src) noexcept +{ + float tmp[4] = {0.f, 0.f, 0.f, 0.f}; + int i = 0; + for(auto v : src) { if(i < 4) tmp[i++] = v; } + std::memcpy(spec.bytes.data(), tmp, (size_t)n * sizeof(float)); + spec.stride_bytes = (uint32_t)(n * sizeof(float)); + spec.format = format_for_components(n); +} + +// Canonical whitelist of neutrals. Returns true if `semantic` is +// whitelisted and the spec has been filled; returns false for +// semantics that require an explicit user DEFAULT. +// +// Keep this in sync with the table in +// docs/reference-manual/processes/library/render-pipeline.md. +bool fill_whitelist(VertexFallbackSpec& spec, + ossia::attribute_semantic sem, int n) noexcept +{ + using S = ossia::attribute_semantic; + switch(sem) + { + // Core geometry + case S::position: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::normal: pack_floats(spec, n, {0.f, 0.f, 1.f, 0.f}); return true; + case S::tangent: pack_floats(spec, n, {1.f, 0.f, 0.f, 1.f}); return true; + case S::bitangent: pack_floats(spec, n, {0.f, 1.f, 0.f, 0.f}); return true; + + // UVs + case S::texcoord0: case S::texcoord1: case S::texcoord2: case S::texcoord3: + case S::texcoord4: case S::texcoord5: case S::texcoord6: case S::texcoord7: + pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // Vertex colors — multiplicative identity is white. + case S::color0: case S::color1: case S::color2: case S::color3: + pack_floats(spec, n, {1.f, 1.f, 1.f, 1.f}); return true; + + // Per-instance broadcast colors — same multiplicative identity as + // their per-vertex counterparts. Drives the unified-MDI shader's + // base × inst_color modulation: when no per-instance binding is + // present (Sponza, plain glTF), every fragment reads white and the + // effective scaling collapses to per-vertex × material only. 
+ case S::instance_color0: case S::instance_color1: + case S::instance_color2: case S::instance_color3: + pack_floats(spec, n, {1.f, 1.f, 1.f, 1.f}); return true; + + // Per-instance custom — application-specific user data. Zero is the + // benign default for "ignore me unless wired". + case S::instance_custom0: case S::instance_custom1: + case S::instance_custom2: case S::instance_custom3: + pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // instance_draw_id intentionally omitted — uint-typed VERTEX_INPUTs + // aren't supported by the float-only v1 fallback path. Unified-MDI + // shaders that read it must set REQUIRED: true (and the + // ScenePreprocessor publishes the per-instance draw_id buffer). + + // Transform / instancing. The enum at rotation..translation + // (values 600..607) is now collision-free with the morph deltas + // (500..504), so every transform semantic has an unambiguous + // neutral. transform_matrix (mat4) is still intentionally absent: + // mat4 VERTEX_INPUTS need distinct per-column vertex-input + // bindings which the v1 fallback path (single PerInstance buffer, + // single float{1..4} format) cannot express. Users can declare + // four vec4 columns and reassemble in GLSL, or keep + // transform_matrix REQUIRED: true. + case S::rotation: pack_floats(spec, n, {0.f, 0.f, 0.f, 1.f}); return true; + case S::rotation_extra: pack_floats(spec, n, {0.f, 0.f, 0.f, 1.f}); return true; + case S::scale: pack_floats(spec, n, {1.f, 1.f, 1.f, 1.f}); return true; + case S::uniform_scale: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true; + case S::up: pack_floats(spec, n, {0.f, 1.f, 0.f, 0.f}); return true; + case S::pivot: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::translation: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // Morph deltas — zero delta means "no morph contribution", which is + // exactly the right neutral for an absent morph target. 
All five + // are safe to include now that the collisions are gone. + case S::morph_position: + case S::morph_normal: + case S::morph_tangent: + case S::morph_texcoord: + case S::morph_color: + pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // Particle dynamics — at-rest defaults. + case S::velocity: + case S::acceleration: + case S::force: + case S::angular_velocity: + pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::mass: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true; + case S::age: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::lifetime: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::drag: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // Rendering hints + case S::sprite_size: pack_floats(spec, n, {1.f, 1.f, 0.f, 0.f}); return true; + case S::sprite_rotation: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::sprite_facing: pack_floats(spec, n, {0.f, 0.f, 1.f, 0.f}); return true; + case S::width: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true; + case S::opacity: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true; + case S::emissive: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::emissive_strength: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // Material / PBR + case S::roughness: pack_floats(spec, n, {0.5f, 0.f, 0.f, 0.f}); return true; + case S::metallic: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::ambient_occlusion: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true; + case S::specular: pack_floats(spec, n, {0.5f, 0.f, 0.f, 0.f}); return true; + case S::subsurface: + case S::clearcoat: + case S::clearcoat_roughness: + case S::anisotropy: + case S::transmission: + case S::thickness: + pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::anisotropy_direction: pack_floats(spec, n, {1.f, 0.f, 0.f, 0.f}); return true; + case S::ior: pack_floats(spec, n, {1.5f, 0.f, 
0.f, 0.f}); return true; + + // UI / effect slots + case S::selection: pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + case S::fx0: case S::fx1: case S::fx2: case S::fx3: + case S::fx4: case S::fx5: case S::fx6: case S::fx7: + pack_floats(spec, n, {0.f, 0.f, 0.f, 0.f}); return true; + + // Everything else: NOT whitelisted. Forces the caller to require an + // explicit DEFAULT (motion-history semantics, skinning indices / + // weights, integer IDs, volumetric / splat data — cases where a + // wrong "neutral" is silently wrong). + default: + return false; + } +} + +} // namespace + +std::optional resolveVertexFallback( + ossia::attribute_semantic semantic, + std::string_view decl_type, + const std::vector& user_default) noexcept +{ + const int n = float_components_of(decl_type); + if(n <= 0) + return std::nullopt; // unsupported type (mat4, integer, sampler, ...) + + VertexFallbackSpec spec{}; + + if(!user_default.empty()) + { + // User DEFAULT wins. Pack at most n floats, zero-pad the rest. + float tmp[4] = {0.f, 0.f, 0.f, 0.f}; + const int k = (int)std::min(user_default.size(), (std::size_t)n); + for(int i = 0; i < k; ++i) + tmp[i] = (float)user_default[(std::size_t)i]; + std::memcpy(spec.bytes.data(), tmp, (size_t)n * sizeof(float)); + spec.stride_bytes = (uint32_t)(n * sizeof(float)); + spec.format = format_for_components(n); + return spec; + } + + // No user default — look up the whitelist. + if(fill_whitelist(spec, semantic, n)) + return spec; + + return std::nullopt; +} + +uint64_t hashVertexFallback(const VertexFallbackSpec& spec) noexcept +{ + // rapidhash-tiered (ossia::hash_*); same primitive used everywhere + // else in the gfx pipeline. Mix format + stride into the seed via + // hash_combine, then fold in the active byte range so two specs + // with identical bytes but different formats / strides don't alias. 
+ uint64_t seed = ossia::hash_trivial(spec.format); + ossia::hash_combine(seed, spec.stride_bytes); + const uint32_t active + = std::min(spec.stride_bytes, (uint32_t)spec.bytes.size()); + ossia::hash_combine(seed, ossia::hash_bytes(spec.bytes.data(), active)); + return seed; +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.hpp new file mode 100644 index 0000000000..713883fca3 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackDefaults.hpp @@ -0,0 +1,63 @@ +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include + +namespace score::gfx +{ + +// Packed neutral value for an optional VERTEX_INPUT whose upstream +// attribute is absent. The renderer uploads these `stride_bytes` bytes +// into a PerInstance step_rate=1 buffer of exactly one element and binds +// it at the shader input's slot. Stride and format are driven by the +// GLSL TYPE the shader declared — not the semantic's canonical width. +struct VertexFallbackSpec +{ + // Values from the anonymous enum in ossia::geometry::attribute — we + // store as int to sidestep the "decltype on non-static member" + // boilerplate; callers cast back at the QRhi boundary the same way + // RenderedCSFNode.cpp already does. + int format{}; + uint32_t stride_bytes{}; + // First `stride_bytes` bytes are the payload (native float / int + // bytes). 64 bytes accommodate mat4 if mat4 VERTEX_INPUTS ever land + // (they don't today — the parser's location-bump is not mat4-aware). + std::array bytes{}; +}; + +// Resolve a fallback for a shader-declared optional VERTEX_INPUT. +// +// `semantic` the resolved ossia semantic (from SEMANTIC field if +// set, else from NAME via ossia::name_to_semantic). +// Pass attribute_semantic::custom for unknown names. +// `decl_type` the GLSL TYPE the shader declared, lowercased +// ("float", "vec2", "vec3", "vec4"). 
mat4 / integer +// types are unsupported in v1 — returns nullopt. +// `user_default` the DEFAULT[] array from the JSON header (may be +// empty). When non-empty, overrides the semantic +// whitelist: numbers are packed into the payload in +// declaration order, then truncated / zero-padded to +// fit the declared type width. +// +// Returns `std::nullopt` when neither a user DEFAULT nor a whitelisted +// semantic default applies — the caller is expected to fail the pipeline +// build with a clear error referencing the input name. +SCORE_PLUGIN_GFX_EXPORT std::optional resolveVertexFallback( + ossia::attribute_semantic semantic, + std::string_view decl_type, + const std::vector& user_default) noexcept; + +// Stable hash of a fallback spec's byte payload. Used as part of the +// VertexFallbackPool key so two shaders declaring the same semantic and +// TYPE with different DEFAULT arrays don't share a buffer. +SCORE_PLUGIN_GFX_EXPORT uint64_t +hashVertexFallback(const VertexFallbackSpec& spec) noexcept; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPlan.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPlan.hpp new file mode 100644 index 0000000000..c161654e5d --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPlan.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include + +class QRhiBuffer; + +namespace score::gfx +{ + +// Draw-time bindings the renderer must merge into its vertex-input +// array to satisfy "REQUIRED: false" VERTEX_INPUTS whose upstream +// geometry did not provide a matching attribute. +// +// Emitted by the fallback-aware remapPipelineVertexInputs overload and +// consumed by RenderedRawRasterPipelineNode at draw time. Each Slot has +// a `binding_index` — the slot in the pipeline's vertex-input binding +// array that was appended during pipeline build — and a QRhiBuffer* the +// runtime binds at that index when issuing the draw. 
+// +// The plan is safe to hold across frames: the buffer handles come from +// the VertexFallbackPool which lives alongside the RenderList. +// +// This struct lives in its own header so consumers (CustomMesh, the +// renderer) can depend on it without pulling the full Utils.hpp / +// VertexFallbackPool.hpp graph in via Mesh.hpp. +struct FallbackBindingPlan +{ + struct Slot + { + int binding_index{}; + QRhiBuffer* buffer{}; + }; + std::vector slots; + + bool empty() const noexcept { return slots.empty(); } + void clear() noexcept { slots.clear(); } +}; + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.cpp new file mode 100644 index 0000000000..2ac18fc085 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.cpp @@ -0,0 +1,67 @@ +#include + +#include + +namespace score::gfx +{ + +VertexFallbackPool::~VertexFallbackPool() +{ + // RenderList owns us and must have called release() before + // tearing down the QRhi. Anything still in the map at destruction + // time would leak — but we can't safely delete QRhiBuffer* here + // without knowing the QRhi is still alive, so we just assert the + // caller did the right thing via an empty-map check. + // (Destructive assert would fire during OOM teardown; leave it as + // a quiet leak for robustness.) +} + +VertexFallbackPool::Entry VertexFallbackPool::acquire( + QRhi& rhi, QRhiResourceUpdateBatch& batch, + const VertexFallbackSpec& spec) +{ + Key k{ + .format = spec.format, + .stride = spec.stride_bytes, + .payload_hash = hashVertexFallback(spec)}; + + if(auto it = m_entries.find(k); it != m_entries.end()) + return it->second; + + // Allocate a single QRhiBuffer sized to exactly one element. The + // Immutable usage hint means QRhi uploads once and never touches + // the backing memory again. 
+ auto* buf = rhi.newBuffer( + QRhiBuffer::Immutable, + QRhiBuffer::VertexBuffer, + spec.stride_bytes); + buf->setName(QByteArrayLiteral("score.vertex_fallback")); + if(!buf->create()) + { + // Allocation failed. Return a null Entry; the caller will + // propagate as a pipeline-build failure. + delete buf; + return Entry{}; + } + + batch.uploadStaticBuffer(buf, 0, spec.stride_bytes, spec.bytes.data()); + + Entry e{.buffer = buf, .stride = spec.stride_bytes, .format = spec.format}; + m_entries.emplace(k, e); + return e; +} + +void VertexFallbackPool::release() +{ + for(auto& [k, e] : m_entries) + { + if(e.buffer) + { + e.buffer->deleteLater(); + e.buffer = nullptr; + } + } + m_entries.clear(); +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.hpp new file mode 100644 index 0000000000..ef71d3af98 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VertexFallbackPool.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include + +#include + +#include + +#include + +class QRhi; +class QRhiBuffer; +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ + +// Shared pool of tiny (4–16 byte) PerInstance step_rate=1 vertex +// buffers used to satisfy "REQUIRED: false" VERTEX_INPUTS whose +// upstream geometry does not provide a matching attribute. +// +// Lifetime-owned by the RenderList (same scope as GpuResourceRegistry). +// Lookup key includes the format, stride, and a hash of the payload so +// different DEFAULT values on the same semantic don't share a buffer. +// A typical session touches ~5–10 distinct buckets; total footprint is +// sub-kilobyte. +// +// Not thread-safe: designed for single-threaded render-thread access. 
+class SCORE_PLUGIN_GFX_EXPORT VertexFallbackPool +{ +public: + struct Entry + { + QRhiBuffer* buffer{}; // VertexBuffer | Immutable, exactly `stride` bytes + uint32_t stride{}; // matches spec.stride_bytes + int format{}; // matches spec.format (ossia::geometry::attribute::format) + }; + + VertexFallbackPool() = default; + ~VertexFallbackPool(); + + VertexFallbackPool(const VertexFallbackPool&) = delete; + VertexFallbackPool& operator=(const VertexFallbackPool&) = delete; + + // Returns (and lazily creates) the shared buffer matching `spec`. + // The first call per key allocates a QRhiBuffer and records an + // upload on `batch`; subsequent calls return the cached buffer and + // do not touch `batch`. + // + // `rhi` and `batch` must be valid. The returned buffer is valid + // until release() is called. + Entry acquire(QRhi& rhi, QRhiResourceUpdateBatch& batch, + const VertexFallbackSpec& spec); + + // Destroy every cached buffer and clear the pool. Called by the + // owning RenderList on teardown. + void release(); + + // Diagnostic only. + std::size_t size() const noexcept { return m_entries.size(); } + +private: + struct Key + { + int format{}; + uint32_t stride{}; + uint64_t payload_hash{}; + + bool operator==(const Key& o) const noexcept + { + return format == o.format && stride == o.stride + && payload_hash == o.payload_hash; + } + }; + struct KeyHash + { + std::size_t operator()(const Key& k) const noexcept + { + // Cheap mix — keys are already high-entropy via payload_hash. 
+ return (std::size_t)(k.payload_hash + ^ ((uint64_t)k.format << 32) + ^ (uint64_t)k.stride); + } + }; + + ossia::hash_map m_entries; +}; + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp index fb886132bc..1fce43b822 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.cpp @@ -75,6 +75,8 @@ void VideoNodeRenderer::setupGpuDecoder(RenderList& r) m_p.clear(); } + m_shaders = {}; + createGpuDecoder(); createPipelines(r); @@ -84,11 +86,11 @@ void VideoNodeRenderer::createPipelines(RenderList& r) { if(m_gpu) { - auto shaders = m_gpu->init(r); + m_shaders = m_gpu->init(r); SCORE_ASSERT(m_p.empty()); score::gfx::defaultPassesInit( - m_p, this->node().output[0]->edges, r, r.defaultQuad(), shaders.first, - shaders.second, m_processUBO, m_materialUBO, m_gpu->samplers); + m_p, this->node().output[0]->edges, r, r.defaultQuad(), m_shaders.first, + m_shaders.second, m_processUBO, m_materialUBO, m_gpu->samplers); } } @@ -113,7 +115,7 @@ void VideoNodeRenderer::checkFormat(RenderList& r, AVPixelFormat fmt, int w, int } } -void VideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +void VideoNodeRenderer::initState(RenderList& renderer, QRhiResourceUpdateBatch& res) { auto& rhi = *renderer.state.rhi; @@ -136,8 +138,88 @@ void VideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) if(!m_gpu) createGpuDecoder(); - createPipelines(renderer); + // Cache the shaders from the GPU decoder (also creates its samplers/textures) + if(m_gpu) + m_shaders = m_gpu->init(renderer); + m_recomputeScale = true; + m_initialized = true; +} + +void VideoNodeRenderer::addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) +{ + if(!m_gpu) + return; + if(!m_shaders.first.isValid() || !m_shaders.second.isValid()) + return; + + auto rt = 
renderer.renderTargetForOutput(edge); + if(rt.renderTarget) + { + auto pip = score::gfx::buildPipeline( + renderer, renderer.defaultQuad(), m_shaders.first, m_shaders.second, rt, + m_processUBO, m_materialUBO, m_gpu->samplers); + if(pip.pipeline) + m_p.emplace_back(&edge, Pass{rt, pip, nullptr}); + } +} + +void VideoNodeRenderer::removeOutputPass(RenderList& renderer, Edge& edge) +{ + auto it + = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }); + if(it != m_p.end()) + { + it->second.release(); + m_p.erase(it); + } +} + +bool VideoNodeRenderer::hasOutputPassForEdge(Edge& edge) const +{ + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); +} + +void VideoNodeRenderer::releaseState(RenderList& r) +{ + if(!m_initialized) + return; + + if(m_gpu) + m_gpu->release(r); + + delete m_processUBO; + m_processUBO = nullptr; + + delete m_materialUBO; + m_materialUBO = nullptr; + + for(auto& p : m_p) + p.second.release(); + m_p.clear(); + + m_meshBuffer = {}; + m_shaders = {}; + + if(m_currentFrame) + { + m_currentFrame->use_count--; + m_currentFrame.reset(); + } + + m_initialized = false; +} + +void VideoNodeRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initState(renderer, res); + + for(Edge* edge : this->node().output[0]->edges) + { + addOutputPass(renderer, *edge, res); + } } void VideoNodeRenderer::runRenderPass( @@ -235,25 +317,6 @@ void VideoNodeRenderer::displayFrame( void VideoNodeRenderer::release(RenderList& r) { - if(m_gpu) - m_gpu->release(r); - - delete m_processUBO; - m_processUBO = nullptr; - - delete m_materialUBO; - m_materialUBO = nullptr; - - for(auto& p : m_p) - p.second.release(); - m_p.clear(); - - m_meshBuffer = {}; - - if(m_currentFrame) - { - m_currentFrame->use_count--; - m_currentFrame.reset(); - } + releaseState(r); } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp index 
298760a934..1c58114eeb 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VideoNodeRenderer.hpp @@ -32,6 +32,13 @@ class VideoNodeRenderer : public NodeRenderer void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; void release(RenderList& r) override; + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override; + void releaseState(RenderList& renderer) override; + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override; + void removeOutputPass(RenderList& renderer, Edge& edge) override; + bool hasOutputPassForEdge(Edge& edge) const override; + private: void createPipelines(RenderList& r); void displayFrame(AVFrame& frame, RenderList& renderer, QRhiResourceUpdateBatch& res); @@ -55,6 +62,7 @@ class VideoNodeRenderer : public NodeRenderer }; std::unique_ptr m_gpu; + std::pair m_shaders; Video::ImageFormat m_frameFormat{}; score::gfx::ScaleMode m_currentScaleMode{}; diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp index 3a0004800a..72a185a213 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/VulkanVideoDevice.hpp @@ -7,6 +7,13 @@ #include #include +#if __has_include() +#include +#ifdef Q_OS_WIN +#include +#endif +#endif + #include #include #include diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp index 0e59153621..59b6653b27 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/Window.cpp @@ -216,7 +216,7 @@ void Window::render() auto buf = m_swapChain->currentFrameCommandBuffer(); auto batch = state->rhi->nextResourceUpdateBatch(); - buf->beginPass(m_swapChain->currentFrameRenderTarget(), Qt::black, {1.0f, 0}, batch); + 
buf->beginPass(m_swapChain->currentFrameRenderTarget(), Qt::black, {0.0f, 0}, batch); buf->endPass(); state->rhi->endFrame(m_swapChain, {}); @@ -248,12 +248,12 @@ void Window::exposeEvent(QExposeEvent* ev) resizeSwapChain(); } - if(m_hasSwapChain && !m_swapChain) - { - qDebug("exposeEvent: m_hasSwapChain && !m_swapChain"); - m_hasSwapChain = false; - } - + // The (m_hasSwapChain, m_swapChain) pair is kept consistent at the + // teardown sites in ScreenNode (~ScreenNode, destroyOutput) and + // MultiWindowNode (releaseWindowSwapChain, destroyOutput): the flag is + // cleared and the alias is nulled BEFORE the QRhiSwapChain is released, + // so we can never observe (m_hasSwapChain == true && m_swapChain == + // nullptr) here. See diagnostic 047. const QSize surfaceSize = m_hasSwapChain ? m_swapChain->surfacePixelSize() : QSize(); if((!isExposed() || (m_hasSwapChain && surfaceSize.isEmpty())) && m_running) diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp index 3519255543..b592ae6a13 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/GPUVideoDecoder.cpp @@ -9,8 +9,8 @@ GPUVideoDecoder::~GPUVideoDecoder() { } void GPUVideoDecoder::release(RenderList&) { - for(auto [sampler, tex] : samplers) - tex->deleteLater(); + for(auto& s : samplers) + if(s.texture) s.texture->deleteLater(); for(auto sampler : samplers) { diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp index 6819dcd4fc..e7fba53804 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D11.hpp @@ -19,7 +19,10 @@ extern "C" { #if defined(SCORE_HAS_D3D11_HWCONTEXT) +// clang-format off +#include #include +// clang-format on namespace score::gfx { diff --git 
a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp index 762fa85095..0c5fda135b 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/HWD3D12.hpp @@ -19,7 +19,10 @@ extern "C" { #if defined(SCORE_HAS_D3D12_HWCONTEXT) +// clang-format off +#include #include +// clang-format on namespace score::gfx { diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp index a63acb101e..8ca17bbf1e 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/decoders/Tonemap.hpp @@ -403,13 +403,19 @@ vec3 agx(vec3 color) { } vec3 agxEotf(vec3 color) { - // AgX -> sRGB/BT.709 linear + // AgX outset (inverse of inset). The output of agxDefaultContrastApprox + // is in AgX's pseudo-sRGB-2.2-gamma space; we apply outset then the + // 2.2 EOTF to land in linear sRGB. Reference: iolite minimal AgX, + // https://iolite-engine.com/blog_posts/minimal_agx_implementation const mat3 agxInvTransform = mat3( 1.19687900512017, -0.0528968517574562, -0.0529716355144438, -0.0980208811401368, 1.15190312990417, -0.0980434501171241, -0.0990297440797205, -0.0989611768448433, 1.15107367264116 ); - return agxInvTransform * color; + vec3 v = agxInvTransform * color; + // Without this gamma the output is display-non-linear but the caller + // treats it as linear -> shadows crushed, contrast over-steep. 
+ return pow(max(v, vec3(0.0)), vec3(2.2)); } vec3 tonemap(vec3 color) { diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp index 17b37ef558..40b1f39faf 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/I420.hpp @@ -145,7 +145,7 @@ struct I420Encoder : GPUVideoEncoder void execPlane(QRhi& rhi, QRhiCommandBuffer& cb, PlaneResources& plane, int w, int h) { - cb.beginPass(plane.rt, Qt::black, {1.0f, 0}); + cb.beginPass(plane.rt, Qt::black, {0.0f, 0}); cb.setGraphicsPipeline(plane.pipeline); cb.setShaderResources(plane.srb); cb.setViewport(QRhiViewport(0, 0, w, h)); diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp index 644087a10f..6cb97dca31 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/NV12.hpp @@ -159,7 +159,7 @@ struct NV12Encoder : GPUVideoEncoder void exec(QRhi& rhi, QRhiCommandBuffer& cb) override { // Pass 1: Y plane (full resolution) - cb.beginPass(m_yRT, Qt::black, {1.0f, 0}); + cb.beginPass(m_yRT, Qt::black, {0.0f, 0}); cb.setGraphicsPipeline(m_yPipeline); cb.setShaderResources(m_ySRB); cb.setViewport(QRhiViewport(0, 0, m_width, m_height)); @@ -170,7 +170,7 @@ struct NV12Encoder : GPUVideoEncoder cb.endPass(yReadbackBatch); // Pass 2: UV plane (half resolution) - cb.beginPass(m_uvRT, Qt::black, {1.0f, 0}); + cb.beginPass(m_uvRT, Qt::black, {0.0f, 0}); cb.setGraphicsPipeline(m_uvPipeline); cb.setShaderResources(m_uvSRB); cb.setViewport(QRhiViewport(0, 0, m_width / 2, m_height / 2)); diff --git a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp index bf3d2994b1..f57f123803 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp +++ 
b/src/plugins/score-plugin-gfx/Gfx/Graph/encoders/UYVY.hpp @@ -116,7 +116,7 @@ struct UYVYEncoder : GPUVideoEncoder void exec(QRhi& rhi, QRhiCommandBuffer& cb) override { - cb.beginPass(m_renderTarget, Qt::black, {1.0f, 0}); + cb.beginPass(m_renderTarget, Qt::black, {0.0f, 0}); cb.setGraphicsPipeline(m_pipeline); cb.setShaderResources(m_srb); cb.setViewport(QRhiViewport(0, 0, m_width / 2, m_height)); diff --git a/src/plugins/score-plugin-gfx/Gfx/Hashes.hpp b/src/plugins/score-plugin-gfx/Gfx/Hashes.hpp new file mode 100644 index 0000000000..92e9f8587a --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Hashes.hpp @@ -0,0 +1,35 @@ +#pragma once + +// Qt-aware adapters over ossia::hash (rapidhash). Centralises the +// QString / QByteArray hashing pattern so cache keys across the gfx +// pipeline produce the same stable values without each call site +// re-deriving the trick of hashing the raw character buffer. +// +// All hashes here delegate to ossia::hash_bytes, which dispatches +// to the appropriate rapidhash tier (Nano / Micro / full) based on +// size. Use these — not qHash, not std::hash — for any +// in-memory cache key in this plugin. 
+ +#include + +#include +#include + +#include +#include + +namespace score::gfx +{ + +inline uint64_t hash_qstring(const QString& s) noexcept +{ + return ossia::hash_bytes( + s.constData(), (std::size_t)s.size() * sizeof(QChar)); +} + +inline uint64_t hash_qbytearray(const QByteArray& b) noexcept +{ + return ossia::hash_bytes(b.constData(), (std::size_t)b.size()); +} + +} // namespace score::gfx diff --git a/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp b/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp index d0f9b4cee6..2f793889c8 100644 --- a/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/ISFProcess.hpp @@ -16,6 +16,8 @@ #include +#include + namespace Gfx { struct ISFHelpers @@ -79,6 +81,10 @@ struct ISFHelpers const isf::input& input; const int i; T& self; + // Outlet id allocator for write-access storage / image inputs. Starts at + // a high base so it never collides with inlet ids (input index `i`), the + // default "Texture Out" outlet (id 1), or the MRT outlet base (10000). + int& outlet_id; Process::Inlet* operator()(const float_input& v) { @@ -168,8 +174,18 @@ struct ISFHelpers alternatives.emplace_back("2", 2); } + // ComboBox::init expects the VALUE to be initially selected, not + // an index. libisf's `v.def` is the INDEX into values for enum + // mode — passing it raw was making `DEFAULT: ` silently + // fall back to alternatives[0] when didn't equal a valid + // index. Look up the alternative at v.def and forward its value. + // Same fix lives in CSF/Process.cpp + GeometryFilter/Process.cpp. 
+ const std::size_t def_idx + = std::min(v.def, alternatives.size() - 1); + const ossia::value& init_value = alternatives[def_idx].second; + auto port = new Process::ComboBox( - std::move(alternatives), (int)v.def, nm, Id(i), &self); + std::move(alternatives), init_value, nm, Id(i), &self); if(auto it = previous_values.find(nm); it != previous_values.end() @@ -340,23 +356,134 @@ struct ISFHelpers } // CSF-specific input handlers - Process::Inlet* operator()(const storage_input& v) { return nullptr; } - Process::Inlet* operator()(const texture_input& v) { return nullptr; } - Process::Inlet* operator()(const csf_image_input& v) { return nullptr; } + Process::Inlet* operator()(const storage_input& v) + { + // Mirror the renderer (isf_input_port_vis in ISFNode.cpp): the access + // qualifier decides inlet vs outlet. Treating every storage_input as a + // read inlet gave write buffers a phantom TextureInlet — shifting every + // later port by one (positional routing) and never exposing the + // TextureOutlet the renderer actually produces. + if(v.access == "read_only") + { + // read inlet: an upstream Buffer-producing node (ScenePreprocessor's + // scene_* auxes, ExtractBuffer2 outputs, ...) has a target to land on. + // For aux-named storage_inputs the RawRaster renderer also auto-binds + // by name, so this inlet is optional but allows explicit wiring. + auto port = new Gfx::TextureInlet( + QString::fromStdString(input.name), Id(i), &self); + self.m_inlets.push_back(port); + return port; + } + + // write_only / read_write: the renderer pushes a Buffer OUTPUT port for + // the produced SSBO so downstream nodes can connect to it. 
+ auto outport = new Gfx::TextureOutlet( + QString::fromStdString(input.name), Id(outlet_id++), + &self); + self.m_outlets.push_back(outport); + + // Conditional sizing inlet: only buffers whose layout ends in a + // flexible-array member synthesize a "size" control — SAME condition as + // CSF/Process.cpp setupCSF, the renderer, and the generated GLSL. + if(!v.layout.empty() + && v.layout.back().type.find("[]") != std::string::npos) + { + auto size_inl = new Process::IntSpinBox{ + 1, 536870911, 1024, + QString::fromStdString(input.name) + " size", + Id(i), &self}; + self.m_inlets.push_back(size_inl); + self.controlAdded(size_inl->id()); + return size_inl; + } + return nullptr; + } + Process::Inlet* operator()(const uniform_input& v) + { + // uniform_input expects an upstream Buffer port (ScenePreprocessor's + // camera/env aux buffers, ExtractBuffer2 outputs, etc.). TextureInlet + // is score's Process-layer inlet for SSBO / texture / UBO data flow. + // Without this, the Process model has no inlet for the cable to land + // on and Score.inlet(proc, i) returns null. + auto port = new Gfx::TextureInlet( + QString::fromStdString(input.name), Id(i), &self); + self.m_inlets.push_back(port); + return port; + } + Process::Inlet* operator()(const texture_input& v) + { + // The renderer (isf_input_port_vis) creates an Image input port for + // every texture_input; returning nullptr here dropped the inlet and + // shifted all subsequent ports (same off-by-one drift family as the + // storage / csf_image cases). + auto port = new Gfx::TextureInlet( + QString::fromStdString(input.name), Id(i), &self); + self.m_inlets.push_back(port); + return port; + } + Process::Inlet* operator()(const csf_image_input& v) + { + // Mirror the renderer: read_only → input port (an upstream texture + // cable lands on it); write_only / read_write → output port for the + // produced storage image. 
Always creating an inlet gave write images a + // phantom inlet (port shift) and no outlet for downstream connection. + if(v.access == "read_only") + { + auto port = new Gfx::TextureInlet( + QString::fromStdString(input.name), Id(i), &self); + self.m_inlets.push_back(port); + return port; + } + auto outport = new Gfx::TextureOutlet( + QString::fromStdString(input.name), Id(outlet_id++), + &self); + self.m_outlets.push_back(outport); + return nullptr; + } Process::Inlet* operator()(const geometry_input& v) { return nullptr; } }; + // Outlet ids for write-access storage / image inputs. Base 20000 keeps + // them clear of inlet ids (input index), the default outlet (id 1) and the + // MRT base (10000), and lets the MRT block below tell them apart. + static constexpr int storage_outlet_base = 20000; + int outlet_id = storage_outlet_base; + for(const isf::input& input : desc.inputs) { - ossia::visit(input_vis{previous_values, input, i, self}, input.data); + ossia::visit(input_vis{previous_values, input, i, self, outlet_id}, input.data); i++; } - // MRT: recreate outlets from OUTPUTS declarations + // The renderer (isf_input_port_vis) pushes write-storage / write-image + // OUTPUT ports first (in input order), then the color / MRT outputs. The + // model's outlets must follow the same order for positional routing. The + // default "Texture Out" outlet was created by the constructor *before* this + // loop, so it currently sits ahead of any storage outlets — pull the + // storage outlets (ids >= storage_outlet_base) to the front to match. + { + std::stable_partition( + self.m_outlets.begin(), self.m_outlets.end(), + [](Process::Outlet* o) { return o->id().val() >= storage_outlet_base; }); + } + + // MRT: recreate the color outlets from OUTPUTS declarations. Preserve the + // storage / image write outlets (ids >= storage_outlet_base); only the + // color / default outlets are replaced. 
if(!desc.outputs.empty()) { - qDeleteAll(self.m_outlets); - self.m_outlets.clear(); + for(auto it = self.m_outlets.begin(); it != self.m_outlets.end();) + { + if((*it)->id().val() < storage_outlet_base) + { + delete *it; + it = self.m_outlets.erase(it); + } + else + { + ++it; + } + } int outId = 10000; // High base to avoid ID collisions with inlets for(const auto& out : desc.outputs) diff --git a/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp b/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp index 92b1220bbf..ae9d4d1cb1 100644 --- a/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/InvertYRenderer.cpp @@ -83,7 +83,7 @@ void InvertYRenderer::finishFrame( score::gfx::RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& res) { - cb.beginPass(m_renderTarget.renderTarget, Qt::black, {1.0f, 0}, res); + cb.beginPass(m_renderTarget.renderTarget, Qt::black, {0.0f, 0}, res); res = nullptr; { const auto sz = renderer.state.renderSize; @@ -170,7 +170,7 @@ void ScaledRenderer::runRenderPass(score::gfx::RenderList &, QRhiCommandBuffer & void ScaledRenderer::finishFrame(score::gfx::RenderList &renderer, QRhiCommandBuffer &cb, QRhiResourceUpdateBatch *&res) { - cb.beginPass(m_renderTarget.renderTarget, Qt::black, {1.0f, 0}, res); + cb.beginPass(m_renderTarget.renderTarget, Qt::black, {0.0f, 0}, res); res = nullptr; { const auto sz = renderer.state.outputSize; diff --git a/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp b/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp index 1bf8e44729..a56737e626 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Libav/LibavEncoderNode.cpp @@ -153,18 +153,15 @@ score::gfx::RenderList* LibavEncoderNode::renderer() const void LibavEncoderNode::createOutput(score::gfx::OutputConfiguration conf) { - m_renderState = std::make_shared(); - - m_renderState->surface = 
QRhiGles2InitParams::newFallbackSurface(); - QRhiGles2InitParams params; - params.fallbackSurface = m_renderState->surface; - score::GLCapabilities caps; - caps.setupFormat(params.format); - m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); + m_renderState = score::gfx::createRenderState( + conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr); + if(!m_renderState || !m_renderState->rhi) + { + qWarning() << "LibavEncoderNode: failed to create QRhi"; + m_renderState.reset(); + return; + } m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::OpenGL; - m_renderState->version = caps.qShaderVersion; auto rhi = m_renderState->rhi; m_texture = rhi->newTexture( @@ -226,6 +223,11 @@ void LibavEncoderNode::destroyOutput() if(m_renderState) { + // Persist-across-rebuild contract: registry survives RL teardown, + // so we tear down its QRhi resources here BEFORE + // RenderState::destroy() (called below) frees the device. + releaseRegistry(); + delete m_renderTarget; m_renderTarget = nullptr; delete m_renderState->renderPassDescriptor; @@ -234,10 +236,10 @@ void LibavEncoderNode::destroyOutput() m_depthStencil = nullptr; delete m_texture; m_texture = nullptr; - delete m_renderState->rhi; - m_renderState->rhi = nullptr; - delete m_renderState->surface; - m_renderState->surface = nullptr; + // RenderState::destroy() flushes the pipeline cache via preRhiDestroy + // and then deletes rhi + surface. Doing the deletes manually (the + // previous approach) bypassed the cache flush. 
+ m_renderState->destroy(); m_renderState.reset(); } } diff --git a/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp index e6ffc374cc..2593b9a31f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Sh4lt/Sh4ltOutputDevice.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -191,18 +192,15 @@ void Sh4ltOutputNode::createOutput(score::gfx::OutputConfiguration conf) sh4lt::ShType::default_group()), m_settings.width * m_settings.height * 4, m_logger); m_frame_dur = 1e9 / m_settings.rate; - m_renderState = std::make_shared(); - - m_renderState->surface = QRhiGles2InitParams::newFallbackSurface(); - QRhiGles2InitParams params; - params.fallbackSurface = m_renderState->surface; - score::GLCapabilities caps; - caps.setupFormat(params.format); - m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); + m_renderState = score::gfx::createRenderState( + conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr); + if(!m_renderState || !m_renderState->rhi) + { + qWarning() << "Sh4ltOutputNode: failed to create QRhi"; + m_renderState.reset(); + return; + } m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::OpenGL; - m_renderState->version = caps.qShaderVersion; auto rhi = m_renderState->rhi; m_texture = rhi->newTexture( @@ -221,6 +219,26 @@ void Sh4ltOutputNode::createOutput(score::gfx::OutputConfiguration conf) void Sh4ltOutputNode::destroyOutput() { m_writer.reset(); + + if(!m_renderState) + return; + + // Persist-across-rebuild contract: registry survives RL teardown, + // so we tear down its QRhi resources here BEFORE + // RenderState::destroy() (called below) frees the device. 
+ releaseRegistry(); + + delete m_renderTarget; + m_renderTarget = nullptr; + + delete m_renderState->renderPassDescriptor; + m_renderState->renderPassDescriptor = nullptr; + + delete m_texture; + m_texture = nullptr; + + m_renderState->destroy(); + m_renderState.reset(); } std::shared_ptr Sh4ltOutputNode::renderState() const diff --git a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp index 5759c0d6bc..04497b3b64 100644 --- a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.cpp @@ -7,11 +7,13 @@ #include #include +#include #include #include #include #include + namespace Gfx { @@ -20,15 +22,35 @@ namespace QStringList shaderIncludePaths() { - // Resolve includes ; for now we have one hardcoded library... QStringList shaderIncludePath; - // FIXME refactor that ! + // Default path: the library packages dir so users' own GLSL snippets + // drop in without ceremony. Additional search roots are expected to be + // supplied via a user-facing include-paths GUI (not yet wired up) — + // no static registration mechanism lives here anymore. auto& lib_settings = score::AppContext().settings(); + const QString lib_path = lib_settings.getPackagesPath(); + if(QDir{}.exists(lib_path)) { - QString lib_path = lib_settings.getPackagesPath(); - if(QDir{}.exists(lib_path)) - shaderIncludePath.append(lib_path); + shaderIncludePath.append(lib_path); + + // Also register every first-level subdirectory of `packages/` so + // shader libraries shipping as standalone packages (openpbr/, + // lygia/, MaterialX/, …) can be `#include`d by their bare header + // name from any user shader without the consumer having to know + // the install layout. Internal cross-includes inside a library + // keep working via the origin-dir-first lookup in + // tryResolveQuoted. + // + // Collision policy: if two libraries ship the same header + // basename, the one earlier in QDir iteration order wins. 
In + // practice shader libs prefix their headers (`openpbr_*.h`) so + // collisions are vanishingly unlikely. + QDir packagesDir{lib_path}; + const auto subdirs = packagesDir.entryList( + QDir::Dirs | QDir::NoDotAndDotDot, QDir::Name); + for(const auto& sub : subdirs) + shaderIncludePath.append(packagesDir.filePath(sub)); } return shaderIncludePath; @@ -113,53 +135,233 @@ void updateToGlsl45(ShaderSource& program) program.fragment.remove("highp "); } -static bool resolveGLSLIncludes( - QByteArray& data, const QStringList& includes, QString rootPath, int iterations); - -static std::optional resolveFile_relative( - const QString& name, const QStringList& includes, const QString& rootPath, - int iterations) +// Resolver state shared across recursive include expansion. +// +// `searchPaths` holds roots applied to both quoted and bracketed +// includes. `originDir` is the directory the current buffer was loaded +// from; it becomes the first place quoted includes are looked up and is +// pushed/popped as we descend into included files so relative headers +// resolve against their own sibling dir, not the top-level shader's. +// `visited` holds canonicalised paths already expanded in the current +// chain — revisiting one is a cycle. +struct IncludeContext +{ + QStringList searchPaths; + QString originDir; + ossia::hash_set visited; + int depth = 0; + int maxDepth = 16; + QString error; // first fatal error encountered + QStringList missing; // unresolved headers, for diagnostics +}; + +static void removeIncludesInComments(QByteArray& data); +static QByteArray resolveIncludes(QByteArray data, IncludeContext& ctx); + +static std::optional tryResolveQuoted( + const QString& header, const IncludeContext& ctx) { - QFile f{rootPath + "/" + name}; - if(f.open(QIODevice::ReadOnly)) + // Quoted: origin dir first, then search paths. 
+ if(!ctx.originDir.isEmpty()) { - QByteArray res = f.readAll(); - if(resolveGLSLIncludes(res, includes, QFileInfo{f}.absolutePath(), iterations)) - return res; - return std::nullopt; + const QString candidate = ctx.originDir + QLatin1Char('/') + header; + if(QFileInfo::exists(candidate)) + return QFileInfo{candidate}.canonicalFilePath(); } - return {}; + for(const auto& path : ctx.searchPaths) + { + const QString candidate = path + QLatin1Char('/') + header; + if(QFileInfo::exists(candidate)) + return QFileInfo{candidate}.canonicalFilePath(); + } + return std::nullopt; } -static std::optional -resolveFile_in_paths(const QString& name, const QStringList& includes, int iterations) +static std::optional tryResolveBracketed( + const QString& header, const IncludeContext& ctx) { - for(auto& path : includes) + // Bracketed: search paths only (no origin-dir lookup). + for(const auto& path : ctx.searchPaths) { - if(auto res = resolveFile_relative(name, includes, path, iterations)) - return res; + const QString candidate = path + QLatin1Char('/') + header; + if(QFileInfo::exists(candidate)) + return QFileInfo{candidate}.canonicalFilePath(); } return std::nullopt; } -static std::optional resolveFile_quotes( - const QString& name, const QStringList& includes, const QString& rootPath, - int iterations) +// Expand one resolved include file into `ctx`-tracked source, emitting +// `#line` markers so glslang error messages point at the included file. +// On cycle / depth / unreadable-file failure, sets ctx.error and returns +// an empty byte array (caller must abort). 
+static QByteArray expandFile( + const QString& canonicalPath, IncludeContext& ctx, int parentLine, + const QString& parentPath) { - if(auto res = resolveFile_relative(name, includes, rootPath, iterations)) - return res; - if(auto res = resolveFile_in_paths(name, includes, iterations)) - return res; - return std::nullopt; + if(ctx.depth >= ctx.maxDepth) + { + ctx.error = QStringLiteral("Shader include depth limit (%1) exceeded at '%2'") + .arg(ctx.maxDepth) + .arg(canonicalPath); + return {}; + } + if(ctx.visited.contains(canonicalPath)) + { + ctx.error + = QStringLiteral("Shader include cycle detected: '%1' re-entered") + .arg(canonicalPath); + return {}; + } + + QFile f{canonicalPath}; + if(!f.open(QIODevice::ReadOnly)) + { + ctx.error + = QStringLiteral("Shader include: failed to read '%1'").arg(canonicalPath); + return {}; + } + QByteArray body = f.readAll(); + + // Recurse with a pushed origin dir so relative includes in this file + // resolve against its own sibling dir. Save/restore on return. + const QString savedOriginDir = ctx.originDir; + ctx.originDir = QFileInfo{canonicalPath}.absolutePath(); + ctx.visited.insert(canonicalPath); + ctx.depth++; + + QByteArray expanded = resolveIncludes(std::move(body), ctx); + + ctx.depth--; + ctx.visited.erase(canonicalPath); + ctx.originDir = savedOriginDir; + + if(!ctx.error.isEmpty()) + return {}; + + // Frame with #line markers: enter the included file at line 1, return + // to the parent at the line just after the #include directive. We pass + // filenames through as string tokens — glslang accepts that form. 
+ QByteArray framed; + framed.reserve(expanded.size() + 256); + framed.append("#line 1 \""); + framed.append(canonicalPath.toUtf8()); + framed.append("\"\n"); + framed.append(expanded); + if(!framed.endsWith('\n')) + framed.append('\n'); + framed.append("#line "); + framed.append(QByteArray::number(parentLine + 1)); + framed.append(" \""); + framed.append(parentPath.toUtf8()); + framed.append("\"\n"); + return framed; } -static std::optional resolveFile_brackets( - const QString& name, const QStringList& includes, const QString& rootPath, - int iterations) +// Single-pass textual expansion. Walks from top to bottom, replacing +// each `#include` line with the (already-expanded) body of the target. +// Comments are neutralised before the scan so `#include` inside // or /* +// doesn't trigger. +static QByteArray resolveIncludes(QByteArray data, IncludeContext& ctx) { - if(auto res = resolveFile_in_paths(name, includes, iterations)) - return res; - return std::nullopt; + removeIncludesInComments(data); + + // Anchor to start-of-line (optional leading whitespace only) so an + // `#include "..."` substring inside an #error string or a string- + // literal payload doesn't get misidentified as a directive. The + // openpbr headers exercise this: `#error "... Add #include + // ..."` would otherwise trip a " not found" + // hard error even though no actual GLSL include is needed. + static const QRegularExpression quoted{ + R"_(^\s*#include\s*"([^"]+)")_", + QRegularExpression::MultilineOption}; + static const QRegularExpression bracket{ + R"_(^\s*#include\s*<([^>]+)>)_", + QRegularExpression::MultilineOption}; + + QByteArray out; + out.reserve(data.size()); + + // Lightweight "current file" tag for the parent-line #line marker; + // when the outer buffer came from disk, originDir points to the file's + // dir but we don't have the filename itself — fall back to "" + // for in-memory / unknown roots. + const QString parentPath + = ctx.originDir.isEmpty() ? 
QStringLiteral("") : ctx.originDir; + + int cursor = 0; + int line = 1; + while(cursor < data.size()) + { + const int eol = data.indexOf('\n', cursor); + const int lineEnd = eol == -1 ? data.size() : eol; + const QByteArray lineBytes = data.mid(cursor, lineEnd - cursor); + + // Only scan lines that look like include directives at all. + const int hashIdx = lineBytes.indexOf('#'); + if(hashIdx != -1 && lineBytes.indexOf("include", hashIdx) != -1) + { + const QString lineStr = QString::fromUtf8(lineBytes); + if(auto m = quoted.match(lineStr); m.hasMatch()) + { + const QString header = m.captured(1); + if(auto resolved = tryResolveQuoted(header, ctx)) + { + QByteArray body = expandFile(*resolved, ctx, line, parentPath); + if(!ctx.error.isEmpty()) + return {}; + out.append(body); + cursor = lineEnd + (eol == -1 ? 0 : 1); + line++; + continue; + } + ctx.missing.push_back(header); + ctx.error = QStringLiteral( + "Shader include not found: \"%1\" (searched: %2)") + .arg(header) + .arg(ctx.originDir.isEmpty() + ? ctx.searchPaths.join(", ") + : (ctx.originDir + QStringLiteral(", ") + + ctx.searchPaths.join(", "))); + return {}; + } + if(auto m = bracket.match(lineStr); m.hasMatch()) + { + const QString header = m.captured(1); + if(auto resolved = tryResolveBracketed(header, ctx)) + { + QByteArray body = expandFile(*resolved, ctx, line, parentPath); + if(!ctx.error.isEmpty()) + return {}; + out.append(body); + cursor = lineEnd + (eol == -1 ? 0 : 1); + line++; + continue; + } + // Bracketed include not found: NON-fatal. Emit the line verbatim + // and let the downstream preprocessor (glslang/QShaderBaker) + // handle gating. This is what makes openpbr work without an + // `#if`-aware resolver: openpbr_interop.h pulls in + // `openpbr_interop_cpp.h` (gated by `#if defined(__cplusplus)`), + // which itself includes `` / ``. 
We don't + // honour the `#if`, so we textually inline the C++ branch's + // contents — but glslang DOES honour the `#if`, sees that + // `__cplusplus` is undefined for shader compilation, and skips + // the entire C++ branch (including the orphan `` + // line) at preprocess time. Tracking in `missing` keeps the + // diagnostic visible if the user wants to debug. + ctx.missing.push_back(header); + // fall through to the verbatim-line append below + } + } + + out.append(lineBytes); + if(eol != -1) + out.append('\n'); + cursor = lineEnd + (eol == -1 ? 0 : 1); + line++; + } + + return out; } static void removeIncludesInComments(QByteArray& data) @@ -245,59 +447,6 @@ static void removeIncludesInComments(QByteArray& data) } } -static bool resolveGLSLIncludes( - QByteArray& data, const QStringList& includes, QString rootPath, int iterations) -{ - removeIncludesInComments(data); - - iterations++; - if(iterations > 1000) - { - qDebug() << "More than 1000 iterations, shader include loop likely. Stopping."; - return false; - } - int idx = data.indexOf("#include"); - if(idx == -1) - return true; - - int end_line = data.indexOf('\n', idx); - int len = end_line - idx; - static QRegularExpression quoted_include{R"_(#include\s*"(.*)")_"}; - auto cap = quoted_include.match(data.mid(idx, len)).capturedTexts(); - if(cap.size() == 2) - { - if(auto f = resolveFile_quotes(cap[1], includes, rootPath, iterations)) - { - data.replace(idx, len, *f); - } - else - { - qDebug().noquote() << "Could not resolve: " << cap[0] - << " while processing shader"; - return false; - } - } - else - { - static QRegularExpression bracket_include{R"_(#include\s*<(.*)>)_"}; - auto cap = bracket_include.match(data.mid(idx, len)).capturedTexts(); - if(cap.size() == 2) - { - if(auto f = resolveFile_brackets(cap[1], includes, rootPath, iterations)) - { - data.replace(idx, len, *f); - } - else - { - qDebug().noquote() << "Could not resolve: " << cap[0] - << " while processing shader"; - return false; - } - } - } 
- - return resolveGLSLIncludes(data, includes, rootPath, iterations); -} } ProgramCache& ProgramCache::instance() noexcept @@ -307,19 +456,42 @@ ProgramCache& ProgramCache::instance() noexcept } std::pair, QString> -ProgramCache::get(const ShaderSource& program) noexcept +ProgramCache::get(const ShaderSource& program, const QString& originPath) noexcept { - auto it = programs.find(program); + // Derive the origin dir once — it's both the cache-key disambiguator + // (two shaders with identical text but different origin dirs resolve + // different sibling includes and must not collide) and the first + // search root for quoted #include resolution. + const QString originDir + = originPath.isEmpty() ? QString{} : QFileInfo{originPath}.absolutePath(); + const ProgramCacheKey cacheKey{program, originDir}; + + auto it = programs.find(cacheKey); if(it != programs.end()) return {it->second, QString{}}; try { - // Resolve includes - QByteArray source_frag = program.fragment.toUtf8(); - QByteArray source_vert = program.vertex.toUtf8(); - resolveGLSLIncludes(source_frag, shaderIncludePaths(), {}, 0); - resolveGLSLIncludes(source_vert, shaderIncludePaths(), {}, 0); + // Resolve includes. Empty originDir → in-memory source, falls back + // to the search paths only. + IncludeContext ctx; + ctx.searchPaths = shaderIncludePaths(); + ctx.originDir = originDir; + + QByteArray source_frag = resolveIncludes(program.fragment.toUtf8(), ctx); + if(!ctx.error.isEmpty()) + return {std::nullopt, QStringLiteral("Fragment: ") + ctx.error}; + + // Reset per-file state (visited chain, depth, errors); keep search + // paths and origin dir across the two shader stages. 
+ ctx.visited.clear(); + ctx.depth = 0; + ctx.error.clear(); + ctx.missing.clear(); + + QByteArray source_vert = resolveIncludes(program.vertex.toUtf8(), ctx); + if(!ctx.error.isEmpty()) + return {std::nullopt, QStringLiteral("Vertex: ") + ctx.error}; switch(program.type) { @@ -387,7 +559,7 @@ ProgramCache::get(const ShaderSource& program) noexcept if(vertexS.isValid() && fragmentS.isValid()) { - programs[program] = processed; + programs[cacheKey] = processed; return {processed, {}}; } } @@ -415,18 +587,30 @@ ShaderSource programFromISFFragmentShaderPath(const QString& fsFilename, QByteArray fsData) { // ISF works by storing a vertex shader next to the fragment shader. - QString vertexName = fsFilename; - vertexName.replace(".frag", ".vert"); - vertexName.replace(".fs", ".vs"); + // Score recognises both the long (.frag/.vert) and short (.fs/.vs) + // extension conventions; pairings are tried independently of the FS + // file's own naming so a `foo.frag` next to `foo.vs` (or `foo.fs` next + // to `foo.vert`) also resolves. Without this, the .vs sibling is + // silently ignored and the descriptor falls back to the ISF default + // vertex shader — which doesn't know about user-declared + // VERTEX_INPUTS, so the consumer renders nothing. 
+ const QString candidates[] = { + QString(fsFilename).replace(".frag", ".vert").replace(".fs", ".vs"), + QString(fsFilename).replace(".frag", ".vs"), + QString(fsFilename).replace(".fs", ".vert"), + }; // If empty: will be using the ISF's default QByteArray vertexData; - if(vertexName != fsFilename) + for(const QString& vertexName : candidates) { + if(vertexName == fsFilename) + continue; if(QFile vertexFile{vertexName}; vertexFile.exists() && vertexFile.open(QIODevice::ReadOnly)) { vertexData = vertexFile.readAll(); + break; } } @@ -469,4 +653,18 @@ programFromVSAVertexShaderPath(const QString& vertexFilename, QByteArray vertexD return {ShaderSource::ProgramType::VertexShaderArt, vertexData, ""}; } + +std::pair +preprocessShaderIncludes(QByteArray source, const QString& originPath) noexcept +{ + IncludeContext ctx; + ctx.searchPaths = shaderIncludePaths(); + if(!originPath.isEmpty()) + ctx.originDir = QFileInfo{originPath}.absolutePath(); + + QByteArray expanded = resolveIncludes(std::move(source), ctx); + if(!ctx.error.isEmpty()) + return {{}, ctx.error}; + return {std::move(expanded), {}}; +} } diff --git a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp index 2bdcaaf6a6..c9c847ffb3 100644 --- a/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/ShaderProgram.hpp @@ -1,8 +1,10 @@ #pragma once #include +#include #include +#include #include #include @@ -93,7 +95,12 @@ struct SCORE_PLUGIN_GFX_EXPORT ShaderSource } friend bool operator==(const ShaderSource& lhs, const ShaderSource& rhs) noexcept { - return lhs.vertex == rhs.vertex && lhs.fragment == rhs.fragment; + // `type` MUST be part of equality: std::hash seeds with + // `type`, so two sources differing only by type hash differently. If == + // ignored type they'd be "equal but unequal-hash", breaking the + // unordered-container invariant for ProgramCache / ProgramCacheKey. 
+ return lhs.type == rhs.type && lhs.vertex == rhs.vertex + && lhs.fragment == rhs.fragment; } friend bool operator!=(const ShaderSource& lhs, const ShaderSource& rhs) noexcept { @@ -117,6 +124,16 @@ struct SCORE_PLUGIN_GFX_EXPORT ShaderSource ShaderSource programFromISFFragmentShaderPath(const QString& fsFilename, QByteArray fsData); ShaderSource programFromVSAVertexShaderPath(const QString& vertexFilename, QByteArray vertexData); + +// Textual `#include` resolution for a single GLSL buffer. Used by +// callers that want include support without going through the full +// ProgramCache ISF pipeline — compute shaders are the current use case. +// Returns the expanded source and a non-empty error string on failure +// (missing header, include cycle, depth limit, …). The returned +// QByteArray is empty iff the error is non-empty. +SCORE_PLUGIN_GFX_EXPORT +std::pair +preprocessShaderIncludes(QByteArray source, const QString& originPath = {}) noexcept; } namespace std @@ -126,14 +143,11 @@ struct hash { std::size_t operator()(const Gfx::ShaderSource& program) const noexcept { - constexpr const QtPrivate::QHashCombine combine{ -#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0) - 0 -#endif - }; - std::size_t seed{}; - seed = combine(seed, program.vertex); - seed = combine(seed, program.fragment); + // rapidhash via the gfx Qt-aware adapters; same primitive that + // produces content_hash values throughout the gfx pipeline. + std::size_t seed{(std::size_t)program.type}; + ossia::hash_combine(seed, score::gfx::hash_qstring(program.vertex)); + ossia::hash_combine(seed, score::gfx::hash_qstring(program.fragment)); return seed; } }; @@ -146,13 +160,52 @@ struct ProcessedProgram : ShaderSource isf::descriptor descriptor; }; +// Cache key. `originDir` is the *canonical directory* the shader was +// loaded from (derived by the cache from the caller-supplied origin +// path). 
Keying on both means two models loading the same source text +// from different directories don't collide — include resolution against +// each shader's own sibling dir stays correct. +struct ProgramCacheKey +{ + ShaderSource source; + QString originDir; + + friend bool + operator==(const ProgramCacheKey& a, const ProgramCacheKey& b) noexcept + { + return a.source == b.source && a.originDir == b.originDir; + } +}; +} + +namespace std +{ +template <> +struct hash +{ + std::size_t operator()(const Gfx::ProgramCacheKey& k) const noexcept + { + std::size_t seed = std::hash{}(k.source); + ossia::hash_combine(seed, score::gfx::hash_qstring(k.originDir)); + return seed; + } +}; +} + +namespace Gfx +{ struct SCORE_PLUGIN_GFX_EXPORT ProgramCache { static ProgramCache& instance() noexcept; + + // `originPath` is the absolute path of the shader file the source was + // loaded from, used as the base for quoted `#include "..."` resolution + // and as part of the cache key. Empty when the source is in-memory + // with no associated file. 
std::pair, QString> - get(const ShaderSource& program) noexcept; + get(const ShaderSource& program, const QString& originPath = {}) noexcept; - ossia::hash_map programs; + ossia::hash_map programs; }; } diff --git a/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp index f57712fc35..26f4574a93 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Shmdata/ShmdataOutputDevice.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -180,18 +181,15 @@ void ShmdataOutputNode::createOutput(score::gfx::OutputConfiguration conf) m_settings.height, int(m_settings.rate)), &m_logger); // clang-format on - m_renderState = std::make_shared(); - - m_renderState->surface = QRhiGles2InitParams::newFallbackSurface(); - QRhiGles2InitParams params; - params.fallbackSurface = m_renderState->surface; - score::GLCapabilities caps; - caps.setupFormat(params.format); - m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); + m_renderState = score::gfx::createRenderState( + conf.graphicsApi, QSize(m_settings.width, m_settings.height), nullptr); + if(!m_renderState || !m_renderState->rhi) + { + qWarning() << "ShmdataOutputNode: failed to create QRhi"; + m_renderState.reset(); + return; + } m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::OpenGL; - m_renderState->version = caps.qShaderVersion; auto rhi = m_renderState->rhi; m_texture = rhi->newTexture( @@ -210,6 +208,26 @@ void ShmdataOutputNode::createOutput(score::gfx::OutputConfiguration conf) void ShmdataOutputNode::destroyOutput() { m_writer.reset(); + + if(!m_renderState) + return; + + // Persist-across-rebuild contract: registry survives RL teardown, + // so we tear down its QRhi resources here BEFORE + // RenderState::destroy() (called 
below) frees the device. + releaseRegistry(); + + delete m_renderTarget; + m_renderTarget = nullptr; + + delete m_renderState->renderPassDescriptor; + m_renderState->renderPassDescriptor = nullptr; + + delete m_texture; + m_texture = nullptr; + + m_renderState->destroy(); + m_renderState.reset(); } std::shared_ptr ShmdataOutputNode::renderState() const diff --git a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp index e9593bb417..824c06bac3 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutInput.cpp @@ -4,9 +4,12 @@ #include #include #include +#include #include #include +#include + #include #include @@ -23,8 +26,11 @@ #include #include +// clang-format off // D3D11On12 for D3D12 interop +#include #include +// clang-format on // Vulkan interop #if __has_include() && defined(QT_FEATURE_vulkan) && __has_include() @@ -43,6 +49,68 @@ namespace Gfx::Spout { +namespace +{ +// Cached snapshot of what we last observed from the Spout sender. +// Allows detecting size/format/handle changes between frames. +struct SpoutSenderInfo +{ + unsigned int width{}; + unsigned int height{}; + DWORD dxgiFormat{}; + HANDLE handle{}; + + friend bool operator==(const SpoutSenderInfo&, const SpoutSenderInfo&) noexcept + = default; +}; + +bool querySpoutSender(const char* name, SpoutSenderInfo& out) noexcept +{ + spoutSenderNames senders; + return senders.GetSenderInfo(name, out.width, out.height, out.handle, out.dxgiFormat); +} + +QRhiTexture::Format +dxgiToQRhiFormat(DWORD dxgi, QRhi::Implementation backend) noexcept +{ + // For OpenGL we keep RGBA channel order regardless of sender layout: + // Spout's GL-DX interop handles the BGRA<->RGBA conversion on its side. 
+ const bool wantNativeBGRA = (backend == QRhi::D3D11 || backend == QRhi::D3D12 + || backend == QRhi::Vulkan); + + switch(static_cast(dxgi)) + { + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + return QRhiTexture::RGBA8; + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + return wantNativeBGRA ? QRhiTexture::BGRA8 : QRhiTexture::RGBA8; + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + return QRhiTexture::RGB10A2; + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + // RGBA16F is the only 4x16 format QRhi exposes (no RGBA16-UNORM). For a + // _UNORM sender this samples as half-float (color-inaccurate) but is the + // only available 64-bit/pixel format; dxgiToVulkanFormat() maps the same + // DXGI formats to VK_FORMAT_R16G16B16A16_SFLOAT so the imported VkImage + // and the QRhi-created view agree (no validation violation). On D3D the + // CopyResource between _UNORM and _FLOAT is permitted (shared TYPELESS + // family) and bit-preserving. + return QRhiTexture::RGBA16F; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + return QRhiTexture::RGBA32F; + default: + return wantNativeBGRA ? 
QRhiTexture::BGRA8 : QRhiTexture::RGBA8; + } +} +} + class InputSettingsWidget final : public SharedInputSettingsWidget { public: @@ -91,6 +159,7 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer score::gfx::VideoMaterialUBO material; std::unique_ptr m_gpu{}; + std::pair m_shaders; // Spout receiver (for OpenGL) ::SpoutReceiver m_receiver; @@ -98,14 +167,12 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer // Spout DirectX (for D3D11) spoutDirectX m_spoutDX; ID3D11Texture2D* m_receivedTexture{}; - HANDLE m_sharedHandle{}; // D3D11On12 interop (for D3D12) ID3D11On12Device* m_d3d11On12Device{}; ID3D11Device* m_d3d11Device{}; ID3D11DeviceContext* m_d3d11Context{}; ID3D11Resource* m_wrappedTexture{}; - ID3D11Texture2D* m_spoutSharedTexture{}; // Cached Spout shared texture #if SCORE_SPOUT_VULKAN // Vulkan-D3D11 interop using KMT handles (SpoutVK approach) @@ -113,12 +180,14 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer // using the legacy DXGI shared handle (KMT type) VkImage m_vkLinkedImage{}; // VkImage linked to Spout's shared D3D11 texture VkDeviceMemory m_vkLinkedMemory{}; // Device memory imported from D3D11 texture - unsigned int m_vkSenderWidth{}; - unsigned int m_vkSenderHeight{}; - DWORD m_vkSenderFormat{}; bool m_vkInitialized{}; #endif + // Last-known sender info — used to detect size/format/handle changes. + SpoutSenderInfo m_lastSender{}; + // Current destination texture format (may differ from sender DXGI byte-order on OpenGL). 
+ QRhiTexture::Format m_textureFormat{QRhiTexture::RGBA8}; + bool enabled{}; QRhi::Implementation m_backend{QRhi::Null}; @@ -130,7 +199,7 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer return {}; } - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override { auto& rhi = *renderer.state.rhi; m_backend = rhi.backend(); @@ -151,102 +220,133 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer sizeof(score::gfx::VideoMaterialUBO)); m_materialUBO->create(); - // Initialize based on backend - unsigned int w = 0, h = 0; - + // Backend-specific bring-up (creates D3D11On12 device, OpenGL receiver context, etc.) + // Does NOT allocate the destination texture — that happens once we know the format. switch(m_backend) { case QRhi::OpenGLES2: - initOpenGL(rhi, w, h); + initOpenGL(rhi); break; case QRhi::D3D11: - initD3D11(rhi, w, h); + initD3D11(rhi); break; case QRhi::D3D12: - initD3D12(rhi, w, h); + initD3D12(rhi); break; #if SCORE_SPOUT_VULKAN case QRhi::Vulkan: - initVulkan(rhi, w, h); + initVulkan(rhi); break; #endif default: break; } - // Use reasonable defaults if no sender found yet - if(w == 0 || h == 0) + // Probe sender once up-front so we can pick a matching texture format. + // If no sender is present yet, fall through to safe defaults and let the + // first successful update() reconfigure to the real format. + SpoutSenderInfo si; + if(querySpoutSender(node.settings.path.toStdString().c_str(), si) + && si.width > 0 && si.height > 0) + { + enabled = true; + } + else { - w = 1280; - h = 720; + si = {}; + si.width = 1280; + si.height = 720; + // Default DXGI format mirrors the previous fallback (BGRA on D3D/Vulkan, RGBA on GL) + si.dxgiFormat = (m_backend == QRhi::D3D11 || m_backend == QRhi::D3D12 + || m_backend == QRhi::Vulkan) + ? 
DXGI_FORMAT_B8G8R8A8_UNORM + : DXGI_FORMAT_R8G8B8A8_UNORM; enabled = false; } - metadata.width = w; - metadata.height = h; + m_lastSender = si; + m_textureFormat = dxgiToQRhiFormat(si.dxgiFormat, m_backend); + metadata.width = si.width; + metadata.height = si.height; + + m_gpu = std::make_unique( + m_textureFormat, 4, metadata, QString{}, true); - // Use BGRA for D3D/Vulkan backends (native DXGI format), RGBA for OpenGL - auto format = (m_backend == QRhi::D3D11 || m_backend == QRhi::D3D12 - || m_backend == QRhi::Vulkan) - ? QRhiTexture::BGRA8 - : QRhiTexture::RGBA8; - m_gpu = std::make_unique(format, 4, metadata, QString{}, true); - createPipelines(renderer); + // Cache shaders from GPU decoder init + if(m_gpu) + m_shaders = m_gpu->init(renderer); material.textureSize[0] = metadata.width; material.textureSize[1] = metadata.height; res.updateDynamicBuffer( m_materialUBO, 0, sizeof(score::gfx::VideoMaterialUBO), &material); + + m_initialized = true; } - void initOpenGL(QRhi& rhi, unsigned int& w, unsigned int& h) + void addOutputPass( + score::gfx::RenderList& renderer, score::gfx::Edge& edge, + QRhiResourceUpdateBatch& res) override { - m_receiver.SetReceiverName(node.settings.path.toStdString().c_str()); - rhi.makeThreadLocalNativeContextCurrent(); + if(!m_gpu) + return; + if(!m_shaders.first.isValid() || !m_shaders.second.isValid()) + return; - if(m_receiver.ReceiveTexture()) + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) { - w = m_receiver.GetSenderWidth(); - h = m_receiver.GetSenderHeight(); - enabled = true; + auto pip = score::gfx::buildPipeline( + renderer, renderer.defaultTriangle(), m_shaders.first, m_shaders.second, rt, + m_processUBO, m_materialUBO, m_gpu->samplers); + if(pip.pipeline) + m_p.emplace_back(&edge, score::gfx::Pass{rt, pip, nullptr}); } } - void initD3D11(QRhi& rhi, unsigned int& w, unsigned int& h) + void removeOutputPass(score::gfx::RenderList& renderer, score::gfx::Edge& edge) override + { + auto it = 
ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }); + if(it != m_p.end()) + { + it->second.release(); + m_p.erase(it); + } + } + + bool hasOutputPassForEdge(score::gfx::Edge& edge) const override + { + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); + } + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); + + for(auto* edge : this->node.output[0]->edges) + addOutputPass(renderer, *edge, res); + } + + void initOpenGL(QRhi& rhi) + { + m_receiver.SetReceiverName(node.settings.path.toStdString().c_str()); + rhi.makeThreadLocalNativeContextCurrent(); + } + + void initD3D11(QRhi& rhi) { - // Get the D3D11 device from QRhi auto nativeHandles = static_cast(rhi.nativeHandles()); if(!nativeHandles || !nativeHandles->dev) return; auto device = static_cast(nativeHandles->dev); - - // Initialize Spout DirectX with the QRhi device - if(!m_spoutDX.OpenDirectX11(device)) - return; - - // Try to find and connect to the sender - spoutSenderNames senderNames; - char senderName[256]{0}; - strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255); - - unsigned int senderWidth = 0, senderHeight = 0; - DWORD dwFormat = 0; - HANDLE shareHandle = nullptr; - - if(senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat)) - { - w = senderWidth; - h = senderHeight; - m_sharedHandle = shareHandle; - enabled = true; - } + m_spoutDX.OpenDirectX11(device); } - void initD3D12(QRhi& rhi, unsigned int& w, unsigned int& h) + void initD3D12(QRhi& rhi) { - // Get D3D12 device and command queue from QRhi auto nativeHandles = static_cast(rhi.nativeHandles()); if(!nativeHandles || !nativeHandles->dev || !nativeHandles->commandQueue) @@ -264,7 +364,6 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer if(FAILED(hr) || !m_d3d11Device) return; - // Get the D3D11On12Device interface hr = m_d3d11Device->QueryInterface( 
__uuidof(ID3D11On12Device), reinterpret_cast(&m_d3d11On12Device)); if(FAILED(hr) || !m_d3d11On12Device) @@ -273,63 +372,13 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer m_d3d11Device = nullptr; m_d3d11Context->Release(); m_d3d11Context = nullptr; - return; - } - - // Try to find and connect to the sender - spoutSenderNames senderNames; - char senderName[256]{0}; - strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255); - - unsigned int senderWidth = 0, senderHeight = 0; - DWORD dwFormat = 0; - HANDLE shareHandle = nullptr; - - if(senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat)) - { - w = senderWidth; - h = senderHeight; - m_sharedHandle = shareHandle; - enabled = true; } } #if SCORE_SPOUT_VULKAN - void initVulkan(QRhi& rhi, unsigned int& w, unsigned int& h) - { - // Try to find and connect to the sender - spoutSenderNames senderNames; - char senderName[256]{0}; - strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255); - - unsigned int senderWidth = 0, senderHeight = 0; - DWORD dwFormat = 0; - HANDLE shareHandle = nullptr; - - if(senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat)) - { - w = senderWidth; - h = senderHeight; - m_sharedHandle = shareHandle; - m_vkSenderWidth = senderWidth; - m_vkSenderHeight = senderHeight; - m_vkSenderFormat = dwFormat; - enabled = true; - } - } + void initVulkan(QRhi& /*rhi*/) { } #endif - void createPipelines(score::gfx::RenderList& r) - { - if(m_gpu) - { - auto shaders = m_gpu->init(r); - SCORE_ASSERT(m_p.empty()); - score::gfx::defaultPassesInit( - m_p, this->node.output[0]->edges, r, r.defaultTriangle(), shaders.first, - shaders.second, m_processUBO, m_materialUBO, m_gpu->samplers); - } - } void update( score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, @@ -371,6 +420,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer auto tex = m_gpu->samplers[0].texture; auto gltex = 
static_cast(tex); + // Probe sender presence — this also lets Spout update its internal + // m_bUpdated flag, which IsUpdated() then reports/clears. if(!m_receiver.ReceiveTexture()) { enabled = false; @@ -379,16 +430,14 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer enabled = true; - if(m_receiver.IsUpdated()) + // Pull the full sender state (size + DXGI format + handle) for change detection. + // GetSenderInfo reads from the Spout sender-names shared memory and is cheap. + SpoutSenderInfo si; + if(querySpoutSender(node.settings.path.toStdString().c_str(), si) + && si.width > 0 && si.height > 0) { - unsigned int w = m_receiver.GetSenderWidth(); - unsigned int h = m_receiver.GetSenderHeight(); - - if(w > 0 && h > 0 && (w != metadata.width || h != metadata.height)) - { - resizeTexture(tex, w, h); + if(reconfigureIfNeeded(rhi, si)) gltex->specified = true; - } } GLuint texId = gltex->texture; @@ -410,16 +459,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer auto device = static_cast(nativeHandles->dev); auto context = static_cast(nativeHandles->context); - // Check for sender updates - spoutSenderNames senderNames; - char senderName[256]{0}; - strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255); - - unsigned int senderWidth = 0, senderHeight = 0; - DWORD dwFormat = 0; - HANDLE shareHandle = nullptr; - - if(!senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat)) + SpoutSenderInfo si; + if(!querySpoutSender(node.settings.path.toStdString().c_str(), si)) { enabled = false; return; @@ -427,25 +468,16 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer enabled = true; - // Check if size or handle changed - if(senderWidth != metadata.width || senderHeight != metadata.height - || shareHandle != m_sharedHandle) - { - // Release cached shared texture if handle changed - if(m_receivedTexture && shareHandle != m_sharedHandle) - { - m_receivedTexture->Release(); - 
m_receivedTexture = nullptr; - } - m_sharedHandle = shareHandle; - resizeTexture(tex, senderWidth, senderHeight); - } + // Recreate the destination texture if anything changed. + // Important: D3D11 CopyResource requires source & destination formats to match, + // so we have to honor the sender's DXGI format here. + reconfigureIfNeeded(rhi, si); // Open the shared texture (cache it to avoid reopening every frame) - if(!m_receivedTexture && m_sharedHandle) + if(!m_receivedTexture && m_lastSender.handle) { - HRESULT hr - = device->OpenSharedResource(m_sharedHandle, IID_PPV_ARGS(&m_receivedTexture)); + HRESULT hr = device->OpenSharedResource( + m_lastSender.handle, IID_PPV_ARGS(&m_receivedTexture)); if(FAILED(hr)) m_receivedTexture = nullptr; } @@ -465,16 +497,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer SCORE_ASSERT(!m_gpu->samplers.empty()); auto tex = m_gpu->samplers[0].texture; - // Check for sender updates - spoutSenderNames senderNames; - char senderName[256]{0}; - strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255); - - unsigned int senderWidth = 0, senderHeight = 0; - DWORD dwFormat = 0; - HANDLE shareHandle = nullptr; - - if(!senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat)) + SpoutSenderInfo si; + if(!querySpoutSender(node.settings.path.toStdString().c_str(), si)) { enabled = false; return; @@ -482,24 +506,9 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer enabled = true; - // Check if size changed - need to re-wrap the texture - bool sizeChanged = (senderWidth != metadata.width || senderHeight != metadata.height); - bool handleChanged = (shareHandle != m_sharedHandle); - - if(sizeChanged || handleChanged) - { - // Release old wrapped resource - if(m_wrappedTexture) - { - m_wrappedTexture->Release(); - m_wrappedTexture = nullptr; - } - - m_sharedHandle = shareHandle; - - if(sizeChanged) - resizeTexture(tex, senderWidth, senderHeight); - } + // Recreate 
destination texture (and drop the cached D3D11 wrapped resource) + // when the sender's size, format or share handle changes. + reconfigureIfNeeded(rhi, si); // Get the native D3D12 resource from QRhiTexture auto nativeTex = tex->nativeTexture(); @@ -529,8 +538,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer // Open the Spout shared texture via D3D11 ID3D11Texture2D* sharedTex = nullptr; - HRESULT hr - = m_d3d11Device->OpenSharedResource(m_sharedHandle, IID_PPV_ARGS(&sharedTex)); + HRESULT hr = m_d3d11Device->OpenSharedResource( + m_lastSender.handle, IID_PPV_ARGS(&sharedTex)); if(FAILED(hr) || !sharedTex) return; @@ -561,8 +570,17 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer case DXGI_FORMAT_R10G10B10A2_UNORM: return VK_FORMAT_A2B10G10R10_UNORM_PACK32; case DXGI_FORMAT_R16G16B16A16_UNORM: - return VK_FORMAT_R16G16B16A16_UNORM; case DXGI_FORMAT_R16G16B16A16_FLOAT: + // The QRhi destination texture for both of these is RGBA16F (the only + // 4x16 format QRhi exposes — there is no RGBA16-UNORM). The imported + // VkImage MUST use the same format as the QRhi-created image view, + // otherwise QVkTexture::createFrom() builds an SFLOAT view over a + // non-MUTABLE_FORMAT UNORM image, which is a Vulkan validation + // violation (VUID-VkImageViewCreateInfo-image-01762) and samples + // garbage. Both _UNORM and _FLOAT are 64-bit/pixel, so the KMT import + // succeeds; we therefore map both to SFLOAT to stay consistent with + // dxgiToQRhiFormat(). (UNORM data read as half-float is still color- + // inaccurate, but that is an inherent QRhi limitation, not a crash.) 
return VK_FORMAT_R16G16B16A16_SFLOAT; case DXGI_FORMAT_R32G32B32A32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; @@ -572,13 +590,11 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer } } - // Link a Vulkan image to D3D11 shared texture memory using KMT handle - // Based on SpoutVK::LinkVulkanImage from the official SpoutVulkan examples + // Link a Vulkan image to D3D11 shared texture memory using KMT handle. + // Caller is expected to have torn down any prior linked resources via + // releaseVulkanResources() and the QRhiTexture's destroy() before calling. bool linkVulkanImage(QRhi& rhi, HANDLE dxShareHandle, unsigned int w, unsigned int h, DWORD dwFormat) { - if(m_vkInitialized) - return false; - auto nativeHandles = static_cast(rhi.nativeHandles()); if(!nativeHandles || !nativeHandles->dev || !nativeHandles->physDev) return false; @@ -588,33 +604,12 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer VkFormat vulkanFormat = dxgiToVulkanFormat(dwFormat); - // Release any previous resources + // Defensive: ensure nothing leaks if caller did not release first. 
releaseVulkanResources(rhi); - // The handle type for Spout sender is KMT (legacy shared handle) - // NOT NT handle - this is critical for Spout compatibility - VkExternalMemoryHandleTypeFlags handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT; - - // Query support for external image format using KMT handles - VkPhysicalDeviceImageFormatInfo2 formatInfo = {}; - formatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2; - formatInfo.format = vulkanFormat; - formatInfo.type = VK_IMAGE_TYPE_2D; - formatInfo.tiling = VK_IMAGE_TILING_OPTIMAL; - formatInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - - VkPhysicalDeviceExternalImageFormatInfo externalFormatInfo = {}; - externalFormatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO; - externalFormatInfo.handleType = (VkExternalMemoryHandleTypeFlagBits)handleType; - formatInfo.pNext = &externalFormatInfo; - - VkExternalImageFormatProperties externalImageFormatProps = {}; - externalImageFormatProps.sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES; - VkImageFormatProperties2 imageFormatProps2 = {}; - imageFormatProps2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2; - imageFormatProps2.pNext = &externalImageFormatProps; - - // Use vkGetPhysicalDeviceImageFormatProperties2 to check support + // Spout shares D3D11 textures via legacy KMT handles (NOT NT handles). 
+ constexpr auto handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT; + auto* inst = score::gfx::staticVulkanInstance(); if(!inst) return false; @@ -625,33 +620,69 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer if(!dfuncs) return false; - // We need to use the device-level function for this - auto vkGetPhysicalDeviceImageFormatProperties2Func - = reinterpret_cast( - inst->getInstanceProcAddr("vkGetPhysicalDeviceImageFormatProperties2")); - if(!vkGetPhysicalDeviceImageFormatProperties2Func) - return false; - - VkResult result = vkGetPhysicalDeviceImageFormatProperties2Func(vkPhysDev, &formatInfo, &imageFormatProps2); - if(result != VK_SUCCESS) + // Resolve vkGetMemoryWin32HandlePropertiesKHR via vkGetDeviceProcAddr. + // + // Why not inst->getInstanceProcAddr("vkGetMemoryWin32HandlePropertiesKHR")? + // Qt forwards that to vkGetInstanceProcAddr, which for device-level + // extension functions can return a non-null trampoline that CRASHES + // when called: the instance loader has no per-device dispatch for + // device extensions, so calling that pointer dereferences garbage. + // + // vkGetDeviceProcAddr is itself a core 1.0 function, so resolving IT + // through inst->getInstanceProcAddr is safe — that part of the loader + // has proper dispatch. We then call the device-level resolver to get + // a pointer that's valid for THIS device's enabled extensions. 
+ PFN_vkGetMemoryWin32HandlePropertiesKHR pfnGetMemWin32Props = nullptr; { - qWarning() << "SpoutInput: KMT handle type not supported for Vulkan external memory"; - return false; + auto pfnGetDeviceProcAddr = reinterpret_cast( + inst->getInstanceProcAddr("vkGetDeviceProcAddr")); + if(pfnGetDeviceProcAddr) + { + pfnGetMemWin32Props + = reinterpret_cast( + pfnGetDeviceProcAddr( + vkDevice, "vkGetMemoryWin32HandlePropertiesKHR")); + } } - // Check if import is supported - VkExternalMemoryFeatureFlags externalMemoryFeatures - = externalImageFormatProps.externalMemoryProperties.externalMemoryFeatures; - if(!(externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT)) + // Probe whether import for this format/handle type is supported. + // Note: this is informational; the real test is the memory-type + // intersection below. + VkExternalMemoryFeatureFlags externalMemoryFeatures = 0; { - qWarning() << "SpoutInput: Cannot import memory with KMT handle type"; - return false; + VkPhysicalDeviceExternalImageFormatInfo externalFormatInfo = {}; + externalFormatInfo.sType + = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO; + externalFormatInfo.handleType = handleType; + + VkPhysicalDeviceImageFormatInfo2 formatInfo = {}; + formatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2; + formatInfo.pNext = &externalFormatInfo; + formatInfo.format = vulkanFormat; + formatInfo.type = VK_IMAGE_TYPE_2D; + formatInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + formatInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + + VkExternalImageFormatProperties extProps = {}; + extProps.sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES; + + VkImageFormatProperties2 props2 = {}; + props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2; + props2.pNext = &extProps; + + auto pfnGetPhysFmt2 = reinterpret_cast( + inst->getInstanceProcAddr("vkGetPhysicalDeviceImageFormatProperties2")); + if(pfnGetPhysFmt2) + { + VkResult r = 
pfnGetPhysFmt2(vkPhysDev, &formatInfo, &props2); + if(r == VK_SUCCESS) + externalMemoryFeatures = extProps.externalMemoryProperties.externalMemoryFeatures; + } } - // Create the Vulkan import image with external memory info + // Create the VkImage with external memory info. VkExternalMemoryImageCreateInfo extMemoryImageInfo = {}; extMemoryImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; - extMemoryImageInfo.pNext = nullptr; extMemoryImageInfo.handleTypes = handleType; VkImageCreateInfo imageCreateInfo = {}; @@ -664,81 +695,122 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer imageCreateInfo.arrayLayers = 1; imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; - imageCreateInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - result = dfuncs->vkCreateImage(vkDevice, &imageCreateInfo, nullptr, &m_vkLinkedImage); + VkResult result + = dfuncs->vkCreateImage(vkDevice, &imageCreateInfo, nullptr, &m_vkLinkedImage); if(result != VK_SUCCESS) { - qWarning() << "SpoutInput: Could not create Vulkan image for external memory"; + qWarning() << "SpoutInput: vkCreateImage failed for external memory:" << result; + m_vkLinkedImage = VK_NULL_HANDLE; return false; } - // Get memory requirements + // Memory requirements as dictated by the image we just created. VkMemoryRequirements memRequirements; dfuncs->vkGetImageMemoryRequirements(vkDevice, m_vkLinkedImage, &memRequirements); - // Find suitable memory type + // For an imported KMT handle, the spec requires picking a memoryTypeIndex + // from the intersection of memRequirements.memoryTypeBits and the bits + // returned by vkGetMemoryWin32HandlePropertiesKHR for that handle. 
+ uint32_t handleMemoryTypeBits = 0; + if(pfnGetMemWin32Props) + { + VkMemoryWin32HandlePropertiesKHR handleProps = {}; + handleProps.sType = VK_STRUCTURE_TYPE_MEMORY_WIN32_HANDLE_PROPERTIES_KHR; + VkResult hr + = pfnGetMemWin32Props(vkDevice, handleType, dxShareHandle, &handleProps); + if(hr == VK_SUCCESS) + handleMemoryTypeBits = handleProps.memoryTypeBits; + else + qWarning() << "SpoutInput: vkGetMemoryWin32HandlePropertiesKHR failed:" << hr; + } + else + { + qWarning() << "SpoutInput: vkGetMemoryWin32HandlePropertiesKHR not available"; + } + + const uint32_t supportedBits + = memRequirements.memoryTypeBits & handleMemoryTypeBits; + if(supportedBits == 0) + { + qWarning() << "SpoutInput: No memory type supports the shared KMT handle" + << "(memReqBits=" << Qt::hex << memRequirements.memoryTypeBits + << "handleBits=" << handleMemoryTypeBits << ")"; + dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr); + m_vkLinkedImage = VK_NULL_HANDLE; + return false; + } + VkPhysicalDeviceMemoryProperties memProperties; funcs->vkGetPhysicalDeviceMemoryProperties(vkPhysDev, &memProperties); + // Prefer DEVICE_LOCAL among compatible types; fall back to any compatible. 
uint32_t memoryTypeIndex = UINT32_MAX; for(uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { - if((memRequirements.memoryTypeBits & (1 << i)) - && (memProperties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) + if((supportedBits & (1u << i)) + && (memProperties.memoryTypes[i].propertyFlags + & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { memoryTypeIndex = i; break; } } - if(memoryTypeIndex == UINT32_MAX) { - qWarning() << "SpoutInput: No suitable memory type for external import"; + for(uint32_t i = 0; i < memProperties.memoryTypeCount; i++) + { + if(supportedBits & (1u << i)) + { + memoryTypeIndex = i; + break; + } + } + } + if(memoryTypeIndex == UINT32_MAX) + { dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr); m_vkLinkedImage = VK_NULL_HANDLE; return false; } - // Set up import memory info with KMT handle + // Import the KMT handle. VkImportMemoryWin32HandleInfoKHR importMemoryInfo = {}; importMemoryInfo.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR; - importMemoryInfo.pNext = nullptr; - importMemoryInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT; + importMemoryInfo.handleType = handleType; importMemoryInfo.handle = dxShareHandle; - importMemoryInfo.name = nullptr; - - // Check if dedicated allocation is required - bool dedicatedRequired = (externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT) != 0; + // Dedicated allocation: KMT-imported memory backs exactly one image, + // so we always dedicate. Required by some drivers, harmless on others. + (void)externalMemoryFeatures; VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {}; dedicatedAllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; dedicatedAllocInfo.pNext = &importMemoryInfo; dedicatedAllocInfo.image = m_vkLinkedImage; - dedicatedAllocInfo.buffer = VK_NULL_HANDLE; VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocInfo.pNext = dedicatedRequired ? 
(void*)&dedicatedAllocInfo : (void*)&importMemoryInfo; + allocInfo.pNext = &dedicatedAllocInfo; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = memoryTypeIndex; result = dfuncs->vkAllocateMemory(vkDevice, &allocInfo, nullptr, &m_vkLinkedMemory); if(result != VK_SUCCESS) { - qWarning() << "SpoutInput: Could not allocate memory for external import"; + qWarning() << "SpoutInput: vkAllocateMemory for external import failed:" << result; dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr); m_vkLinkedImage = VK_NULL_HANDLE; + m_vkLinkedMemory = VK_NULL_HANDLE; return false; } - // Bind memory to the Vulkan image result = dfuncs->vkBindImageMemory(vkDevice, m_vkLinkedImage, m_vkLinkedMemory, 0); if(result != VK_SUCCESS) { - qWarning() << "SpoutInput: Could not bind memory to image"; + qWarning() << "SpoutInput: vkBindImageMemory failed:" << result; dfuncs->vkFreeMemory(vkDevice, m_vkLinkedMemory, nullptr); m_vkLinkedMemory = VK_NULL_HANDLE; dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr); @@ -752,16 +824,8 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer void updateVulkan(QRhi& rhi, QRhiResourceUpdateBatch& res) { - // Check for sender updates - spoutSenderNames senderNames; - char senderName[256]{0}; - strncpy_s(senderName, node.settings.path.toStdString().c_str(), 255); - - unsigned int senderWidth = 0, senderHeight = 0; - DWORD dwFormat = 0; - HANDLE shareHandle = nullptr; - - if(!senderNames.GetSenderInfo(senderName, senderWidth, senderHeight, shareHandle, dwFormat)) + SpoutSenderInfo si; + if(!querySpoutSender(node.settings.path.toStdString().c_str(), si)) { enabled = false; return; @@ -769,63 +833,16 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer enabled = true; - // Check if size, format, or handle changed - bool needsRecreate = !m_vkInitialized - || senderWidth != m_vkSenderWidth - || senderHeight != m_vkSenderHeight - || dwFormat != m_vkSenderFormat - || shareHandle != 
m_sharedHandle; - - if(needsRecreate) + // On Vulkan the destination QRhiTexture must be (re)linked to the + // sender's shared D3D11 memory whenever size, format or handle changes. + // The first frame after init also flows through here because m_vkInitialized + // is still false (initState only allocates a plain placeholder texture). + if(!m_vkInitialized) { - // Update stored values - m_sharedHandle = shareHandle; - m_vkSenderWidth = senderWidth; - m_vkSenderHeight = senderHeight; - m_vkSenderFormat = dwFormat; - - // Create linked Vulkan image from Spout's shared handle - if(!linkVulkanImage(rhi, shareHandle, senderWidth, senderHeight, dwFormat)) - { - enabled = false; - return; - } - - // Update metadata and texture size - if(senderWidth != metadata.width || senderHeight != metadata.height) - { - metadata.width = senderWidth; - metadata.height = senderHeight; - material.scale[0] = 1.f; - material.scale[1] = 1.f; - material.textureSize[0] = metadata.width; - material.textureSize[1] = metadata.height; - } - - // Update QRhiTexture to use the linked VkImage - SCORE_ASSERT(!m_gpu->samplers.empty()); - auto tex = m_gpu->samplers[0].texture; - - tex->destroy(); - tex->setPixelSize(QSize(senderWidth, senderHeight)); - - QRhiTexture::NativeTexture nativeTex; - nativeTex.object = (quint64)m_vkLinkedImage; - // The linked image is in GENERAL layout for shared memory compatibility - nativeTex.layout = VK_IMAGE_LAYOUT_GENERAL; - - if(!tex->createFrom(nativeTex)) - { - qWarning() << "SpoutInput: Failed to create QRhiTexture from linked VkImage"; - releaseVulkanResources(rhi); - enabled = false; - return; - } - - // Recreate shader resource bindings - for(auto& pass : m_p) - pass.second.srb->create(); + // Force reconfiguration even if state happens to match the placeholder. + m_lastSender = {}; } + reconfigureIfNeeded(rhi, si); // The texture content is automatically synchronized because // the VkImage memory is linked to the D3D11 shared texture. 
@@ -852,35 +869,156 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer if(!dfuncs) return; - if(m_vkLinkedMemory) - { - dfuncs->vkFreeMemory(vkDevice, m_vkLinkedMemory, nullptr); - m_vkLinkedMemory = VK_NULL_HANDLE; - } + // Destroy the image (and any binding to memory) before freeing the memory. if(m_vkLinkedImage) { dfuncs->vkDestroyImage(vkDevice, m_vkLinkedImage, nullptr); m_vkLinkedImage = VK_NULL_HANDLE; } + if(m_vkLinkedMemory) + { + dfuncs->vkFreeMemory(vkDevice, m_vkLinkedMemory, nullptr); + m_vkLinkedMemory = VK_NULL_HANDLE; + } m_vkInitialized = false; } #endif - void resizeTexture(QRhiTexture* tex, unsigned int w, unsigned int h) + // Drop backend-specific caches that are tied to the previous sender handle, + // format or size. Called from reconfigureIfNeeded() before recreating the + // destination texture, and from releaseState() during teardown. + void releaseSharedResources(QRhi& rhi) + { + switch(m_backend) + { + case QRhi::D3D11: + if(m_receivedTexture) + { + m_receivedTexture->Release(); + m_receivedTexture = nullptr; + } + break; + case QRhi::D3D12: + if(m_wrappedTexture) + { + m_wrappedTexture->Release(); + m_wrappedTexture = nullptr; + } + break; +#if SCORE_SPOUT_VULKAN + case QRhi::Vulkan: + releaseVulkanResources(rhi); + break; +#endif + default: + break; + } + } + + // Returns true if anything was reconfigured (texture recreated). When that + // happens, callers may need to refresh backend-specific state that depends + // on the underlying QRhiTexture (e.g. OpenGL's `specified` flag). + // + // Always ensures the QRhiTexture has a valid backing on return (either a + // linked import or a plain placeholder), so the SRB rebuild that follows + // never produces a null VkImageView descriptor write. 
+ bool reconfigureIfNeeded(QRhi& rhi, const SpoutSenderInfo& sender) { - metadata.width = w; - metadata.height = h; + if(sender.width == 0 || sender.height == 0) + return false; + + const QRhiTexture::Format newFormat + = dxgiToQRhiFormat(sender.dxgiFormat, m_backend); + + const bool sizeChanged + = sender.width != m_lastSender.width || sender.height != m_lastSender.height; + const bool formatChanged = newFormat != m_textureFormat; + const bool handleChanged = sender.handle != m_lastSender.handle; + if(!sizeChanged && !formatChanged && !handleChanged) + return false; + + SCORE_ASSERT(!m_gpu->samplers.empty()); + auto tex = m_gpu->samplers[0].texture; + + // Tear-down order matters: the QRhi-owned VkImageView (or D3D SRV) must + // be destroyed BEFORE the underlying native resource it was created + // from. Calling tex->destroy() first does the former; then + // releaseSharedResources() drops the latter. + tex->destroy(); + releaseSharedResources(rhi); + + tex->setPixelSize(QSize(sender.width, sender.height)); + tex->setFormat(newFormat); + + bool linked = false; +#if SCORE_SPOUT_VULKAN + if(m_backend == QRhi::Vulkan) + { + if(linkVulkanImage( + rhi, sender.handle, sender.width, sender.height, sender.dxgiFormat)) + { + QRhiTexture::NativeTexture nt; + nt.object = (quint64)m_vkLinkedImage; + nt.layout = VK_IMAGE_LAYOUT_GENERAL; + if(tex->createFrom(nt)) + { + linked = true; + } + else + { + qWarning() << "SpoutInput: createFrom(VkImage) failed during reconfigure"; + releaseVulkanResources(rhi); + } + } + } +#endif + + bool ok = linked; + if(!ok) + { + // Either non-Vulkan path, or Vulkan link failed. Allocate a normal + // QRhiTexture so the SRB has a valid view to bind. On Vulkan this + // yields a black/undefined image but avoids the + // VUID-VkWriteDescriptorSet-descriptorType-02997 validation error + // and the subsequent draw-time crash. 
+ ok = tex->create(); + } + + if(!ok) + { + enabled = false; + // Do NOT advance m_lastSender — let the next frame retry from scratch. + return false; + } + + // Update metadata + material UBO. + metadata.width = sender.width; + metadata.height = sender.height; material.scale[0] = 1.f; material.scale[1] = 1.f; material.textureSize[0] = metadata.width; material.textureSize[1] = metadata.height; - tex->destroy(); - tex->setPixelSize(QSize(w, h)); - tex->create(); + m_textureFormat = newFormat; + m_lastSender = sender; +#if SCORE_SPOUT_VULKAN + if(m_backend == QRhi::Vulkan && !linked) + { + // Link failed for this sender configuration. We mark the renderer as + // disabled (so callers can show a fallback frame) but record the + // sender state so we don't churn through destroy/create every frame. + // A natural retry happens when the sender's size, format or share + // handle changes. + enabled = false; + } +#endif + // Pipelines stay valid (only the input sampler binding changed), but the + // SRB references the QRhiTexture pointer/format and must be rebuilt. for(auto& pass : m_p) - pass.second.srb->create(); + pass.second.p.srb->create(); + + return true; } void runRenderPass( @@ -891,28 +1029,31 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer score::gfx::defaultRenderPass(renderer, mesh, m_meshBuffer, cb, edge, m_p); } - void release(score::gfx::RenderList& r) override + void releaseState(score::gfx::RenderList& r) override { + if(!m_initialized) + return; + + // Order matters: destroy QRhi-owned resources (QRhiTexture wrappers and + // their image views) BEFORE the underlying native shared resources they + // wrap. Otherwise the QRhiTexture destruction may operate on a view + // whose underlying VkImage / D3D resource has already been released. + if(m_gpu) + { + m_gpu->release(r); + } + + // Now drop the native shared resources we hold. 
+ releaseSharedResources(*r.state.rhi); + switch(m_backend) { case QRhi::OpenGLES2: if(enabled) m_receiver.ReleaseReceiver(); break; - case QRhi::D3D11: - if(m_receivedTexture) - { - m_receivedTexture->Release(); - m_receivedTexture = nullptr; - } - break; case QRhi::D3D12: - // Release D3D11On12 resources - if(m_wrappedTexture) - { - m_wrappedTexture->Release(); - m_wrappedTexture = nullptr; - } + // Release the D3D11On12 interop layer (set up in initD3D12). if(m_d3d11On12Device) { m_d3d11On12Device->Release(); @@ -929,26 +1070,13 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer m_d3d11Device = nullptr; } break; -#if SCORE_SPOUT_VULKAN - case QRhi::Vulkan: - releaseVulkanResources(*r.state.rhi); - m_vkSenderWidth = 0; - m_vkSenderHeight = 0; - m_vkSenderFormat = 0; - break; -#endif default: break; } enabled = false; - m_receivedTexture = nullptr; - m_sharedHandle = nullptr; - - if(m_gpu) - { - m_gpu->release(r); - } + m_lastSender = {}; + m_textureFormat = QRhiTexture::RGBA8; delete m_processUBO; m_processUBO = nullptr; @@ -959,7 +1087,15 @@ class SpoutInputNode::Renderer : public score::gfx::NodeRenderer p.second.release(); m_p.clear(); - m_meshBuffer.buffers.clear(); + m_meshBuffer = {}; + m_shaders = {}; + + m_initialized = false; + } + + void release(score::gfx::RenderList& r) override + { + releaseState(r); } }; diff --git a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp index ae0e2d7945..9a5fcc0a7a 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/Spout/SpoutOutput.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -24,8 +25,11 @@ #include #include +// clang-format off // D3D11On12 for D3D12 interop +#include #include +// clang-format on // Vulkan interop #if __has_include() && defined(QT_FEATURE_vulkan) && __has_include() @@ -533,8 +537,6 @@ struct SpoutNode final : 
score::gfx::OutputNode void createOutput(score::gfx::OutputConfiguration conf) override { - m_renderState = std::make_shared(); - // Choose backend based on requested API switch(conf.graphicsApi) { @@ -555,12 +557,13 @@ struct SpoutNode final : score::gfx::OutputNode break; } - auto rhi = m_renderState->rhi; - if(!rhi) + if(!m_renderState || !m_renderState->rhi) { qWarning() << "Failed to create QRhi for Spout output"; + m_renderState.reset(); return; } + auto rhi = m_renderState->rhi; // Use BGRA for D3D/Vulkan backends, RGBA for OpenGL auto format = (m_backend == QRhi::D3D11 || m_backend == QRhi::D3D12 || m_backend == QRhi::Vulkan) @@ -586,43 +589,36 @@ struct SpoutNode final : score::gfx::OutputNode m_backend = QRhi::OpenGLES2; m_spout = std::make_shared(); - m_renderState->surface = QRhiGles2InitParams::newFallbackSurface(); - QRhiGles2InitParams params; - params.fallbackSurface = m_renderState->surface; - score::GLCapabilities caps; - caps.setupFormat(params.format); - m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); - m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::OpenGL; - m_renderState->version = caps.qShaderVersion; + m_renderState = score::gfx::createRenderState( + score::gfx::GraphicsApi::OpenGL, + QSize(m_settings.width, m_settings.height), nullptr); + if(m_renderState) + m_renderState->outputSize = m_renderState->renderSize; } void createOutputD3D11() { m_backend = QRhi::D3D11; - QRhiD3D11InitParams params; - m_renderState->rhi = QRhi::create(QRhi::D3D11, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); - m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::D3D11; - m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::D3D11); + m_renderState = score::gfx::createRenderState( + 
score::gfx::GraphicsApi::D3D11, + QSize(m_settings.width, m_settings.height), nullptr); + if(m_renderState) + m_renderState->outputSize = m_renderState->renderSize; } void createOutputD3D12() { m_backend = QRhi::D3D12; - QRhiD3D12InitParams params; - m_renderState->rhi = QRhi::create(QRhi::D3D12, ¶ms, {}); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); - m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = score::gfx::GraphicsApi::D3D12; - m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::D3D12); + m_renderState = score::gfx::createRenderState( + score::gfx::GraphicsApi::D3D12, + QSize(m_settings.width, m_settings.height), nullptr); + if(m_renderState) + m_renderState->outputSize = m_renderState->renderSize; // Get D3D12 device and command queue from QRhi - if(m_renderState->rhi) + if(m_renderState && m_renderState->rhi) { auto nativeHandles = static_cast( m_renderState->rhi->nativeHandles()); @@ -653,33 +649,16 @@ struct SpoutNode final : score::gfx::OutputNode { m_backend = QRhi::Vulkan; - // Create Vulkan instance with required extensions - auto* vkInst = score::gfx::staticVulkanInstance(); - if(!vkInst) - { - qWarning() << "SpoutOutput: No Vulkan instance available"; - return; - } - - QRhiVulkanInitParams params; - params.inst = vkInst; - - // Enable required device extensions for external memory - params.deviceExtensions << VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME - << VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME - << VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME - << VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME - << VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME - << VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME; - - m_renderState->rhi = QRhi::create(QRhi::Vulkan, ¶ms, QRhi::EnableDebugMarkers, nullptr); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); - m_renderState->outputSize = m_renderState->renderSize; - m_renderState->api = 
score::gfx::GraphicsApi::Vulkan; - m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::Vulkan); + // createRenderState already adds the VK_KHR_EXTERNAL_MEMORY{,_WIN32}, etc. + // extensions on Windows, plus shares the video-decode-capable VkDevice. + m_renderState = score::gfx::createRenderState( + score::gfx::GraphicsApi::Vulkan, + QSize(m_settings.width, m_settings.height), nullptr); + if(m_renderState) + m_renderState->outputSize = m_renderState->renderSize; // Create a D3D11 device for creating the shared texture - if(m_renderState->rhi) + if(m_renderState && m_renderState->rhi) { D3D_FEATURE_LEVEL featureLevels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; UINT createDeviceFlags = D3D11_CREATE_DEVICE_BGRA_SUPPORT; @@ -781,6 +760,29 @@ struct SpoutNode final : score::gfx::OutputNode break; } m_created = false; + + // Backend-specific interop handles are gone above; now release the + // QRhi-owned resources. Order: render target -> render pass descriptor + // -> texture -> rhi (which is what RenderState::destroy() does). + if(!m_renderState) + return; + + // Persist-across-rebuild contract: registry survives RL teardown, + // so we tear down its QRhi resources here BEFORE + // RenderState::destroy() (called below) frees the device. 
+ releaseRegistry(); + + delete m_renderTarget; + m_renderTarget = nullptr; + + delete m_renderState->renderPassDescriptor; + m_renderState->renderPassDescriptor = nullptr; + + delete m_texture; + m_texture = nullptr; + + m_renderState->destroy(); + m_renderState.reset(); } std::shared_ptr renderState() const override diff --git a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm index 9821e04651..24703f856e 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm +++ b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonInput.mm @@ -8,7 +8,10 @@ #include #include #include +#include #include + +#include #include #include #include @@ -67,6 +70,7 @@ explicit Renderer(const SyphonInputNode &n) score::gfx::VideoMaterialUBO material; std::unique_ptr m_gpu{}; + std::pair m_shaders; // OpenGL receiver SyphonOpenGLClient* m_receiver{}; @@ -78,6 +82,8 @@ explicit Renderer(const SyphonInputNode &n) bool enabled{}; bool m_usingMetal{}; + int m_emptyFrameCount{0}; + static constexpr int kReopenAfterEmpty = 60; ~Renderer() { } @@ -99,10 +105,37 @@ explicit Renderer(const SyphonInputNode &n) return nullptr; } + // Whether the server we are bound to is still advertised in the Syphon + // directory. A *static* sender (publishes one frame then idles) keeps no + // "new frame" coming but stays in the directory — so we must NOT reconnect + // just because frames stopped; only reconnect once the server truly vanished. + bool serverStillPresent() + { + SyphonServerDirectory* ssd = [SyphonServerDirectory sharedDirectory]; + NSArray* servers = [ssd serversMatchingName:NULL appName:NULL]; + return findServer(servers, node.settings.path) != nullptr; + } + void openServer(QRhi& rhi) { enabled = false; + // Symmetric with releaseState(): stop any client we already hold before + // replacing it, otherwise the previous SyphonClient leaks (and keeps a + // connection open to the server). 
+ if (m_mtlReceiver) + { + [m_mtlReceiver stop]; + m_mtlReceiver = nil; + } + if (m_receiver) + { + [m_receiver stop]; + m_receiver = nil; + } + m_currentMtlTexture = nil; + currentTex = 0; + SyphonServerDirectory *ssd = [SyphonServerDirectory sharedDirectory]; NSArray *servers = [ssd serversMatchingName:NULL appName:NULL]; if (servers.count == 0) @@ -147,7 +180,8 @@ void openServer(QRhi& rhi) } score::gfx::TextureRenderTarget renderTargetForInput(const score::gfx::Port& p) override { return { }; } - void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) override + + void initState(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) override { // Initialize our rendering structures auto& rhi = *renderer.state.rhi; @@ -216,7 +250,10 @@ void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) overri { m_gpu = std::make_unique(QRhiTexture::RGBA8, 4, metadata, QString{}); } - createPipelines(renderer); + + // Cache shaders from GPU decoder init + if (m_gpu) + m_shaders = m_gpu->init(renderer); if (m_usingMetal && mtlTex) { @@ -226,27 +263,54 @@ void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) overri { rebuildTexture(glImg); } + + m_initialized = true; } - void createPipelines(score::gfx::RenderList& r) + void addOutputPass( + score::gfx::RenderList& renderer, score::gfx::Edge& edge, + QRhiResourceUpdateBatch& res) override { - if (m_gpu) + if (!m_gpu) + return; + if (!m_shaders.first.isValid() || !m_shaders.second.isValid()) + return; + + auto rt = renderer.renderTargetForOutput(edge); + if (rt.renderTarget) + { + auto pip = score::gfx::buildPipeline( + renderer, renderer.defaultTriangle(), m_shaders.first, m_shaders.second, rt, + m_processUBO, m_materialUBO, m_gpu->samplers); + if (pip.pipeline) + m_p.emplace_back(&edge, score::gfx::Pass{rt, pip, nullptr}); + } + } + + void removeOutputPass(score::gfx::RenderList& renderer, score::gfx::Edge& edge) override + { + auto it = ossia::find_if(m_p, 
[&](const auto& p) { return p.first == &edge; }); + if (it != m_p.end()) { - auto shaders = m_gpu->init(r); - SCORE_ASSERT(m_p.empty()); - score::gfx::defaultPassesInit( - m_p, - this->node.output[0]->edges, - r, - r.defaultTriangle(), - shaders.first, - shaders.second, - m_processUBO, - m_materialUBO, - m_gpu->samplers); + it->second.release(); + m_p.erase(it); } } + bool hasOutputPassForEdge(score::gfx::Edge& edge) const override + { + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); + } + + void init(score::gfx::RenderList &renderer, QRhiResourceUpdateBatch &res) override + { + initState(renderer, res); + + for (auto* edge : this->node.output[0]->edges) + addOutputPass(renderer, *edge, res); + } + void rebuildTexture(SyphonOpenGLImage* img) { SCORE_ASSERT(!m_gpu->samplers.empty()); @@ -274,7 +338,7 @@ void rebuildTexture(SyphonOpenGLImage* img) t->gltype = GL_UNSIGNED_INT_8_8_8_8_REV; } for(auto& pass : m_p) - pass.second.srb->create(); + pass.second.p.srb->create(); } void rebuildTextureMetal(id mtlTex) @@ -293,7 +357,7 @@ void rebuildTextureMetal(id mtlTex) tex->createFrom(nativeTex); for(auto& pass : m_p) - pass.second.srb->create(); + pass.second.p.srb->create(); } void update(score::gfx::RenderList &renderer, @@ -304,13 +368,26 @@ void update(score::gfx::RenderList &renderer, { auto& rhi = *renderer.state.rhi; openServer(rhi); + m_emptyFrameCount = 0; } if (m_usingMetal) { // Metal path if (!m_mtlReceiver || !m_mtlReceiver.hasNewFrame) + { + if (++m_emptyFrameCount >= kReopenAfterEmpty) + { + m_emptyFrameCount = 0; + // Only reconnect if the server is actually gone. A healthy static + // sender simply stops producing new frames while staying present; + // dropping it here would reconnect forever and lose the last frame. 
+ if (!m_mtlReceiver || !serverStillPresent()) + enabled = false; + } return; + } + m_emptyFrameCount = 0; id mtlTex = [m_mtlReceiver newFrameImage]; if (!mtlTex) @@ -336,7 +413,18 @@ void update(score::gfx::RenderList &renderer, { // OpenGL path if (!m_receiver || !m_receiver.hasNewFrame) + { + if (++m_emptyFrameCount >= kReopenAfterEmpty) + { + m_emptyFrameCount = 0; + // Only reconnect if the server actually vanished (see Metal path): + // a static sender stays present but stops sending new frames. + if (!m_receiver || !serverStillPresent()) + enabled = false; + } return; + } + m_emptyFrameCount = 0; auto img = [m_receiver newFrameImage]; if (!img) @@ -370,22 +458,27 @@ void runRenderPass( score::gfx::defaultRenderPass(renderer, mesh, m_meshBuffer, cb, edge, m_p); } - void release(score::gfx::RenderList& r) override + void releaseState(score::gfx::RenderList& r) override { - if (enabled) + if (!m_initialized) + return; + + // Stop whenever a receiver exists — NOT only when enabled. A receiver can + // be alive while enabled==false (e.g. after the empty-frame path cleared + // enabled but left the client connected), and skipping -stop in that case + // leaks the SyphonClient. This also mirrors openServer(), which is the only + // other place receivers are created. 
+ if (m_mtlReceiver) { - if (m_mtlReceiver) - { - [m_mtlReceiver stop]; - m_mtlReceiver = nil; - } - if (m_receiver) - { - [m_receiver stop]; - m_receiver = nil; - } - enabled = false; + [m_mtlReceiver stop]; + m_mtlReceiver = nil; + } + if (m_receiver) + { + [m_receiver stop]; + m_receiver = nil; } + enabled = false; m_currentMtlTexture = nil; currentTex = 0; @@ -404,7 +497,15 @@ void release(score::gfx::RenderList& r) override p.second.release(); m_p.clear(); - m_meshBuffer.buffers.clear(); + m_meshBuffer = {}; + m_shaders = {}; + + m_initialized = false; + } + + void release(score::gfx::RenderList& r) override + { + releaseState(r); } }; diff --git a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm index b6073fa78e..fdb164c092 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm +++ b/src/plugins/score-plugin-gfx/Gfx/Syphon/SyphonOutput.mm @@ -1,6 +1,7 @@ #include "SyphonOutput.hpp" #include +#include #include #include #include @@ -177,33 +178,21 @@ void setRenderer(std::shared_ptr r) override void createOutput(score::gfx::OutputConfiguration conf) override { - m_renderState = std::make_shared(); - m_renderState->renderSize = QSize(m_settings.width, m_settings.height); - m_renderState->outputSize = m_renderState->renderSize; - - if (conf.graphicsApi == score::gfx::GraphicsApi::Metal) + // Syphon supports GL or Metal; the upstream graphics API picks which one. + const auto api = (conf.graphicsApi == score::gfx::GraphicsApi::Metal) + ? 
score::gfx::GraphicsApi::Metal + : score::gfx::GraphicsApi::OpenGL; + m_usingMetal = (api == score::gfx::GraphicsApi::Metal); + + m_renderState = score::gfx::createRenderState( + api, QSize(m_settings.width, m_settings.height), nullptr); + if(!m_renderState || !m_renderState->rhi) { - // Metal backend - QRhiMetalInitParams params; - m_renderState->rhi = QRhi::create(QRhi::Metal, ¶ms, {}); - m_renderState->api = score::gfx::GraphicsApi::Metal; - m_renderState->version = Gfx::Settings::shaderVersionForAPI(score::gfx::GraphicsApi::Metal); - m_usingMetal = true; - } - else - { - // OpenGL backend - m_renderState->surface = QRhiGles2InitParams::newFallbackSurface(); - QRhiGles2InitParams params; - params.format.setMajorVersion(3); - params.format.setMinorVersion(2); - params.format.setProfile(QSurfaceFormat::CompatibilityProfile); - params.fallbackSurface = m_renderState->surface; - m_renderState->rhi = QRhi::create(QRhi::OpenGLES2, ¶ms, {}); - m_renderState->api = score::gfx::GraphicsApi::OpenGL; - m_renderState->version = QShaderVersion(120); - m_usingMetal = false; + qWarning() << "SyphonOutput: failed to create QRhi"; + m_renderState.reset(); + return; } + m_renderState->outputSize = m_renderState->renderSize; auto rhi = m_renderState->rhi; m_texture = rhi->newTexture( @@ -240,6 +229,28 @@ void destroyOutput() override } m_created = false; + + // Release Syphon servers above first; they hold native GL/Metal handles + // into the rhi's device. Now tear down the rhi-owned resources. + if(!m_renderState) + return; + + // Persist-across-rebuild contract: registry survives RL teardown, + // so we tear down its QRhi resources here BEFORE + // RenderState::destroy() (called below) frees the device. 
+ releaseRegistry(); + + delete m_renderTarget; + m_renderTarget = nullptr; + + delete m_renderState->renderPassDescriptor; + m_renderState->renderPassDescriptor = nullptr; + + delete m_texture; + m_texture = nullptr; + + m_renderState->destroy(); + m_renderState.reset(); } std::shared_ptr renderState() const override diff --git a/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp b/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp index 5847097cec..6160694b7b 100644 --- a/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/TexturePort.cpp @@ -9,8 +9,7 @@ #include #include -#include -#include +#include #include #include @@ -41,108 +40,34 @@ class GraphPreviewWidget : public QWidget public: GraphPreviewWidget(const TextureOutlet& outlet, Gfx::DocumentPlugin& plug) : outlet_p{&outlet} - , plug{&plug} { setLayout(new Inspector::VBoxLayout{this}); - score::gfx::OutputNode::Configuration conf{}; - auto window = std::make_unique(conf, true); - node = window.get(); - screenId = plug.context.register_preview_node(std::move(window)); - if(screenId != -1) - { - if(outlet.nodeId != -1) - { - nodeId = outlet.nodeId; - e = {{nodeId, 0}, {screenId, 0}}; - plug.context.connect_preview_node(*e); - } - timerId = startTimer(16); - } + m_rhiWidget = new RhiPreviewWidget(this); + m_rhiWidget->setMinimumWidth(100); + m_rhiWidget->setMaximumWidth(300); + m_rhiWidget->setMinimumHeight(200); + m_rhiWidget->setMaximumHeight(200); + m_rhiWidget->useContext(&plug.context, outlet.nodeId); + layout()->addWidget(m_rhiWidget); + + // TextureOutlet::nodeId has no notifier — poll for changes so a + // process re-instantiation rewires the preview to the new producer. 
+ startTimer(16); } - void timerEvent(QTimerEvent*) + void timerEvent(QTimerEvent*) override { - const auto& w = node->window(); - if(!w) + if(!outlet_p || !m_rhiWidget) return; - - if(!outlet_p) - return; - - auto& outlet = *outlet_p; - - if(outlet.nodeId != nodeId) - { - if(e) - { - if(plug) - plug->context.disconnect_preview_node(*e); - e = std::nullopt; - } - - if(outlet.nodeId != -1) - { - nodeId = outlet.nodeId; - e = {{nodeId, 0}, {screenId, 0}}; - - if(plug) - plug->context.connect_preview_node(*e); - } - } - - if(!container) - { - qwindow = w.get(); - this->window = w; - - container = QWidget::createWindowContainer(qwindow, this); - container->setMinimumWidth(100); - container->setMaximumWidth(300); - container->setMinimumHeight(200); - container->setMaximumHeight(200); - this->layout()->addWidget(container); - } - node->render(); + m_rhiWidget->setProducerNodeId(outlet_p->nodeId); } - ~GraphPreviewWidget() - { - if(qwindow) - { - // Take back ownership of the window - qwindow->setParent(nullptr); - qwindow->close(); - QChildEvent ev(QEvent::ChildRemoved, qwindow); - ((QObject*)container)->event(&ev); - } - - // We "garbage collect" the window - QTimer::singleShot(1, [w = this->window] { }); - if(plug) - { - if(e) - { - plug->context.disconnect_preview_node(*e); - } - plug->context.unregister_preview_node(screenId); - } - } + ~GraphPreviewWidget() override = default; private: QPointer outlet_p; - QPointer plug; - score::gfx::ScreenNode* node{}; - std::optional e; - - std::shared_ptr window; - - QPointer qwindow{}; - QWidget* container{}; - - int screenId = score::gfx::invalid_node_index; - int nodeId = score::gfx::invalid_node_index; - int timerId{}; + RhiPreviewWidget* m_rhiWidget{}; }; TextureInlet::~TextureInlet() { } diff --git a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp index 4c16529942..eff765b3be 100644 --- a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp +++ 
b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.cpp @@ -10,8 +10,10 @@ #include #include +#include #include #include +#include #include @@ -141,6 +143,7 @@ Model::Model( metadata().setInstanceName(*this); m_outlets.push_back(new TextureOutlet{"Texture Out", Id(1), this}); + m_scriptPath = init; (void)setProgram(programFromVSAVertexShaderPath(init, {})); } @@ -175,7 +178,9 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f) m_program.vertex = f.vertex; m_program.fragment.clear(); m_processedProgram.fragment.clear(); - if(const auto& [processed, error] = ProgramCache::instance().get(f); bool(processed)) + if(const auto& [processed, error] + = ProgramCache::instance().get(f, m_scriptPath); + bool(processed)) { ossia::flat_map previous_values; for(auto inl : m_inlets) @@ -246,7 +251,9 @@ Process::Descriptor ProcessFactory::descriptor(QString path) const noexcept template <> void DataStreamReader::read(const Gfx::VSA::Model& proc) { - m_stream << proc.m_program; + auto& ctx = score::IDocument::documentContext(proc); + m_stream << proc.m_program + << score::relativizeFilePath(proc.m_scriptPath, ctx); readPorts(*this, proc.m_inlets, proc.m_outlets); @@ -257,7 +264,12 @@ template <> void DataStreamWriter::write(Gfx::VSA::Model& proc) { Gfx::ShaderSource s; - m_stream >> s; + m_stream >> s >> proc.m_scriptPath; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } s.type = isf::parser::ShaderType::VertexShaderArt; (void)proc.setVertex(s.vertex); @@ -272,6 +284,11 @@ template <> void JSONReader::read(const Gfx::VSA::Model& proc) { obj["Vertex"] = proc.vertex(); + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx); + } readPorts(*this, proc.m_inlets, proc.m_outlets); } @@ -282,6 +299,15 @@ void JSONWriter::write(Gfx::VSA::Model& proc) 
Gfx::ShaderSource s; s.vertex = obj["Vertex"].toString(); s.type = isf::parser::ShaderType::VertexShaderArt; + if(auto r = obj.tryGet("Root")) + { + proc.m_scriptPath <<= *r; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } + } (void)proc.setVertex(s.vertex); writePorts( diff --git a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp index 1191dc8bdd..8efa772d46 100644 --- a/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/VSA/Process.hpp @@ -57,6 +57,10 @@ class Model final : public Process::ProcessModel void errorMessage(int line, const QString& arg_2) const W_SIGNAL(errorMessage, line, arg_2); + // Absolute path of the shader file this model was loaded from. Used as + // the base for quoted #include resolution. Empty for in-memory source. + QString rootPath() const noexcept { return m_scriptPath; } + private: [[nodiscard]] Process::ScriptChangeResult setProgram(ShaderSource f); void loadPreset(const Process::Preset& preset) override; @@ -66,6 +70,7 @@ class Model final : public Process::ProcessModel ShaderSource m_program; ProcessedProgram m_processedProgram; + QString m_scriptPath; }; struct ProcessFactory final : Process::ProcessFactory_T diff --git a/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.cpp b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.cpp new file mode 100644 index 0000000000..6c159e807f --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.cpp @@ -0,0 +1,267 @@ +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace Gfx +{ +namespace +{ +constexpr int kPreviewIntervalMs = 16; // ~60 Hz +} + +RhiPreviewWidget::RhiPreviewWidget(QWidget* parent) + : QWidget{parent} +{ + // Opaque painter target: every paintEvent fully overwrites the area + // (image 
blit or solid clear), so Qt can skip background fill. + setAttribute(Qt::WA_OpaquePaintEvent, true); + setAttribute(Qt::WA_NoSystemBackground, true); +} + +RhiPreviewWidget::~RhiPreviewWidget() +{ + detach(); +} + +void RhiPreviewWidget::useGraph( + score::gfx::Graph* graph, + std::function onAttached, + std::function onAboutToDetach) +{ + detach(); + m_backend = Backend::Graph; + m_graph = graph; + m_onAttached = std::move(onAttached); + m_onAboutToDetach = std::move(onAboutToDetach); + m_ctx = nullptr; + attach(); +} + +void RhiPreviewWidget::useContext(GfxContext* ctx, int32_t producerNodeId) +{ + detach(); + m_backend = Backend::Context; + m_ctx = ctx; + m_producerNodeId = producerNodeId; + m_graph = nullptr; + attach(); +} + +void RhiPreviewWidget::setProducerNodeId(int32_t id) +{ + if(id == m_producerNodeId) + return; + + const int32_t oldId = m_producerNodeId; + m_producerNodeId = id; + + // Hot-rewire the producer→preview edge. Only meaningful on the + // Context backend; the Graph backend rewires through the caller's + // attach/detach callbacks. + if(m_backend == Backend::Context && m_ctx + && m_screenNodeId != score::gfx::invalid_node_index) + { + if(m_edgeConnected) + { + m_ctx->disconnect_preview_node( + EdgeSpec{{oldId, 0}, {m_screenNodeId, 0}}); + m_edgeConnected = false; + } + if(m_producerNodeId != score::gfx::invalid_node_index) + { + m_ctx->connect_preview_node( + EdgeSpec{{m_producerNodeId, 0}, {m_screenNodeId, 0}}); + m_edgeConnected = true; + } + } +} + +void RhiPreviewWidget::attach() +{ + if(m_backend == Backend::None) + return; + + m_readback = std::make_shared(); + + auto node = std::make_unique(); + node->shared_readback = m_readback; + // Match the offscreen render size to the widget's pixel size; the + // BackgroundNode allocates its own QRhi target at this size. 
+ const qreal dpr = devicePixelRatioF(); + const QSize px{ + qMax(1, int(width() * dpr)), qMax(1, int(height() * dpr))}; + if(width() > 0 && height() > 0) + node->setSize(px); + m_node = node.get(); + + switch(m_backend) + { + case Backend::Graph: { + if(!m_graph) + { + m_node = nullptr; + return; + } + + // Keep ownership: Graph::removeNode does not delete; we delete in + // detach() once we've removed the node + its render list. + m_graph->addNode(node.release()); + + // The caller wires producer→preview edges here, then arranges + // for a render list to be built (typically via createAllRenderLists). + if(m_onAttached) + m_onAttached(*m_node); + break; + } + + case Backend::Context: { + if(!m_ctx) + { + m_node = nullptr; + return; + } + + // register_node (not register_preview_node) so that GfxContext's + // recomputeTimers picks up BackgroundNode::configuration(). + // manualRenderingRate and drives render() automatically — the + // BackgroundNode does its own offscreen frame + readback there. + // We just trigger update() on the widget timer to repaint. + m_screenNodeId = m_ctx->register_node( + std::unique_ptr{node.release()}); + if(m_screenNodeId == score::gfx::invalid_node_index) + { + m_node = nullptr; + return; + } + if(m_producerNodeId != score::gfx::invalid_node_index) + { + m_ctx->connect_preview_node( + EdgeSpec{{m_producerNodeId, 0}, {m_screenNodeId, 0}}); + m_edgeConnected = true; + } + break; + } + + case Backend::None: + break; + } + + // Single timer: refreshes the widget at preview rate. For the Graph + // backend it also drives BackgroundNode::render() directly (the + // manager's graph has no GfxContext timers); for the Context backend + // GfxContext drives render() via its manual timer and we only need + // update() here. 
+ if(m_timerId == 0) + m_timerId = startTimer(kPreviewIntervalMs); +} + +void RhiPreviewWidget::detach() +{ + if(m_timerId) + { + killTimer(m_timerId); + m_timerId = 0; + } + + switch(m_backend) + { + case Backend::Graph: { + if(m_node && m_graph) + { + if(m_onAboutToDetach) + m_onAboutToDetach(*m_node); + m_graph->destroyOutputRenderList(*m_node); + m_graph->removeNode(m_node); + } + delete m_node; + m_node = nullptr; + break; + } + + case Backend::Context: { + if(m_ctx && m_screenNodeId != score::gfx::invalid_node_index) + { + if(m_edgeConnected) + { + m_ctx->disconnect_preview_node( + EdgeSpec{{m_producerNodeId, 0}, {m_screenNodeId, 0}}); + m_edgeConnected = false; + } + m_ctx->unregister_node(m_screenNodeId); + } + m_screenNodeId = score::gfx::invalid_node_index; + // GfxContext owns the node lifetime via its command queue; we + // do not delete here. + m_node = nullptr; + break; + } + + case Backend::None: + m_node = nullptr; + break; + } + + m_readback.reset(); +} + +void RhiPreviewWidget::resizeEvent(QResizeEvent* ev) +{ + QWidget::resizeEvent(ev); + if(m_node) + { + const qreal dpr = devicePixelRatioF(); + const QSize px{ + qMax(1, int(ev->size().width() * dpr)), + qMax(1, int(ev->size().height() * dpr))}; + m_node->setSize(px); + } +} + +void RhiPreviewWidget::timerEvent(QTimerEvent* ev) +{ + if(ev->timerId() != m_timerId) + { + QWidget::timerEvent(ev); + return; + } + + // Graph backend: drive the offscreen frame + readback ourselves + // (the manager's private graph has no timer infrastructure). + // Context backend: GfxContext drives render() via its manual timer. 
+ if(m_backend == Backend::Graph && m_node) + m_node->render(); + + update(); +} + +void RhiPreviewWidget::paintEvent(QPaintEvent*) +{ + QPainter painter{this}; + + if(m_readback) + { + const auto& rb = *m_readback; + const int w = rb.pixelSize.width(); + const int h = rb.pixelSize.height(); + const int expected = w * h * 4; + if(w > 0 && h > 0 && rb.data.size() >= expected) + { + QImage img{ + reinterpret_cast(rb.data.constData()), + w, h, w * 4, QImage::Format_RGBA8888}; + painter.drawImage(rect(), img); + return; + } + } + + painter.fillRect(rect(), Qt::black); +} +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.hpp b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.hpp new file mode 100644 index 0000000000..e6dacef043 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Widgets/RhiPreviewWidget.hpp @@ -0,0 +1,92 @@ +#pragma once +#include + +#include + +#include +#include +#include + +struct QRhiReadbackResult; + +namespace score::gfx +{ +struct Graph; +struct BackgroundNode; +} + +namespace Gfx +{ +class GfxContext; + +/** + * @brief A QWidget that paints a score::gfx render-graph output without using + * QWidget::createWindowContainer (broken on macOS) or QRhiWidget + * (forces the toplevel to switch to RHI compositing — flash + perf + * impact). + * + * The graph renders into an offscreen QRhi texture owned by a + * score::gfx::BackgroundNode; each frame is read back into a QImage-shaped + * QByteArray and drawn in paintEvent. CPU readback is cheap at preview + * resolutions and avoids touching Qt's compositor RHI entirely. + * + * Two backends: + * - Graph backend (useGraph): caller owns a score::gfx::Graph and drives + * wiring through callbacks. Used by ShaderPreviewManager. + * - Context backend (useContext): caller routes registration through a + * Gfx::GfxContext. The GfxContext's manual timer drives the offscreen + * render; the widget only triggers QWidget::update() to refresh the + * painted image. 
Used by GraphPreviewWidget (texture-port preview). + */ +class SCORE_PLUGIN_GFX_EXPORT RhiPreviewWidget : public QWidget +{ +public: + explicit RhiPreviewWidget(QWidget* parent = nullptr); + ~RhiPreviewWidget() override; + + /// Graph backend. onAttached fires once the BackgroundNode has been + /// registered with the graph (its render list is built). The caller wires + /// producer→preview edges in there. onAboutToDetach fires before the + /// BackgroundNode is removed; the caller must remove any edges it added. + void useGraph( + score::gfx::Graph* graph, + std::function onAttached, + std::function onAboutToDetach); + + /// Context backend. The producer node id can be updated at any time; the + /// widget rewires the preview edge accordingly. + void useContext(GfxContext* ctx, int32_t producerNodeId); + void setProducerNodeId(int32_t id); + +protected: + void paintEvent(QPaintEvent* ev) override; + void resizeEvent(QResizeEvent* ev) override; + void timerEvent(QTimerEvent* ev) override; + +private: + void attach(); + void detach(); + + enum class Backend + { + None, + Graph, + Context + } m_backend{Backend::None}; + + // Graph backend + score::gfx::Graph* m_graph{}; + std::function m_onAttached; + std::function m_onAboutToDetach; + + // Context backend + GfxContext* m_ctx{}; + int32_t m_producerNodeId{-1}; + int32_t m_screenNodeId{-1}; + bool m_edgeConnected{false}; + + std::shared_ptr m_readback; + score::gfx::BackgroundNode* m_node{}; // owned by m_graph or m_ctx after attach + int m_timerId{}; +}; +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Window/OffscreenDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/Window/OffscreenDevice.hpp new file mode 100644 index 0000000000..b806133f14 --- /dev/null +++ b/src/plugins/score-plugin-gfx/Gfx/Window/OffscreenDevice.hpp @@ -0,0 +1,95 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace Gfx +{ + +// Headless device used when SCORE_FORCE_OFFSCREEN_WINDOW selects this +// window 
device by name. Wraps a BackgroundNode — which already drives +// beginOffscreenFrame/endOffscreenFrame — without the ScenarioDocumentView +// dependency of background_device. Exposes only the parameters required by +// offscreen tests (size, rendersize) and holds the shared_readback used by +// WindowDevice::grabTo to write frames to disk. +class offscreen_device : public ossia::net::device_base +{ + // unique_ptr ownership: BackgroundNode is not a QObject child of any + // parent in this class (it inherits NodeModel, not QObject), so a raw + // `new BackgroundNode` with no matching `delete` in the dtor leaked + // every offscreen device cycle — including the rhi resources its + // ~BackgroundNode → destroyOutput would have released. unique_ptr + // restores the pair. + std::unique_ptr m_node; + gfx_node_base m_root; + QObject m_qtContext; + + ossia::net::parameter_base* size_param{}; + ossia::net::parameter_base* rendersize_param{}; + +public: + offscreen_device(std::unique_ptr proto, std::string name) + : ossia::net::device_base{std::move(proto)} + , m_node{std::make_unique()} + , m_root{*this, *static_cast(m_protocol.get()), m_node.get(), name} + { + this->m_capabilities.change_tree = true; + m_node->shared_readback = std::make_shared(); + + { + auto size_node = std::make_unique("size", *this, m_root); + size_param = size_node->create_parameter(ossia::val_type::VEC2F); + size_param->push_value(ossia::vec2f{1280.f, 720.f}); + m_node->setSize(QSize{1280, 720}); + size_param->add_callback([this](const ossia::value& v) { + if(auto val = v.target()) + { + ossia::qt::run_async(&m_qtContext, [node = m_node.get(), v = *val] { + node->setSize({(int)v[0], (int)v[1]}); + }); + } + }); + m_root.add_child(std::move(size_node)); + } + + { + auto size_node + = std::make_unique("rendersize", *this, m_root); + ossia::net::set_description( + *size_node, "Set to [0, 0] to use the viewport's size"); + rendersize_param = size_node->create_parameter(ossia::val_type::VEC2F); + 
rendersize_param->push_value(ossia::vec2f{0.f, 0.f}); + rendersize_param->add_callback([this](const ossia::value& v) { + if(auto val = v.target()) + { + ossia::qt::run_async(&m_qtContext, [node = m_node.get(), v = *val] { + node->setRenderSize({(int)v[0], (int)v[1]}); + }); + } + }); + m_root.add_child(std::move(size_node)); + } + } + + ~offscreen_device() + { + m_protocol->stop(); + m_root.clear_children(); + m_protocol.reset(); + // m_node destroyed by unique_ptr → ~BackgroundNode → destroyOutput + // (releases RT/RPD/depth tex/colour tex + the offscreen rhi). + } + + score::gfx::BackgroundNode* node() const noexcept { return m_node.get(); } + + const gfx_node_base& get_root_node() const override { return m_root; } + gfx_node_base& get_root_node() override { return m_root; } +}; + +} diff --git a/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp index 434c0033ba..6ce8985356 100644 --- a/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/Window/WindowDevice.hpp @@ -37,7 +37,7 @@ static score::gfx::ScreenNode* createScreenNode( }; auto node = new score::gfx::ScreenNode{ - make_configuration(), false, (settings.autoplay || !settings.gui)}; + make_configuration(), false, (settings.autoplay && !settings.gui)}; node->setSwapchainFlag(swapFlag); node->setSwapchainFormat(swapFormat); @@ -105,6 +105,7 @@ class window_device : public ossia::net::device_base } public: + score::gfx::ScreenNode* screen() const noexcept { return m_screen; } ~window_device() { if(auto w = m_screen->window()) diff --git a/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp b/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp index bad3ba129f..5b8804a95f 100644 --- a/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/WindowCapture/WindowCaptureNode.cpp @@ -3,8 +3,11 @@ #include #include #include 
+#include #include +#include + #include #if defined(__linux__) @@ -52,7 +55,7 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer return {}; } - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override { auto& rhi = *renderer.state.rhi; @@ -73,9 +76,14 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer m_width = 640; m_height = 480; - // Use BGRA8 — native format for all capture backends + // BGRA8 covers Windows / macOS / X11 backends. PipeWire on Wayland may + // negotiate SPA_VIDEO_FORMAT_RGBA / RGBx (mapped to CapturedFrame::CPU_RGBA) + // — we recreate the texture in QRhiTexture::RGBA8 the first time a CPU_RGBA + // frame arrives. Without that branch, RGBA bytes were uploaded as BGRA and + // displayed with R/B swapped. + m_textureFormat = QRhiTexture::BGRA8; m_texture = rhi.newTexture( - QRhiTexture::BGRA8, QSize{m_width, m_height}, 1, QRhiTexture::Flag{}); + m_textureFormat, QSize{m_width, m_height}, 1, QRhiTexture::Flag{}); m_texture->create(); m_sampler = rhi.newSampler( @@ -112,11 +120,8 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer { auto [vertS, fragS] = score::gfx::makeShaders( renderer.state, score::gfx::GPUVideoDecoder::vertexShader(), frag); - - const score::gfx::Sampler samplers[] = {{m_sampler, m_texture}}; - score::gfx::defaultPassesInit( - m_p, this->node.output[0]->edges, renderer, mesh, vertS, fragS, - m_processUBO, m_materialUBO, samplers); + m_vertexS = vertS; + m_fragmentS = fragS; } // Start capturing @@ -132,6 +137,83 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer target.regionH = node.settings.regionH; const_cast(node).backend->start(target); } + + m_initialized = true; + } + + void addOutputPass( + score::gfx::RenderList& renderer, score::gfx::Edge& edge, + QRhiResourceUpdateBatch& res) override + { + if(!m_vertexS.isValid() || 
!m_fragmentS.isValid()) + return; + + auto rt = renderer.renderTargetForOutput(edge); + if(rt.renderTarget) + { + const score::gfx::Sampler samplers[] = {{m_sampler, m_texture}}; + auto pip = score::gfx::buildPipeline( + renderer, renderer.defaultTriangle(), m_vertexS, m_fragmentS, rt, + m_processUBO, m_materialUBO, samplers); + if(pip.pipeline) + m_p.emplace_back(&edge, score::gfx::Pass{rt, pip, nullptr}); + } + } + + void removeOutputPass(score::gfx::RenderList& renderer, score::gfx::Edge& edge) override + { + auto it = ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }); + if(it != m_p.end()) + { + it->second.release(); + m_p.erase(it); + } + } + + bool hasOutputPassForEdge(score::gfx::Edge& edge) const override + { + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); + } + + void releaseState(score::gfx::RenderList& r) override + { + if(!m_initialized) + return; + + if(node.backend) + const_cast(node).backend->stop(); + +#if HAS_DMABUF_IMPORT + if(m_dmaBufImporter) + m_dmaBufImporter->cleanupPlane(m_dmaBufPlane); +#endif + + for(auto& [edge, pass] : m_p) + pass.release(); + m_p.clear(); + + delete m_texture; + m_texture = nullptr; + delete m_sampler; + m_sampler = nullptr; + delete m_processUBO; + m_processUBO = nullptr; + delete m_materialUBO; + m_materialUBO = nullptr; + m_meshBuffer = {}; + m_vertexS = {}; + m_fragmentS = {}; + + m_initialized = false; + } + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); + + for(auto* edge : this->node.output[0]->edges) + addOutputPass(renderer, *edge, res); } void update( @@ -145,16 +227,41 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer if(frame.type == CapturedFrame::None || frame.width <= 0 || frame.height <= 0) return; - // Handle resize - if(frame.width != m_width || frame.height != m_height) + // Detect format mismatch and recreate the texture in the matching format. 
+ // PipeWire negotiates RGBA/RGBx on some compositors (yields CPU_RGBA); + // X11 / Windows / macOS yield CPU_BGRA. The two formats can both arrive + // in a single session if the user changes Wayland compositors mid-session + // or if the backend renegotiates. Done before the resize check so a + // simultaneous resize+format change is handled in a single create. + QRhiTexture::Format wanted = m_textureFormat; + if(frame.type == CapturedFrame::CPU_RGBA) + wanted = QRhiTexture::RGBA8; + else if(frame.type == CapturedFrame::CPU_BGRA) + wanted = QRhiTexture::BGRA8; + // Other branches (D3D11_Texture / IOSurface_Ref / DMABUF) recreate the + // texture below via createFrom(...) on the native handle and don't go + // through this CPU upload path. + + const bool formatChanged = (wanted != m_textureFormat); + const bool sizeChanged = (frame.width != m_width || frame.height != m_height); + + if(formatChanged || sizeChanged) { m_width = frame.width; m_height = frame.height; - // Only resize for CPU upload path — GPU paths recreate from native handle + // Only the CPU upload paths participate in setPixelSize/setFormat + // recreation. GPU import paths replace the texture wholesale via + // createFrom() further down. 
if(frame.type == CapturedFrame::CPU_BGRA || frame.type == CapturedFrame::CPU_RGBA) { - m_texture->setPixelSize(QSize{m_width, m_height}); + if(formatChanged) + { + m_texture->setFormat(wanted); + m_textureFormat = wanted; + } + if(sizeChanged) + m_texture->setPixelSize(QSize{m_width, m_height}); m_texture->create(); } } @@ -234,27 +341,7 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer void release(score::gfx::RenderList& r) override { - if(node.backend) - const_cast(node).backend->stop(); - -#if HAS_DMABUF_IMPORT - if(m_dmaBufImporter) - m_dmaBufImporter->cleanupPlane(m_dmaBufPlane); -#endif - - for(auto& [edge, pass] : m_p) - pass.release(); - m_p.clear(); - - delete m_texture; - m_texture = nullptr; - delete m_sampler; - m_sampler = nullptr; - delete m_processUBO; - m_processUBO = nullptr; - delete m_materialUBO; - m_materialUBO = nullptr; - m_meshBuffer = {}; + releaseState(r); } void runRenderPass( @@ -273,7 +360,10 @@ class WindowCaptureNode::Renderer : public score::gfx::NodeRenderer QRhiBuffer* m_processUBO{}; QRhiBuffer* m_materialUBO{}; QRhiTexture* m_texture{}; + QRhiTexture::Format m_textureFormat{QRhiTexture::BGRA8}; QRhiSampler* m_sampler{}; + QShader m_vertexS; + QShader m_fragmentS; score::gfx::VideoMaterialUBO m_material; int m_width{}; diff --git a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp index f6e1fe73bd..c5440f544b 100644 --- a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp +++ b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -19,6 +20,24 @@ W_OBJECT_IMPL(Gfx::WindowDevice) namespace Gfx { +// SCORE_FORCE_OFFSCREEN_WINDOW=Name1,Name2 forces any matching WindowDevice +// (whatever its Single/Background/MultiWindow mode) into a headless offscreen +// render path. Used by tests that need grabTo output but must not pop a +// platform window. 
+static bool shouldForceOffscreen(const QString& name) +{ + static const QByteArray env = qgetenv("SCORE_FORCE_OFFSCREEN_WINDOW"); + if(env.isEmpty()) + return false; + for(const auto& part : env.split(',')) + { + const auto trimmed = QString::fromUtf8(part).trimmed(); + if(!trimmed.isEmpty() && trimmed == name) + return true; + } + return false; +} + score::gfx::Window* WindowDevice::window() const noexcept { if(m_dev) @@ -75,6 +94,44 @@ void WindowDevice::disconnect() deviceChanged(prev.get(), nullptr); } +void WindowDevice::grabTo(const QString& path) const +{ + if(auto dev = dynamic_cast(m_dev.get())) + { + if(auto screen = dev->screen()) + { + if(auto win = screen->window()) + { + auto screen = win->screen(); + auto wid = win->winId(); + auto grab = screen->grabWindow(wid); + grab.save(path); + } + } + } + else if(auto dev = dynamic_cast(m_dev.get())) + { + // TODO + } + else if(auto dev = dynamic_cast(m_dev.get())) + { + if(auto node = dev->node(); node && node->shared_readback) + { + const auto& rb = *node->shared_readback; + const int w = rb.pixelSize.width(); + const int h = rb.pixelSize.height(); + const int expected = w * h * 4; + if(w > 0 && h > 0 && rb.data.size() >= expected) + { + QImage img{ + reinterpret_cast(rb.data.constData()), w, h, w * 4, + QImage::Format_RGBA8888}; + img.save(path); + } + } + } +} + bool WindowDevice::reconnect() { disconnect(); @@ -90,6 +147,18 @@ bool WindowDevice::reconnect() auto view = m_ctx.document.view(); auto main_view = view ? 
qobject_cast( &view->viewDelegate()) : nullptr; + + if(shouldForceOffscreen(m_settings.name)) + { + m_dev = std::make_unique( + std::unique_ptr(m_protocol), + m_settings.name.toStdString()); + + enableCallbacks(); + deviceChanged(nullptr, m_dev.get()); + return connected(); + } + switch(set.mode) { case WindowMode::Background: { diff --git a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp index e0549f04e5..daca092767 100644 --- a/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp +++ b/src/plugins/score-plugin-gfx/Gfx/WindowDevice.hpp @@ -2,22 +2,14 @@ #include #include -#include -#include -#include -#include +#include class QComboBox; class QCheckBox; class QDoubleSpinBox; -class QGraphicsEllipseItem; -class QGraphicsLineItem; -class QGraphicsPolygonItem; class QLabel; class QStackedWidget; class QSpinBox; -class QGraphicsView; - namespace score::gfx { class Window; @@ -74,6 +66,8 @@ class SCORE_PLUGIN_GFX_EXPORT WindowDevice final : public GfxOutputDevice void disconnect() override; bool reconnect() override; + void grabTo(const QString& path) const; + W_SLOT(grabTo) private: gfx_protocol_base* m_protocol{}; mutable std::unique_ptr m_dev; diff --git a/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp b/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp index 0766388866..2ad09cce24 100644 --- a/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp +++ b/src/plugins/score-plugin-js/JS/ApplicationPlugin.cpp @@ -17,6 +17,8 @@ #include #include +#include +#include #if __has_include() #include @@ -32,6 +34,33 @@ namespace JS { +// Check whether the input is a script, or a file path. +// An existing file always wins: a real path may legitimately contain +// characters (parentheses, braces, ...) that also occur in inline source, +// so the file-existence check must come FIRST. Only when the input is not +// an existing file do we fall back to the inline-source heuristic. 
+static bool stringIsScript(const QString& input) +{ + if(input.isEmpty()) + return false; + + if(QFileInfo fileInfo{input}; fileInfo.exists() && fileInfo.isFile()) + return false; + + if(input.length() > 4096) + return true; + + for(QChar ch : input) + { + const char16_t c = ch.unicode(); + if(c == '\n' || c == '\r' || c == ';' || c == '{' || c == '}' || c == '(' + || c == ')') + return true; + } + + return true; +} + ApplicationPlugin::ApplicationPlugin(const score::GUIApplicationContext& ctx) : score::GUIApplicationPlugin{ctx} { @@ -79,7 +108,25 @@ ApplicationPlugin::ApplicationPlugin(const score::GUIApplicationContext& ctx) parser.addOption(script_opt); parser.parse(ctx.applicationSettings.arguments); - this->m_start_script = parser.value(script_opt); + auto script = parser.value(script_opt); + if(stringIsScript(script)) + { + this->m_start_script = script; + } + else if(!script.isEmpty()) + { + QFile f{script}; + if(f.open(QIODevice::ReadOnly)) + { + this->m_start_script = f.readAll(); + this->m_start_script_path = QFileInfo{f}.canonicalPath(); + } + else + { + qWarning() << "JS::ApplicationPlugin: could not open --script file" + << script << ":" << f.errorString(); + } + } } void ApplicationPlugin::on_newDocument(score::Document& doc) @@ -124,7 +171,11 @@ void ApplicationPlugin::on_createdDocument(score::Document& doc) if(!m_start_script.isEmpty()) { - QTimer::singleShot(100, this, [this] { m_consoleEngine.evaluate(m_start_script); }); + QTimer::singleShot(100, this, [this] { + if(!m_start_script_path.isEmpty()) + m_consoleEngine.addImportPath(m_start_script_path); + m_consoleEngine.evaluate(m_start_script); + }); } } void ApplicationPlugin::afterStartup() diff --git a/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp b/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp index db4aae0d45..03558e60d4 100644 --- a/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp +++ b/src/plugins/score-plugin-js/JS/ApplicationPlugin.hpp @@ -45,5 +45,6 @@ class 
ApplicationPlugin final ossia::net::network_context_ptr m_asioContext; QString m_start_script; + QString m_start_script_path; }; } diff --git a/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp b/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp index f60a4d4709..c24132bd84 100644 --- a/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp +++ b/src/plugins/score-plugin-js/JS/Executor/GPUNode.cpp @@ -32,8 +32,10 @@ #include #include #include +#include #include +#include namespace JS { struct engine_key @@ -86,6 +88,14 @@ struct GpuNode : score::gfx::NodeModel JS::Script* m_object{}; QPointer m_item{}; + // Qt Quick runtime. Created in GpuRenderer::initState(), destroyed + // when the Engine itself is destroyed (GpuRenderer::release() drops + // the map entry and the renderer's own shared_ptr, bringing refcount + // to zero). Destruction runs while the owning QRhi is still alive — + // see the note in GpuRenderer::release() for why this matters. + QQuickRenderControl* m_quickRenderControl{}; + QQuickWindow* m_quickWindow{}; + std::vector m_jsInlets; std::vector> m_ctrlInlets; std::vector> m_impulseInlets; @@ -94,13 +104,17 @@ struct GpuNode : score::gfx::NodeModel ossia::spsc_queue ui_messages; - void init(GpuRenderer& renderer, GpuNode& node, QQuickWindow* window); + void init( + GpuRenderer& renderer, GpuNode& node, QQuickWindow* window, + score::gfx::RenderList& rl); - void createItem(GpuRenderer& renderer, GpuNode& node); + void createItem( + GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl); void updateItemTextureOut(QQuickWindow* window); - void setupComponent(GpuRenderer& renderer, GpuNode& node); + void setupComponent( + GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl); void releaseItem(); @@ -146,20 +160,21 @@ struct GpuNode : score::gfx::NodeModel std::pair> acquireEngine(QRhi* rhi) { const auto key = engine_key{std::this_thread::get_id(), rhi}; - // FIXME find if there's a more atomic way to implement this with 
insert_or_visit, - // without calling init() inside the map's lock. std::shared_ptr res; - m_engines.visit(key, [&](const auto& engine) { res = engine.second; }); - - if(!res) - { - res = std::make_shared(); - m_engines.insert({key, res}); - } + m_engines.try_emplace_and_visit( + key, + std::make_shared(), + [&](auto& slot) { res = slot.second; }, // newly-inserted visitor + [&](auto& slot) { res = slot.second; }); // existing-key visitor return {key, res}; } - void releaseEngine(QRhi* rhi) { m_engines.erase({std::this_thread::get_id(), rhi}); } + // Release by the key stored at acquire time, NOT by the current thread id. + // If releaseState() ever runs on a different thread than initState()'s + // insert (e.g. under SCORE_THREADED_GFX), erasing by the current-thread + // key would leave the stale Engine (with m_quickWindow set) mapped, and + // the next acquire would return it and trip the SCORE_ASSERT in initState(). + void releaseEngine(const engine_key& key) { m_engines.erase(key); } boost::concurrent_flat_map, engine_key_hash> m_engines; @@ -243,19 +258,53 @@ void main () std::vector m_inputSamplers; - void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + // All setup lives in initState() rather than init(), because the + // incremental graph-edit path (Graph::incrementalEdgeUpdate) calls + // initState() directly on newly-spawned renderers without ever going + // through init(). If we put setup in init(), a play/stop/play cycle + // leaves the new GpuRenderer with empty shaders, no window, no engine, + // and the next update() crashes in defaultUBOUpdate. Mirror + // RenderedISFNode's split: initState() does all shared state; + // the inherited GenericNodeRenderer::init() calls initState() then + // addOutputPass() per output edge. 
+ // Ignore the base GenericNodeRenderer::updateInputTexture behavior: + // GpuRenderer's m_samplers is a private, single-entry vector holding the + // internal "y_tex" sampler that points at m_internalTex (the texture Qt + // Quick renders into, which our fragment shader samples). Its 8 visible + // texture-inlet ports are routed through m_engine->m_texInlets and the + // per-frame res.copyTexture in update() — they are NOT meant to drive + // m_samplers. The base implementation indexes m_samplers by image-input + // position, so a sink-sampler update for input[0] (Image 1) writes + // m_samplers[0].texture = image1_rt_texture and rebinds the SRB's y_tex + // sampler away from m_internalTex, which makes the presentation render + // Image 1's content directly instead of the Qt Quick tree. This fires + // whenever Graph::updateAllSinkSamplers runs after initial pass + // construction — i.e. on every live graph edit — which is the + // "presentation reverts to Image 1" regression. + // + // Leaving it as a no-op is correct: sink-sampler updates targeting inlet + // items are already handled by GpuRenderer::update's per-frame + // copyTexture path (GPUNode.cpp:~470), which reads rt.texture fresh + // every frame. + void updateInputTexture( + const score::gfx::Port& input, QRhiTexture* tex, + QRhiTexture* depthTex = nullptr) override { - auto& rhi = *renderer.state.rhi; + } + void initState(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + auto& rhi = *renderer.state.rhi; // Init the texture on which we are going to render // FIXME RGBA32F m_internalTex = score::gfx::createRenderTarget( renderer.state, QRhiTexture::RGBA8, renderer.state.renderSize, renderer.state.samples, true); - // Init basic rendering ubos - const auto& mesh = renderer.defaultQuad(); - defaultMeshInit(renderer, mesh, res); + // Use the quad mesh (GenericNodeRenderer::initState would default to + // triangle). The inherited addOutputPass uses m_mesh to build pipelines. 
+ m_mesh = &renderer.defaultQuad(); + defaultMeshInit(renderer, *m_mesh, res); processUBOInit(renderer); std::tie(m_vertexS, m_fragmentS) = score::gfx::makeShaders(renderer.state, vertex_shader, fragment_shader); @@ -275,82 +324,97 @@ void main () m_samplers.push_back({sampler, m_internalTex.texture}); } - defaultPassesInit(renderer, mesh); + // Acquire the Engine. release() drops the map entry and our own + // ref, so we always get a fresh Engine here — tying the Qt Quick + // runtime lifetime strictly to (initState, release) lets us free + // all QRhi-owned buffers before the RHI itself is destroyed in + // Graph::~Graph. + auto [key, engine] = node.acquireEngine(&rhi); + m_engineKey = key; + m_engine = engine; + if(!m_engine) + { + m_initialized = true; + return; + } - // Init the QQuick render stuff - m_renderControl = new QQuickRenderControl{}; - m_window = new QQuickWindow{m_renderControl}; + SCORE_ASSERT(!m_engine->m_quickWindow); + m_engine->m_quickRenderControl = new QQuickRenderControl{}; + m_engine->m_quickWindow = new QQuickWindow{m_engine->m_quickRenderControl}; #if QT_HAS_VULKAN if(renderer.state.api == score::gfx::GraphicsApi::Vulkan) - { - m_window->setVulkanInstance(score::gfx::staticVulkanInstance()); - } + m_engine->m_quickWindow->setVulkanInstance( + score::gfx::staticVulkanInstance()); #endif if(auto win = renderer.state.window.lock()) { QObject::connect( - win.get(), &score::gfx::Window::interactiveEvent, m_window, - [qqw = QPointer{m_window}](QEvent* e) { + win.get(), &score::gfx::Window::interactiveEvent, + m_engine->m_quickWindow, + [qqw = QPointer{m_engine->m_quickWindow}](QEvent* e) { if(auto q = qqw.get()) QCoreApplication::sendEvent(q, e); }, Qt::DirectConnection); } - m_window->setGraphicsDevice(QQuickGraphicsDevice::fromRhi(&rhi)); - + m_engine->m_quickWindow->setGraphicsDevice( + QQuickGraphicsDevice::fromRhi(&rhi)); + m_engine->m_quickWindow->setColor(Qt::transparent); + m_engine->m_quickRenderControl->initialize(); + // Mark the 
window as "visible" so QQuickItem::grabToImage() works. + // The window is driven by QQuickRenderControl (no native OS + // window) — this only sets the internal flag. + QQuickWindowPrivate::get(m_engine->m_quickWindow)->visible = true; + + m_window = m_engine->m_quickWindow; + m_renderControl = m_engine->m_quickRenderControl; + + // Size and render target are per-RenderList and must be refreshed + // on every initState() (resize changes the RT dimensions). const auto sz = renderer.state.renderSize; m_window->setWidth(sz.width()); m_window->setHeight(sz.height()); m_window->contentItem()->setWidth(sz.width()); - m_window->contentItem()->setWidth(sz.height()); - m_window->setColor(Qt::transparent); - - m_renderControl->initialize(); + m_window->contentItem()->setHeight(sz.height()); m_window->setRenderTarget( QQuickRenderTarget::fromRhiRenderTarget(m_internalTex.renderTarget)); - // Mark the window as "visible" so that QQuickItem::grabToImage() works. - // The window is managed by QQuickRenderControl (no native OS window), - // so this only sets the internal flag without creating a real window. - QQuickWindowPrivate::get(m_window)->visible = true; + m_engine->init(*this, node, m_window, renderer); + // Tolerant of script/port mismatches (live-edited QML may not line up + // with the node's declared ports): skip bad inlets instead of aborting. + // Mirrors Engine::setupComponent's guards. 
+ for(auto& [texture_in, i] : this->m_engine->m_texInlets) + { + if(i >= (int)this->node.input.size()) + continue; + score::gfx::Port* port = this->node.input[i]; + if(!port || port->type != score::gfx::Types::Image) + continue; + auto rt = renderer.renderTargetForInputPort(*port); + auto item = qobject_cast(texture_in->item()); + if(item && rt.texture) + item->setSize(rt.texture->pixelSize()); + } + sourceIndex.store(node.sourceIndex.load()); + m_initialized = true; } void reloadEngine(score::gfx::RenderList& renderer) { - auto* rhi = renderer.state.rhi; - auto oldSourceIndex = this->sourceIndex.exchange(this->node.sourceIndex); - //= std::exchange(this->sourceIndex, this->node.sourceIndex.load()); - // yes technically there is the overflow case but it's 2^64 editions away... - if(oldSourceIndex < this->node.sourceIndex) - { - if(m_engine) - { - m_engine->releaseItem(); - } - - node.releaseEngine(rhi); - m_engine.reset(); - auto [key, engine] = node.acquireEngine(rhi); - m_tid = key.id; - m_engine = engine; - if(m_engine) - { - m_engine->init(*this, node, m_window); + // Guard: initState() bails out early if Engine acquisition failed, + // leaving m_window/m_renderControl/m_engine null. update() can still + // be invoked in that degraded state — short-circuit here. + if(!m_window || !m_renderControl || !m_engine) + return; - for(auto& [texture_in, i] : this->m_engine->m_texInlets) - { - SCORE_ASSERT(this->node.input.size() > i); - score::gfx::Port* port = this->node.input[i]; - SCORE_ASSERT(port->type == score::gfx::Types::Image); - auto rt = renderer.renderTargetForInputPort(*port); - auto item = qobject_cast(texture_in->item()); - SCORE_ASSERT(item); - if(rt.texture) - item->setSize(rt.texture->pixelSize()); - } - } - } + // NOTE: GpuNode::sourceIndex is fixed at 1 and never incremented (the + // incrementer that drove the in-place script reload was removed), so the + // GpuRenderer::sourceIndex seeded in initState() always equals it. 
The + // mid-play "drop the QML tree, keep the QQuickWindow, re-init" reload + // branch that used to live here was therefore dead code and has been + // removed. A live script change currently goes through a full + // releaseState()/initState() cycle instead. } void update( @@ -360,30 +424,64 @@ void main () reloadEngine(renderer); defaultUBOUpdate(renderer, res); - // Schedule a copy of the input textures into the actual textures + if(!m_engine) + return; + + // Schedule a copy of the input textures into the actual textures. + // Tolerant of script/port mismatches (live-edited QML): skip bad inlets + // instead of asserting. Mirrors Engine::setupComponent's guards. { for(auto& [texture_in, i] : this->m_engine->m_texInlets) { - SCORE_ASSERT(this->node.input.size() > i); + if(i >= (int)this->node.input.size()) + continue; score::gfx::Port* port = this->node.input[i]; - SCORE_ASSERT(port->type == score::gfx::Types::Image); + if(!port || port->type != score::gfx::Types::Image) + continue; auto rt = renderer.renderTargetForInputPort(*port); auto item = qobject_cast(texture_in->item()); - SCORE_ASSERT(item); + if(!item) + continue; auto itemRenderer = item->renderer; auto texture = item->texture; if(itemRenderer && texture && rt.texture) { - if(rt.texture->pixelSize() == texture->pixelSize() - && rt.texture->sampleCount() == texture->sampleCount()) + const bool sameSize = rt.texture->pixelSize() == texture->pixelSize(); + const bool sameSamples + = rt.texture->sampleCount() == texture->sampleCount(); + if(sameSize && sameSamples) { QRhiTextureCopyDescription desc; res.copyTexture(texture, rt.texture, desc); } + else if(!sameSize) + { + // The upstream RT changed dimensions since the last initState(). + // Resize the inlet item so Qt Quick rebuilds its QSGRhiLayer at + // the new size; this frame's copy is intentionally skipped + // (src/dst pair is mismatched) and the next update() will copy + // correctly once the layer texture is recreated. 
+ item->setSize(rt.texture->pixelSize()); + } else { - qDebug() << "Mismatch!!!" << rt.texture->pixelSize() << texture->pixelSize() - << rt.texture->sampleCount() << texture->sampleCount(); + // Size matches but sample count differs (e.g. the inlet item's + // QSGRhiLayer is single-sampled while the upstream RT is MSAA). + // QRhi::copyTexture requires matching sample counts, so the copy + // can't run and setSize() is a no-op here — without a diagnostic + // the inlet would stay silently black. We can't resolve/recreate + // the layer at a different sample count from outside Qt Quick, so + // the defined fallback is: skip the copy (the inlet keeps its + // last content rather than showing undefined data) and warn once + // per item so the condition is observable. + if(m_warnedSampleMismatch.insert(item).second) + { + qWarning() << "JS::GPUNode: texture inlet" << i + << "sample-count mismatch (upstream" + << rt.texture->sampleCount() << "vs inlet" + << texture->sampleCount() + << ") - copy skipped, inlet may appear stale/black"; + } } } } @@ -406,6 +504,8 @@ void main () score::gfx::RenderList& renderer, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& res, score::gfx::Edge& e) override { + if(!m_window || !m_renderControl || !m_engine) + return; // Here we run the Qt Quick render loop which handles its own pass if(auto sz = m_window->size(); sz != m_window->contentItem()->size()) { @@ -429,7 +529,6 @@ void main () item->update(); } } - // 2. 
Render m_window->beforeRendering(); @@ -439,7 +538,6 @@ void main () cd->deliveryAgentPrivate()->flushFrameSynchronousEvents(m_window); cd->polishItems(); - m_window->afterRendering(); m_window->afterAnimating(); @@ -454,20 +552,55 @@ void main () cd->syncSceneGraph(); rc->rc->endSync(); - // render: cd->renderSceneGraph(); - // endFrame: m_window->afterFrameEnd(); + // Disassociate our transient cb — Qt's own qsgrhisupport pairs + // setCustomCommandBuffer(cb) with setCustomCommandBuffer(nullptr) + // to avoid leaving a dangling pointer past the frame. + cd->setCustomCommandBuffer(nullptr); + // Symmetric reset of QQuickRenderControlPrivate::cb. The earlier + // assignment at `rc->cb = &cb` (line ~523) bound the private field + // to a stack reference parameter; without this nullptr reset the + // pointer dangled into reclaimed stack memory after the frame + // returned. Whether Qt internals dereferenced it between frames + // depended on the QQuickRenderControlPrivate event-loop paths + // (animation tick / glyph upload completion / sync without render), + // but the fix is one line either way and removes the foot-gun. + rc->cb = nullptr; + + // Force-drain Qt Quick's glyph-cache resource-update batch. The batch + // is lazily allocated in preprocess() (storeGlyphs → createTexture → + // glyphCacheResourceUpdates) and is normally released when a glyph + // node renders and calls commitResourceUpdates. When the QML scene + // has no glyph node, preprocess still populates the cache but no + // draw ever commits → the batch stays pinned, permanently consuming + // one slot of the 64-slot QRhi pool *per render context*. Each + // window resize spawns a fresh QQuickRenderControl + render context, + // so after a handful of resizes the pool exhausts and SIGSEGV lands + // inside QSGRhiDistanceFieldGlyphCache::createTexture. Merge any + // pending uploads into our outer batch so they still land, then + // reset the context's pointer so the pool slot returns. 
+ if(auto* rcp = QQuickRenderControlPrivate::get(m_renderControl)) + { + if(auto* defRc = qobject_cast(rcp->rc)) + { + if(auto* pending = defRc->maybeGlyphCacheResourceUpdates()) + { + if(res) + res->merge(pending); + defRc->resetGlyphCacheResources(); + } + } + } if(m_engine && m_engine->m_engine) { m_engine->m_engine->collectGarbage(); } - - QEvent* updateRequest = new QEvent(QEvent::UpdateRequest); - QCoreApplication::postEvent(m_window, updateRequest); + // No UpdateRequest post needed: runInitialPasses drives sync/render + // directly via polishItems/syncSceneGraph/renderSceneGraph each frame. } void runRenderPass( @@ -476,16 +609,12 @@ void main () { const auto& mesh = renderer.defaultQuad(); defaultRenderPass(renderer, mesh, cb, edge); - m_window->frameSwapped(); + if(m_window) + m_window->frameSwapped(); } - void release(score::gfx::RenderList& r) override + void releaseState(score::gfx::RenderList& r) override { - if(m_engine) - { - m_engine->releaseItem(); - } - for(auto sampler : m_inputSamplers) { delete sampler.sampler; @@ -493,16 +622,48 @@ void main () } m_inputSamplers.clear(); - if(m_window) - { - m_window->deleteLater(); - m_window = nullptr; - } - - if(m_renderControl) + // Tear down the Engine here — this is the last hook we get while + // the QRhi is still alive. Graph::~Graph calls RenderList::release() + // before out->destroyOutput() (which calls RenderState::destroy(), + // killing the RHI); the GpuRenderer destructor runs later, after + // the RHI is gone, so any QRhi-owned buffers still held by the + // QQuickRenderControl/QQuickWindow would leak (VUID-vkDestroyDevice + // validation fires at process exit). + // + // An earlier version kept the Engine alive across release+init to + // avoid re-creating the Qt Quick scene graph on every window + // resize, because each cycle pinned ~1 batch slot in Qt Quick's + // response to setRenderTarget. 
That workaround is no longer needed: + // the real batch-pool exhaustion was SimpleRenderedISFNode::initPass + // leaking an unsubmitted batch per addOutputPass (fixed separately), + // and Qt Quick's per-cycle slot churn alone doesn't exhaust the + // 64-slot pool in practice. + // + // Living in releaseState() (not release()) is what lets live graph + // edits that make this node unreachable actually free the Engine: + // Graph::reconcileAllRenderLists calls releaseState() on orphaned + // renderers, never release(). A previous version had the teardown + // in release(), which meant node.releaseEngine() never ran on a + // live disconnect — the next reconnection's acquireEngine returned + // the stale entry with m_quickWindow already set and tripped the + // SCORE_ASSERT in initState(). + // + // USER-VISIBLE BEHAVIOR (known tradeoff): destroying the Engine here + // discards the entire QML runtime — the QQmlEngine, the Script object + // and ALL its script-side runtime state (JS variables, timers, + // accumulated/animation state, etc.). Because releaseState()/initState() + // run on every output resize (the render-target dimensions change), a + // mid-performance window/output resize silently restarts the user's + // script from scratch. Only the declared model state (node.m_modelState, + // replayed via Script.loadState() in Engine::setupComponent) survives; + // anything the script kept in plain JS variables is lost. This is + // accepted for the deterministic-teardown lifetime guarantees above. 
+ m_window = nullptr; + m_renderControl = nullptr; + if(m_engine) { - m_renderControl->deleteLater(); - m_renderControl = nullptr; + m_engine.reset(); + node.releaseEngine(m_engineKey); } m_internalTex.release(); @@ -510,15 +671,23 @@ void main () defaultRelease(r); } + void release(score::gfx::RenderList& r) override { releaseState(r); } + score::gfx::TextureRenderTarget m_internalTex; QQuickRenderControl* m_renderControl{}; QQuickWindow* m_window{}; ossia::spsc_queue m_messages; - std::thread::id m_tid; + // Key under which our Engine was inserted in node.m_engines at acquire + // time. We release by this stored key (see GpuNode::releaseEngine). + JS::engine_key m_engineKey{}; std::shared_ptr m_engine; + // Texture inlet items for which a sample-count mismatch has already been + // reported, to rate-limit the warning to once per item (see update()). + std::set m_warnedSampleMismatch; + friend struct GpuNode; }; @@ -576,8 +745,9 @@ GpuNode::GpuNode( } } } -GpuNode::~GpuNode() { } - +GpuNode::~GpuNode() +{ +} void GpuNode::Engine::tick() { @@ -653,21 +823,52 @@ GpuNode::Engine::~Engine() m_context = nullptr; m_engine = nullptr; // Not owned here! + + // Destroy the persistent Qt Quick runtime synchronously. Order matches + // Qt's own QQuickWidget: QQuickRenderControl first (its destructor + // calls invalidate() and deletes the QSGRenderContext), then the + // QQuickWindow. + delete m_quickRenderControl; + m_quickRenderControl = nullptr; + delete m_quickWindow; + m_quickWindow = nullptr; } void GpuNode::Engine::releaseItem() { - qDebug(Q_FUNC_INFO); if(m_item) { + // LOAD-BEARING: these two detach calls must precede deleteLater(). + // The immediate caller (GpuRenderer::reloadEngine, GPUNode.cpp:419-420) + // follows this with init(), whose QML reactive bindings and child-walkers + // must not observe the dying item. setParentItem(nullptr) removes it from + // contentItem->childItems() synchronously; setParent(nullptr) severs the + // QObject ownership chain. 
deleteLater() then safely defers actual + // destruction to the next event loop tick. Collapsing the two detach + // calls into deleteLater() alone would briefly expose two items under + // contentItem to the new createItem(), breaking the scene graph. m_item->setParent(nullptr); m_item->setParentItem(nullptr); m_item->deleteLater(); m_item = nullptr; } + // A script reload destroys the whole QML tree. Clear the script- + // associated state here so Engine::init()'s `if(!m_item)` rebuild + // path can recreate everything cleanly without leaking the old + // component/object or appending to the inlet vectors. + delete m_object; + m_object = nullptr; + delete m_component; + m_component = nullptr; + m_jsInlets.clear(); + m_ctrlInlets.clear(); + m_impulseInlets.clear(); + m_valInlets.clear(); + m_texInlets.clear(); } -void GpuNode::Engine::setupComponent(GpuRenderer& renderer, GpuNode& node) +void GpuNode::Engine::setupComponent( + GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl) { // FIXME refactor with CPUNode // FIXME only works because same thread right now. @@ -685,18 +886,13 @@ void GpuNode::Engine::setupComponent(GpuRenderer& renderer, GpuNode& node) }, Qt::QueuedConnection); }, Qt::DirectConnection); - if(const auto& on_load = m_object->loadState(); on_load.isCallable()) - { - QVariantMap vm; - for(auto& [k, v]: node.m_modelState) { - if(auto res = v.apply(ossia::qt::ossia_to_qvariant{}); res.isValid()) - vm[k] = std::move(res); - } - on_load.call({m_engine->toScriptValue(vm)}); - } - + // (1) Enumerate QML children into the typed inlet vectors FIRST. loadState() + // below fires reactive bindings like `ShaderEffectSource.sourceItem = + // root.inletItems[src]`; those need each inlet item to already be at its + // final pixel size so QQuickShaderEffectSource::updatePaintNode + // (qquickshadereffectsource.cpp:657-664) does not take the "source item + // is 0x0, delete paint node, return nullptr" branch on the first sync. 
int input_i = 0; - for(auto n : m_object->children()) { if(auto imp_in = qobject_cast(n)) @@ -725,6 +921,44 @@ void GpuNode::Engine::setupComponent(GpuRenderer& renderer, GpuNode& node) input_i++; } } + + // (2) Size each texture-inlet item to its upstream RT's pixel size BEFORE + // loadState runs. QML's Component.onCompleted has already rebound each + // inlet item's width/height to inletContainer.width/.height via + // Qt.binding (presentation.qml:50-53), and inletContainer is 0x0 at + // this point because outputRoot hasn't been reparented to contentItem + // yet (updateItemTextureOut runs after this). Setting the size + // explicitly breaks that binding and pins each item to the RT pixel + // size — which is exactly what the copyTexture(rt.texture -> + // item->texture) in GpuRenderer::update requires anyway (that copy is + // skipped on any pixelSize mismatch — GPUNode.cpp:456-466). + for(auto& [texture_in, i] : m_texInlets) + { + if(i >= (int)node.input.size()) + continue; + score::gfx::Port* port = node.input[i]; + if(!port || port->type != score::gfx::Types::Image) + continue; + auto rt = rl.renderTargetForInputPort(*port); + auto* item = qobject_cast(texture_in->item()); + if(item && rt.texture) + item->setSize(rt.texture->pixelSize()); + } + + // (3) Now run loadState. Every ShaderEffectSource that resolves its + // sourceItem to an inletItem during the stateVersion++ re-binding pass + // will see a non-zero-sized source item and the first scene-graph sync + // will create its QSGRhiLayer (qsgrhilayer.cpp:248-254 "!m_item || + // m_pixelSize.isEmpty()" branch is avoided). 
+ if(const auto& on_load = m_object->loadState(); on_load.isCallable()) + { + QVariantMap vm; + for(auto& [k, v]: node.m_modelState) { + if(auto res = v.apply(ossia::qt::ossia_to_qvariant{}); res.isValid()) + vm[k] = std::move(res); + } + on_load.call({m_engine->toScriptValue(vm)}); + } } void GpuNode::Engine::updateItemTextureOut(QQuickWindow* window) @@ -744,14 +978,15 @@ void GpuNode::Engine::updateItemTextureOut(QQuickWindow* window) } } -void GpuNode::Engine::createItem(GpuRenderer& renderer, GpuNode& node) +void GpuNode::Engine::createItem( + GpuRenderer& renderer, GpuNode& node, score::gfx::RenderList& rl) { m_component = new QQmlComponent{this->m_engine.get()}; m_component->setData(node.source.toUtf8(), QUrl::fromLocalFile(node.m_root)); if(m_component->isError()) { - qDebug() << m_component->errorString(); + qWarning() << m_component->errorString(); return; } @@ -763,10 +998,12 @@ void GpuNode::Engine::createItem(GpuRenderer& renderer, GpuNode& node) return; } - setupComponent(renderer, node); + setupComponent(renderer, node, rl); } -void GpuNode::Engine::init(GpuRenderer& renderer, GpuNode& node, QQuickWindow* window) +void GpuNode::Engine::init( + GpuRenderer& renderer, GpuNode& node, QQuickWindow* window, + score::gfx::RenderList& rl) { if(!m_item) { @@ -784,13 +1021,13 @@ void GpuNode::Engine::init(GpuRenderer& renderer, GpuNode& node, QQuickWindow* w if(!m_context) { m_context = new QQmlContext{m_engine.get()}; - m_execFuncs = new DeviceContext{*m_engine}; + m_execFuncs = new DeviceContext{*m_engine, m_context}; m_execFuncs->init(); m_context->setContextProperty("Device", m_execFuncs); setupExecFuncs(this, &node, m_execFuncs->m_impl); } - createItem(renderer, node); + createItem(renderer, node, rl); } updateItemTextureOut(window); @@ -854,62 +1091,45 @@ void gpu_exec_node::setScript( exec_context->ui->unregister_node(id); id = score::gfx::invalid_node_index; - //if(id < 0) + auto n = std::make_unique( + m_context, std::move(new_state), root, str, 
this->root_inputs(), + this->root_outputs()); + { - auto n = std::make_unique( - m_context, std::move(new_state), root, str, this->root_inputs(), - this->root_outputs()); + auto& element = *m_context; + n->moveToThread(m_context->thread()); + n->m_uiContext = m_context; + n->m_messageToUi = [ctx=m_context] (const QVariant& v){ + OSSIA_ENSURE_CURRENT_THREAD_KIND(ossia::thread_type::Ui); + if(!ctx) + return; + ctx->executionToUi(v); + }; + + QObject::connect( + &element, &JS::ProcessModel::uiToExecution, n.get(), &JS::GpuNode::uiMessage); + QObject::connect( + &element, &JS::ProcessModel::stateElementChanged, n.get(), + &JS::GpuNode::stateElementChanged); { - auto& element = *m_context; - - n->moveToThread(m_context->thread()); - n->m_uiContext = m_context; - n->m_messageToUi = [ctx=m_context] (const QVariant& v){ - OSSIA_ENSURE_CURRENT_THREAD_KIND(ossia::thread_type::Ui); - if(!ctx) - return; - ctx->executionToUi(v); - }; - QObject::connect( - &element, &JS::ProcessModel::uiToExecution, n.get(), &JS::GpuNode::uiMessage); - QObject::connect( - &element, &JS::ProcessModel::stateElementChanged, n.get(), - &JS::GpuNode::stateElementChanged); + int i = 0; + for(auto& ctl : element.inlets()) { - - int i = 0; - for(auto& ctl : element.inlets()) + if(auto ctrl = qobject_cast(ctl)) { - if(auto ctrl = qobject_cast(ctl)) - { - ossia::texture_inlet& inl - = static_cast(*root_inputs()[i]); - n->process(i, inl.data); // Setup render_target_spec - // FIXME this should be done at a more general level, right now it's only done here - // and in avendish nodes - } - i++; + ossia::texture_inlet& inl + = static_cast(*root_inputs()[i]); + n->process(i, inl.data); // Setup render_target_spec + // FIXME this should be done at a more general level, right now it's only done here + // and in avendish nodes } + i++; } } - id = exec_context->ui->register_node(std::move(n)); - } - /* - else - { - // FIXME need to update the ports if they changed on the host side! 
- auto msg = exec_context->allocateMessage(1); - msg.node_id = id; - msg.input.emplace_back(score::gfx::FunctionMessage{[str](score::gfx::Node& nn) { - auto& n = static_cast(nn); - n.source = str; // FIXME mutex - n.sourceIndex++; - }}); - exec_context->ui->send_message(std::move(msg)); } -*/ + id = exec_context->ui->register_node(std::move(n)); } } #endif diff --git a/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp b/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp index b274479a26..99f2820a47 100644 --- a/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp +++ b/src/plugins/score-plugin-js/JS/Qml/EditContext.hpp @@ -1,4 +1,5 @@ #pragma once +#include #include #include @@ -157,7 +158,9 @@ class SCORE_PLUGIN_JS_EXPORT EditJsContext : public QObject W_SLOT(outlets) QObject* createCable(QObject* outlet, QObject* inlet); - W_SLOT(createCable) + W_SLOT(createCable, (QObject*, QObject*)) + QObject* createCable(QObject* outlet, QObject* inlet, Process::CableType type); + W_SLOT(createCable, (QObject*, QObject*, Process::CableType)) void setAddress(QObject* obj, QString addr); W_SLOT(setAddress) diff --git a/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp b/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp index b5846702f7..bb1549e8cf 100644 --- a/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp +++ b/src/plugins/score-plugin-js/JS/Qml/EditContext.port.cpp @@ -103,6 +103,12 @@ int EditJsContext::outlets(QObject* obj) } QObject* EditJsContext::createCable(QObject* outlet, QObject* inlet) +{ + return createCable(outlet, inlet, Process::CableType::ImmediateGlutton); +} + +QObject* +EditJsContext::createCable(QObject* outlet, QObject* inlet, Process::CableType tp) { auto doc = ctx(); if(!doc) @@ -118,7 +124,7 @@ QObject* EditJsContext::createCable(QObject* outlet, QObject* inlet) auto& root = score::IDocument::get(doc->document); auto [m, _] = macro(*doc); - auto& c = m->createCable(root, *src, *sink, Process::CableType::ImmediateGlutton); + auto& c = 
m->createCable(root, *src, *sink, tp); return &c; } diff --git a/src/plugins/score-plugin-threedim/CMakeLists.txt b/src/plugins/score-plugin-threedim/CMakeLists.txt index cf29d7946f..16ffced3ac 100644 --- a/src/plugins/score-plugin-threedim/CMakeLists.txt +++ b/src/plugins/score-plugin-threedim/CMakeLists.txt @@ -14,6 +14,24 @@ endif() find_package(${QT_VERSION} REQUIRED COMPONENTS Xml) +# fastgltf — vendored glTF 2.0 parser. The library auto-downloads simdjson +# on first configure (into 3rdparty/fastgltf/deps/simdjson/) unless a +# system simdjson is found via find_package. +if(NOT TARGET fastgltf) + set(FASTGLTF_COMPILE_AS_CPP20 ON CACHE BOOL "" FORCE) + add_subdirectory("${3RDPARTY_FOLDER}/fastgltf" "${CMAKE_CURRENT_BINARY_DIR}/fastgltf" EXCLUDE_FROM_ALL) +endif() + +# spz — Niantic / Adobe reference SPZ decoder for compressed 3DGS files. +# v1-3 only (v4/ZSTD stubbed; see 3rdparty/spz/CMakeLists.txt). Pulls +# in zlib via ZLIB::ZLIB. +if(NOT TARGET spz) + set(SPZ_BUILD_TOOLS OFF CACHE BOOL "" FORCE) + set(SPZ_BUILD_PYTHON_BINDINGS OFF CACHE BOOL "" FORCE) + set(SPZ_BUILD_EXTENSIONS OFF CACHE BOOL "" FORCE) + add_subdirectory("${3RDPARTY_FOLDER}/spz" "${CMAKE_CURRENT_BINARY_DIR}/spz" EXCLUDE_FROM_ALL) +endif() + # libssynth add_library( ssynth STATIC @@ -77,12 +95,125 @@ add_library( Threedim/GeometryToBufferStrategies.cpp Threedim/Noise.hpp Threedim/Noise.cpp - Threedim/ObjLoader.hpp - Threedim/ObjLoader.cpp + Threedim/GeometryLoader.hpp + Threedim/GeometryLoader.cpp + Threedim/AssetLoader.hpp + Threedim/AssetLoader.cpp + Threedim/FbxParser.hpp + Threedim/FbxParser.cpp + Threedim/GltfParser.hpp + Threedim/GltfParser.cpp + Threedim/VcgImporters.hpp + Threedim/VcgImporters.cpp + Threedim/Camera.hpp + Threedim/Camera.cpp + Threedim/CameraArray.hpp + Threedim/CameraArray.cpp + Threedim/CameraSwitch.hpp + Threedim/Light.hpp + Threedim/Light.cpp + Threedim/Transform3D.hpp + Threedim/Transform3D.cpp + Threedim/TransformHelper.hpp + + 
Threedim/ScenePreprocessor/Executor.hpp + Threedim/ScenePreprocessor/Executor.cpp + Threedim/ScenePreprocessor/Metadata.hpp + Threedim/ScenePreprocessor/Process.hpp + Threedim/ScenePreprocessor/Process.cpp + + Threedim/SceneFilter/Executor.hpp + Threedim/SceneFilter/Executor.cpp + Threedim/SceneFilter/Metadata.hpp + Threedim/SceneFilter/Process.hpp + Threedim/SceneFilter/Process.cpp + + Threedim/FlattenedSceneFilter/Executor.hpp + Threedim/FlattenedSceneFilter/Executor.cpp + Threedim/FlattenedSceneFilter/Metadata.hpp + Threedim/FlattenedSceneFilter/Process.hpp + Threedim/FlattenedSceneFilter/Process.cpp + + Threedim/MergeGeometries/Executor.hpp + Threedim/MergeGeometries/Executor.cpp + Threedim/MergeGeometries/Metadata.hpp + Threedim/MergeGeometries/Process.hpp + Threedim/MergeGeometries/Process.cpp + + Threedim/SceneGraphFilter.hpp + Threedim/SceneGraphFilter.cpp + Threedim/SceneSwitch.hpp + Threedim/SceneSelector.hpp + Threedim/SceneSelector.cpp + Threedim/SceneGroup.hpp + Threedim/SceneGroup.cpp + Threedim/SceneDuplicator.hpp + Threedim/SceneDuplicator.cpp + Threedim/SceneFromMeshes.hpp + Threedim/SceneFromMeshes.cpp + Threedim/SceneInspector.hpp + Threedim/SceneInspector.cpp + Threedim/CreateCollection.hpp + Threedim/CreateCollection.cpp + Threedim/SceneResourceRoute.hpp + Threedim/SceneResourceRoute.cpp + Threedim/InjectBuffer.hpp + Threedim/InjectBuffer.cpp + Threedim/InjectTexture.hpp + Threedim/InjectTexture.cpp + Threedim/TagAs.hpp + Threedim/TagAs.cpp + Threedim/PBRMesh.hpp + Threedim/PBRMesh.cpp + Threedim/MaterialOverride.hpp + Threedim/MaterialOverride.cpp + Threedim/ConfigurePrimitive.hpp + Threedim/ConfigurePrimitive.cpp + Threedim/Instancer.hpp + Threedim/Instancer.cpp + Threedim/ShadowCascadeSetup.hpp + Threedim/ShadowCascadeSetup.cpp + Threedim/EnvironmentLoader.hpp + Threedim/EnvironmentLoader.cpp + Threedim/AnimationPlayer.hpp + Threedim/AnimationPlayer.cpp + Threedim/HumanoidPose.hpp + Threedim/HumanoidPresets.hpp + 
Threedim/HumanoidRetarget.hpp + Threedim/HumanoidSourceAdapters.hpp + Threedim/HumanoidSourceMaps.hpp + Threedim/InverseKinematics.hpp + Threedim/TextToMesh.hpp + Threedim/TextToMesh.cpp + Threedim/TextToTexture.hpp + Threedim/ExtractBuffer2.hpp + Threedim/ExtractBuffer2.cpp + Threedim/ExtractSceneBuffer.hpp + Threedim/ExtractSceneBuffer.cpp + Threedim/ExtractTexture.hpp + Threedim/ExtractTexture.cpp + Threedim/BufferInfo.hpp + Threedim/TextureInfo.hpp + Threedim/ImageLoader.hpp + Threedim/ImageLoader.cpp + Threedim/TangentUtils.hpp + Threedim/BufferToGeometryCommon.hpp + Threedim/Debug.hpp + Threedim/MeshHelpers.hpp Threedim/PCLToGeometry.hpp Threedim/PCLToGeometry.cpp Threedim/Ply.hpp Threedim/Ply.cpp + Threedim/PrimitiveCloud/PlyParser.hpp + Threedim/PrimitiveCloud/PlyParser.cpp + Threedim/PrimitiveCloud/SplatBinary.hpp + Threedim/PrimitiveCloud/SplatBinary.cpp + Threedim/PrimitiveCloud/SpzCodec.hpp + Threedim/PrimitiveCloud/SpzCodec.cpp + Threedim/PrimitiveCloud/SceneFromCloud.hpp + Threedim/PrimitiveCloud/SceneFromCloud.cpp + Threedim/PrimitiveCloud/FormatOverride.hpp + Threedim/PrimitiveCloud/FormatOverride.cpp Threedim/Primitive.hpp Threedim/Primitive.cpp Threedim/StructureSynth.hpp @@ -107,16 +238,9 @@ add_library( Threedim/RenderPipeline/Process.cpp Threedim/RenderPipeline/Layer.hpp - Threedim/Splat/Executor.hpp - Threedim/Splat/Executor.cpp - Threedim/Splat/Metadata.hpp - Threedim/Splat/Process.hpp - Threedim/Splat/Process.cpp - Threedim/Splat/GaussianSplatNode.hpp - Threedim/Splat/GaussianSplatNode.cpp - "${3RDPARTY_FOLDER}/miniply/miniply.cpp" "${3RDPARTY_FOLDER}/mikktspace/mikktspace.c" + "${3RDPARTY_FOLDER}/ufbx/ufbx.c" score_plugin_threedim.hpp score_plugin_threedim.cpp) @@ -132,8 +256,10 @@ target_include_directories( "${3RDPARTY_FOLDER}/vcglib" "${3RDPARTY_FOLDER}/miniply" "${3RDPARTY_FOLDER}/mikktspace" - "${3RDPARTY_FOLDER}/opengametools/src") + "${3RDPARTY_FOLDER}/opengametools/src" + "${3RDPARTY_FOLDER}/ufbx") target_link_libraries( 
score_plugin_threedim PRIVATE score_plugin_engine score_plugin_avnd - score_plugin_gfx fmt::fmt ssynth Eigen3::Eigen) + score_plugin_gfx fmt::fmt ssynth Eigen3::Eigen + fastgltf::fastgltf spz) diff --git a/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.cpp b/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.cpp new file mode 100644 index 0000000000..d2d2cf02a0 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/AnimationPlayer.cpp @@ -0,0 +1,442 @@ +#include "AnimationPlayer.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +namespace Threedim +{ + +namespace +{ + +// Accumulated TRS override for a single scene_node. Any missing field +// (has_* == false) leaves the original value untouched. +struct TRSOverride +{ + float translation[3]{}; + float rotation[4]{}; // quaternion x,y,z,w + float scale[3]{}; + bool has_translation{false}; + bool has_rotation{false}; + bool has_scale{false}; +}; + +using TRSMap = std::unordered_map; + +// Binary-search for the segment `[times[i], times[i+1]]` that contains `t`. +// Returns (i, alpha) with alpha ∈ [0, 1). For t at or after the last +// keyframe, returns (n-2, 1) so the caller lands on the final value. +struct SegmentLookup +{ + std::size_t lower{}; + float alpha{}; +}; + +SegmentLookup +findSegment(const std::vector& times, float t) noexcept +{ + const std::size_t n = times.size(); + if(n == 0) + return {0, 0.f}; + if(n == 1 || t <= times.front()) + return {0, 0.f}; + if(t >= times.back()) + return {n - 1, 1.f}; // alpha unused in the lerp when clamped below + + // std::upper_bound finds the first key > t → segment is its left neighbour. + auto it = std::upper_bound(times.begin(), times.end(), t); + const std::size_t upper = std::size_t(it - times.begin()); + const std::size_t lower = upper - 1; + const float t0 = times[lower]; + const float t1 = times[upper]; + const float span = t1 - t0; + const float alpha = span > 1e-8f ? 
(t - t0) / span : 0.f; + return {lower, alpha}; +} + +// Lerp for scalars / vec3 / vec4 depending on `stride`. Step and linear +// covered; cubic_spline is treated as linear for this first pass (proper +// cubic_spline keyframes pack `in-tangent, value, out-tangent` per slot +// at 3× stride — handling it right requires knowing the format, added +// later). +void sampleLinear( + const std::vector& values, std::size_t stride, SegmentLookup s, + float* out) noexcept +{ + const std::size_t n = values.size() / stride; + if(n == 0) + return; + if(s.lower >= n - 1 || s.alpha <= 0.f) + { + const std::size_t idx = std::min(s.lower, n - 1); + std::memcpy(out, values.data() + idx * stride, stride * sizeof(float)); + return; + } + const float* a = values.data() + s.lower * stride; + const float* b = values.data() + (s.lower + 1) * stride; + const float alpha = s.alpha; + for(std::size_t i = 0; i < stride; ++i) + out[i] = a[i] + (b[i] - a[i]) * alpha; +} + +// Quaternion slerp via QQuaternion — handles shortest-arc vs. double-cover. +void sampleSlerp( + const std::vector& values, SegmentLookup s, float out[4]) noexcept +{ + const std::size_t n = values.size() / 4; + if(n == 0) + return; + if(s.lower >= n - 1 || s.alpha <= 0.f) + { + const std::size_t idx = std::min(s.lower, n - 1); + std::memcpy(out, values.data() + idx * 4, 4 * sizeof(float)); + return; + } + const float* a = values.data() + s.lower * 4; + const float* b = values.data() + (s.lower + 1) * 4; + // glTF convention: (x, y, z, w). QQuaternion uses (scalar, x, y, z). + QQuaternion qa(a[3], a[0], a[1], a[2]); + QQuaternion qb(b[3], b[0], b[1], b[2]); + QQuaternion r = QQuaternion::slerp(qa, qb, s.alpha).normalized(); + out[0] = r.x(); + out[1] = r.y(); + out[2] = r.z(); + out[3] = r.scalar(); +} + +// Walk the raw scene tree and emit a cloned subtree with overrides +// applied. 
// Walks the input scene tree and produces a copy in which the nodes
// listed in `overrides` carry the sampled TRS. Subtrees that contain no
// animated node are returned as the same shared_ptr (structural sharing),
// so only the path from a root down to an animated node is re-allocated.
//
// NOTE(review): in this copy of the source the template argument lists
// are missing on several expressions below (`ossia::get_if(...)`,
// `std::make_shared(...)`, `std::vector new_children`). They look like
// an extraction artefact; restore them from upstream before compiling.
struct CloneVisitor
{
  // Sampled TRS per scene_node id; held by reference, so the map must
  // outlive the visitor.
  const TRSMap& overrides;

  // Recursive scan: true when `n` itself or any descendant scene_node has
  // an entry in `overrides`.
  // The result is NOT cached: clone() calls this once per visited node,
  // so nodes under an animated branch are re-scanned at every level on
  // the way down.
  bool subtree_is_animated(const ossia::scene_node& n) const noexcept
  {
    if(overrides.find(n.id.value) != overrides.end())
      return true;
    if(!n.has_children())
      return false;
    for(const auto& child : *n.children)
    {
      // Only child payloads that are themselves (non-null) scene nodes
      // are descended into.
      if(auto* sub = ossia::get_if(&child))
        if(*sub && subtree_is_animated(**sub))
          return true;
    }
    return false;
  }

  // Returns `orig` unchanged (same pointer) when it is null or nothing in
  // its subtree is animated; otherwise returns a newly allocated node
  // whose child list has the override applied and whose scene_node
  // children are themselves cloned recursively.
  ossia::scene_node_ptr clone(const ossia::scene_node_ptr& orig) const
  {
    if(!orig)
      return orig;
    if(!subtree_is_animated(*orig))
      return orig; // whole subtree unchanged → share

    auto new_node = std::make_shared(*orig);
    std::vector new_children;
    if(orig->children)
      new_children.reserve(orig->children->size());

    bool xform_replaced = false;
    auto it = overrides.find(orig->id.value);
    // Null when this node is only an ancestor of an animated node.
    const auto* ov = it != overrides.end() ? &it->second : nullptr;

    if(orig->children)
    {
      for(const auto& payload : *orig->children)
      {
        if(ov && !xform_replaced)
        {
          if(auto* xf = ossia::get_if(&payload))
          {
            // Override the first scene_transform we encounter in this
            // node's children (GltfParser / FbxParser convention:
            // they prepend one as the first child of each node).
            // Fields without a sampled value keep the original data.
            // The literal byte counts are 3 floats (12) and 4 floats (16),
            // matching TRSOverride's float[3] / float[4] members; this
            // assumes scene_transform uses the same layouts — TODO confirm.
            ossia::scene_transform merged = *xf;
            if(ov->has_translation)
              std::memcpy(merged.translation, ov->translation, 12);
            if(ov->has_rotation)
              std::memcpy(merged.rotation, ov->rotation, 16);
            if(ov->has_scale)
              std::memcpy(merged.scale, ov->scale, 12);
            new_children.push_back(merged);
            xform_replaced = true;
            continue;
          }
        }

        // Recurse into sub-scene_node payloads so descendants can
        // also be animated.
        if(auto* sub = ossia::get_if(&payload))
        {
          new_children.push_back(clone(*sub));
          continue;
        }

        // Any other payload kind is copied through as-is.
        new_children.push_back(payload);
      }
    }

    // If this node is animated but had no scene_transform child, insert
    // one at the start so the TRS takes effect on subsequent siblings.
    // Un-sampled fields start from identity (zero translation, w = 1
    // quaternion, unit scale).
    if(ov && !xform_replaced)
    {
      ossia::scene_transform inserted{};
      inserted.rotation[3] = 1.f; // identity quaternion w
      inserted.scale[0] = inserted.scale[1] = inserted.scale[2] = 1.f;
      if(ov->has_translation)
        std::memcpy(inserted.translation, ov->translation, 12);
      if(ov->has_rotation)
        std::memcpy(inserted.rotation, ov->rotation, 16);
      if(ov->has_scale)
        std::memcpy(inserted.scale, ov->scale, 12);
      new_children.insert(new_children.begin(), inserted);
    }

    new_node->children
        = std::make_shared>(
            std::move(new_children));
    // Bumped relative to the source node so the clone is distinguishable
    // from it by dirty_index as well as by pointer.
    new_node->dirty_index = orig->dirty_index + 1;
    return new_node;
  }
};
// Per-tick entry point: samples the selected animation clip(s) of the
// input scene at the current time and publishes a scene whose animated
// nodes carry the sampled TRS and, when the scene has skeletons, freshly
// packed joint matrices. The input is forwarded untouched (dirty = 0)
// when it has no state / no animations or when no override entry was
// produced.
//
// NOTE(review): several template argument lists are missing in this copy
// of the source (`std::vector clips`, `std::make_shared(...)`,
// `std::shared_ptr(...)`); they look like an extraction artefact and
// must be restored from upstream before compiling.
void AnimationPlayer::operator()()
{
  const auto& in = inputs.scene_in.scene;
  if(!in.state || in.state->empty() || !in.state->animations
     || in.state->animations->empty())
  {
    outputs.scene_out.scene = in;
    outputs.scene_out.dirty = 0;
    return;
  }

  float t = inputs.time.value;
  // Intent: when the Time inlet did not move since the previous call and
  // Speed is neither 0 nor 1, advance time by Speed x 1/60 s (a fixed
  // frame-delta approximation — halp doesn't expose a deterministic dt
  // yet).
  //
  // NOTE(review): m_prev_time is overwritten with the *advanced* value,
  // while the comparison on the next call uses the raw Time input again.
  // With a constant Time input the test therefore succeeds only every
  // other call and `t` alternates between Time and Time + speed/60
  // instead of accumulating. Integrating properly needs the raw input and
  // the accumulated offset stored separately.
  const float speed = inputs.speed.value;
  if(t == m_prev_time && speed != 1.f && speed != 0.f)
    t = m_prev_time + speed * (1.f / 60.f);
  m_prev_time = t;

  // Collect animation_components to sample: every non-null clip when the
  // clip index is negative, otherwise the single clip at that index. An
  // index past the end (or a null clip) selects nothing — it is not
  // clamped.
  const auto& anims = *in.state->animations;
  const int clip_i = inputs.clip_index.value;
  std::vector clips;
  clips.reserve(anims.size());
  if(clip_i < 0)
  {
    for(const auto& a : anims)
      if(a)
        clips.push_back(a.get());
  }
  else if(std::size_t(clip_i) < anims.size() && anims[clip_i])
  {
    clips.push_back(anims[clip_i].get());
  }

  // Clips are sampled in order into one map, so when two clips drive the
  // same node and path the later clip's value replaces the earlier one.
  TRSMap overrides;
  for(const auto* clip : clips)
  {
    float clip_t = t;
    if(inputs.loop.value && clip->duration > 0.f)
    {
      // Wrap into [0, duration). std::fmod keeps the sign of `t`, so a
      // negative remainder is shifted up by one duration.
      clip_t = std::fmod(t, clip->duration);
      if(clip_t < 0.f)
        clip_t += clip->duration;
    }
    else if(clip->duration > 0.f)
    {
      clip_t = std::clamp(clip_t, 0.f, clip->duration);
    }
    // Clips with duration <= 0 are sampled at the raw `t`.

    for(const auto& channel : clip->channels)
    {
      if(!channel.times || !channel.values)
        continue;
      const auto& times = *channel.times;
      const auto& values = *channel.values;
      auto seg = findSegment(times, clip_t);

      // NOTE(review): the map entry is created before the target path is
      // inspected, so channels that end up ignored (weights / custom)
      // still leave an entry with no has_* flag set. That makes the
      // overrides.empty() passthrough below unreachable for such scenes
      // and forces a clone of the targeted branch every tick.
      //
      // NOTE(review): sampleLinear / sampleSlerp return without writing
      // when `values` holds no complete keyframe, yet has_* is set
      // unconditionally afterwards — an empty channel thus applies the
      // zero-initialized TRSOverride field (zero translation / scale,
      // all-zero quaternion).
      //
      // NOTE(review): the channel's interpolation mode is never read;
      // every channel is sampled linearly (slerp for rotation).
      auto& ov = overrides[channel.target_node_id];
      switch(channel.target_path)
      {
        case ossia::animation_target::translation: {
          sampleLinear(values, 3, seg, ov.translation);
          ov.has_translation = true;
          break;
        }
        case ossia::animation_target::rotation: {
          sampleSlerp(values, seg, ov.rotation);
          ov.has_rotation = true;
          break;
        }
        case ossia::animation_target::scale: {
          sampleLinear(values, 3, seg, ov.scale);
          ov.has_scale = true;
          break;
        }
        default:
          // weights / custom — deliberately ignored; see header comment.
          break;
      }
    }
  }

  if(overrides.empty())
  {
    // Nothing was sampled (no clip selected, or the selected clips have
    // no channel with both times and values). Pass through without
    // bumping version.
    outputs.scene_out.scene = in;
    outputs.scene_out.dirty = 0;
    return;
  }

  // Clone-and-override the tree. Un-animated roots come back as the same
  // pointer (see CloneVisitor::clone).
  CloneVisitor vis{overrides};
  auto new_roots
      = std::make_shared>();
  if(in.state->roots)
  {
    new_roots->reserve(in.state->roots->size());
    for(const auto& r : *in.state->roots)
      new_roots->push_back(vis.clone(r));
  }

  // Copy of the input state; only roots, version, dirty_index and (below)
  // skeletons are replaced.
  auto new_state = std::make_shared(*in.state);
  new_state->roots = new_roots;
  new_state->version = ++m_version_counter;
  new_state->dirty_index = in.state->dirty_index + 1;

  // ── Skinning update ──────────────────────────────────────────────
  // When the scene has skeletons, walk the (post-override) tree once,
  // cache every node's world-space matrix, then compute each skin's
  // joint_matrix[i] = worldMat[joint_node_ids[i]] × inverse_bind.
  // Pack into a fresh buffer_resource per skin and republish the
  // skeletons list so downstream consumers see the new matrices.
  if(in.state->skeletons && !in.state->skeletons->empty())
  {
    WorldMatMap worlds;
    for(const auto& r : *new_roots)
      if(r)
        collectNodeWorldMatrices(*r, QMatrix4x4{}, worlds);

    auto new_skels
        = std::make_shared>();
    new_skels->reserve(in.state->skeletons->size());
    for(const auto& src : *in.state->skeletons)
    {
      if(!src)
      {
        // Keep null entries so skeleton indices stay aligned.
        new_skels->push_back(src);
        continue;
      }
      const std::size_t n = src->joints.size();
      // Pack N joint matrices as column-major float[16] entries.
      auto matrices = std::make_shared>(n * 16, 0.f);
      for(std::size_t j = 0; j < n; ++j)
      {
        QMatrix4x4 ibm;
        std::memcpy(
            ibm.data(), src->joints[j].inverse_bind_matrix,
            sizeof(float) * 16);
        // Stays identity when the joint has no node id or the node was
        // not found in the tree; the joint matrix is then just the
        // inverse bind matrix.
        QMatrix4x4 world;
        if(j < src->joint_node_ids.size())
        {
          auto it = worlds.find(src->joint_node_ids[j].value);
          if(it != worlds.end())
            world = it->second;
        }
        const QMatrix4x4 jm = world * ibm;
        std::memcpy(
            matrices->data() + j * 16, jm.constData(), sizeof(float) * 16);
      }
      auto buf = std::make_shared();
      ossia::buffer_data bd;
      // Two-argument shared_ptr constructor: `bd.data` points at the
      // float storage while sharing ownership of the `matrices` vector.
      bd.data = std::shared_ptr(matrices, matrices->data());
      bd.byte_size = int64_t(matrices->size() * sizeof(float));
      bd.usage_hint = ossia::buffer_data::usage::storage_buffer;
      buf->resource = std::move(bd);
      buf->dirty_index = new_state->version;

      auto cloned = std::make_shared(*src);
      cloned->joint_matrices_buffer = std::move(buf);
      cloned->dirty_index = new_state->version;
      new_skels->push_back(std::move(cloned));
    }
    new_state->skeletons = std::move(new_skels);
  }

  outputs.scene_out.scene.state = std::move(new_state);
  outputs.scene_out.dirty = ossia::scene_port::dirty_animation;
}
// Samples an incoming scene's animation channels at a user-provided
// time and emits a scene_spec whose animated scene_nodes carry updated
// scene_transform payloads (TRS) and whose skeletons carry re-computed
// joint matrices. Passthrough when the input scene has no animations.
//
// Sampling model:
//   - animation_channel.target_node_id refers to a scene_node::id.
//   - target_path ∈ {translation, rotation, scale, weights, custom}.
//   - `times` + `values` hold the keyframes. The channel's
//     `interpolation` (step / linear / cubic_spline) is not consulted by
//     the current sampler: translation / scale are lerped and rotation is
//     slerped regardless.
//
// Output layout:
//   - For TRS channels: find the first `scene_transform` payload in
//     the matching node's children (the convention GltfParser /
//     FbxParser follow — they prepend one per node) and override its
//     translation/rotation/scale fields; one is inserted at the front
//     when the node has none.
//   - Subtrees that don't touch any animated node are shared as-is
//     (shared_ptr reuse), so downstream identity caches stay hot
//     outside the animated branch.
//   - The output scene_state is a copy of the input state with roots,
//     version and dirty_index replaced. When the input has skeletons,
//     every non-null skeleton is cloned and given a freshly packed
//     joint-matrix buffer; all other members are copied unchanged.
//
// Currently unsupported (ignored):
//   - weights (morph targets).
//   - custom paths.
//   - skeletal joint tracks that target joints inside a
//     skeleton_component rather than scene_node ids.
// These are follow-ups; they need the same sample-and-override pattern
// but on different storage.
class AnimationPlayer
{
public:
  halp_meta(name, "Animation Player")
  halp_meta(category, "Visuals/3D/Scene")
  halp_meta(c_name, "animation_player")
  halp_meta(authors, "ossia team")
  halp_meta(
      manual_url,
      "https://ossia.io/score-docs/processes/animation-player.html")
  halp_meta(uuid, "2b4d7e8c-3a5f-4b9d-91c6-8d2e0f3a7b5e")

  struct ins
  {
    struct
    {
      halp_meta(name, "Scene In");
      ossia::scene_spec scene;
      uint8_t dirty{0};
    } scene_in;

    // Sample time; clamped to [0, duration] per clip, or wrapped when
    // Loop is on.
    halp::hslider_f32<"Time", halp::range{0., 3600., 0.}> time;
    // Only takes effect while the Time input is unchanged between calls
    // and Speed is neither 0 nor 1 (see operator()).
    halp::hslider_f32<"Speed", halp::range{-4., 4., 1.}> speed;
    halp::toggle<"Loop"> loop;
    // 0 = first animation_component, 1 = second, …. -1 (the default) =
    // sample every clip; clips are applied in order, so when two clips
    // drive the same node and path the later one wins (values are not
    // summed) — fine when animations target disjoint node sets, which is
    // common for glTF scenes. An index past the last component is not
    // clamped: no clip is sampled and the scene passes through.
    halp::spinbox_i32<"Clip index", halp::irange{-1, 32, -1}> clip_index;
  } inputs;

  struct outs
  {
    struct
    {
      halp_meta(name, "Scene Out");
      ossia::scene_spec scene;
      uint8_t dirty{0};
    } scene_out;
  } outputs;

  void operator()();

  // NOTE(review): not referenced by operator()() as currently
  // implemented; the template argument is also missing in this copy of
  // the source.
  std::shared_ptr m_cached_state;
  // Source of scene_state::version for every emitted animated state.
  int64_t m_version_counter{0};

  // Time used on the previous call (after any Speed advance). Compared
  // against the Time input to detect "Time did not move" for the Speed
  // control.
  float m_prev_time{0.f};
};
// True when `path` ends with '.' followed by `ext`, compared without
// regard to ASCII case. `ext` is given without its dot; a path that
// consists of the bare extension (no dot before it) does not match.
static bool hasSuffixCI(std::string_view path, std::string_view ext) noexcept
{
  const std::size_t ext_len = ext.size();

  // The path needs room for the extension plus the dot in front of it.
  if(path.size() <= ext_len)
    return false;

  const std::size_t dot = path.size() - ext_len - 1;
  if(path[dot] != '.')
    return false;

  for(std::size_t i = 0; i < ext_len; ++i)
  {
    // Route through unsigned char: std::tolower is undefined for negative
    // values other than EOF.
    const int lhs = std::tolower(static_cast<unsigned char>(path[dot + 1 + i]));
    const int rhs = std::tolower(static_cast<unsigned char>(ext[i]));
    if(lhs != rhs)
      return false;
  }
  return true;
}
+static std::string extensionLowerCI(std::string_view path) +{ + auto pos = path.find_last_of('.'); + if(pos == std::string_view::npos || pos + 1 >= path.size()) + return {}; + return toLower(path.substr(pos + 1)); +} + +// Reuse FbxParser / GltfParser's static parsers by constructing a throwaway +// inner instance, invoking the apply-lambda they return, and lifting the +// parsed raw scene_state out. No cross-frame state from the inner loader +// leaks into AssetLoader; its m_raw_state shared_ptr is copied into ours. +// +// Pin the file_type explicitly (halp::text_file_view — the default for +// every loader's halp::file_port<"..."> here). A forwarding-reference +// template parameter deduced from both the data arg and the function +// pointer's by-value parameter produces a deduction conflict +// (FileT& vs FileT), so we skip deduction. +template +static std::shared_ptr +runInnerParser(const halp::text_file_view& data, + std::function (*parse)(halp::text_file_view)) +{ + auto apply = parse(data); + if(!apply) + return nullptr; + Loader inner; + apply(inner); + return inner.m_raw_state; +} + +} // namespace + +std::function +AssetLoader::ins::asset_t::process(file_type tv) +{ + if(tv.filename.empty()) + return {}; + + const std::string_view fname{tv.filename}; + std::shared_ptr loaded; + + if(hasSuffixCI(fname, "fbx")) + { + loaded = runInnerParser(tv, &FbxParser::ins::fbx_t::process); + } + else if(hasSuffixCI(fname, "gltf") || hasSuffixCI(fname, "glb")) + { + loaded = runInnerParser(tv, &GltfParser::ins::gltf_t::process); + } + else if(hasSuffixCI(fname, "obj")) + { + Threedim::float_vec buf; + auto meshes = Threedim::ObjFromString(tv.bytes, buf); + if(!meshes.empty()) + { + const QString label = QFileInfo(QString::fromStdString(std::string{fname})) + .fileName(); + loaded = Threedim::sceneStateFromMeshes( + std::move(meshes), std::move(buf), label.toStdString()); + } + } + else if(hasSuffixCI(fname, "ply")) + { + // Sniff the header first: a PLY whose vertex 
element carries + // splat-style columns (or no face element) goes through the + // primitive-cloud path; everything else stays on the existing + // mesh path. The sniff only reads the textual header, no row data. + if(Threedim::PrimitiveCloud::ply_is_splat_shaped(fname)) + { + auto cloud = Threedim::PrimitiveCloud::parse_ply(fname); + if(cloud) + { + const QString label + = QFileInfo(QString::fromStdString(std::string{fname})).fileName(); + loaded = Threedim::PrimitiveCloud::sceneStateFromCloud( + std::move(cloud), label.toStdString()); + } + } + else + { + Threedim::float_vec buf; + auto meshes = Threedim::PlyFromFile(fname, buf); + if(!meshes.empty()) + { + const QString label + = QFileInfo(QString::fromStdString(std::string{fname})).fileName(); + loaded = Threedim::sceneStateFromMeshes( + std::move(meshes), std::move(buf), label.toStdString()); + } + } + } + else if(hasSuffixCI(fname, "stl")) + { + Threedim::float_vec buf; + auto meshes = Threedim::StlFromFile(fname, buf); + if(!meshes.empty()) + { + const QString label = QFileInfo(QString::fromStdString(std::string{fname})) + .fileName(); + loaded = Threedim::sceneStateFromMeshes( + std::move(meshes), std::move(buf), label.toStdString()); + } + } + else if(hasSuffixCI(fname, "off")) + { + Threedim::float_vec buf; + auto meshes = Threedim::OffFromFile(fname, buf); + if(!meshes.empty()) + { + const QString label = QFileInfo(QString::fromStdString(std::string{fname})) + .fileName(); + loaded = Threedim::sceneStateFromMeshes( + std::move(meshes), std::move(buf), label.toStdString()); + } + } + else if(hasSuffixCI(fname, "splat")) + { + // Antimatter15 binary .splat: 32 bytes/primitive, fixed schema. 
+ auto cloud = Threedim::PrimitiveCloud::parse_splat_binary(tv.bytes); + if(cloud) + { + const QString label + = QFileInfo(QString::fromStdString(std::string{fname})).fileName(); + loaded = Threedim::PrimitiveCloud::sceneStateFromCloud( + std::move(cloud), label.toStdString()); + } + } + else if(hasSuffixCI(fname, "spz")) + { + // Niantic .spz v1-3: gzip-compressed column-grouped 3DGS data. + // Decoded via the vendored Niantic library (3rdparty/spz), + // transposed into the canonical 62-float row layout that the + // 3dgs.classic preset reads. v4 (NGSP-magic + ZSTD) returns + // nullptr — see 3rdparty/spz/CMakeLists.txt for the rationale. + auto cloud = Threedim::PrimitiveCloud::parse_spz(tv.bytes); + if(cloud) + { + const QString label + = QFileInfo(QString::fromStdString(std::string{fname})).fileName(); + loaded = Threedim::PrimitiveCloud::sceneStateFromCloud( + std::move(cloud), label.toStdString()); + } + } + else + { + // Built-ins all missed — consult the addon-registered parsers. + // score-addon-academy registers its USD loader here at module load. + const std::string ext = extensionLowerCI(fname); + if(auto fn = AssetLoaderRegistry::lookup(ext)) + loaded = fn(tv); + } + + if(!loaded) + return {}; + + return [state = std::move(loaded)](AssetLoader& self) mutable { + self.m_parsed_state = std::move(state); + self.rebuild_format_state(); // m_parsed → m_overridden + self.m_cached_xform.valid = false; // force wrap rebuild + self.rebuild_wrapped_state(); + }; +} + +void AssetLoader::rebuild_format_state() +{ + m_cached_format_override = inputs.format_override.value; + m_overridden_state = Threedim::PrimitiveCloud::applyFormatOverride( + m_parsed_state, m_cached_format_override); + // The wrapped state derives from m_overridden_state and must be + // rebuilt whenever the override changes. 
+ m_cached_xform.valid = false; + rebuild_wrapped_state(); +} + +void AssetLoader::rebuild_wrapped_state() +{ + m_wrapped_state = Threedim::wrapSceneWithTransform( + m_overridden_state, inputs, m_cached_xform, m_version_counter, m_xform_ref); +} + +void AssetLoader::operator()() +{ + if(!m_parsed_state) + { + outputs.scene_out.scene.state = nullptr; + outputs.scene_out.dirty = 0; + return; + } + + if(Threedim::transformChanged(inputs, m_cached_xform)) + rebuild_wrapped_state(); + + outputs.scene_out.scene.state = m_wrapped_state; + outputs.scene_out.dirty = ossia::scene_port::dirty_transform; +} + +void AssetLoader::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + // Force the wrapped state to be rebuilt so the emitted + // scene_transform carries the fresh ref. 
+ m_cached_xform.valid = false; + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void AssetLoader::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!raw_transform_slot.valid()) + return; + + score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); +} + +void AssetLoader::release(score::gfx::RenderList& r) +{ + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_xform_ref = {}; + // Clear cached scene_state so the next operator()() rebuilds against + // the post-release registry. Producer-state-drift Option A — see + // matching comment in Light::release. m_parsed_state stays valid + // (parser output, no slot refs); only m_overridden_state and + // m_wrapped_state embed registry refs and need clearing. 
+ m_overridden_state.reset(); + m_wrapped_state.reset(); +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/AssetLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/AssetLoader.hpp new file mode 100644 index 0000000000..4775820bee --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/AssetLoader.hpp @@ -0,0 +1,166 @@ +#pragma once +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// External scene-file parser registry. Addons that ship format-specific +// parsers (score-addon-academy's USD loader, a future Alembic loader, +// etc.) register themselves here so AssetLoader can dispatch to them +// without a link-time dependency from score-plugin-threedim to the addon. +// +// The registered callback takes the same halp::text_file_view that the +// built-in glTF / FBX parsers receive and returns a populated +// ossia::scene_state on success, or a null shared_ptr on failure / +// unhandled input. AssetLoader wraps the state with the Position / +// Rotation / Scale controls exactly as it does for the built-ins. +// +// Extensions are matched case-insensitively on the suffix after the +// final '.'. Registrations that duplicate an extension replace any +// prior one (last writer wins). Calls are thread-safe. +class SCORE_PLUGIN_THREEDIM_EXPORT AssetLoaderRegistry +{ +public: + using ParseFn = std::shared_ptr (*)( + const halp::text_file_view&); + + // Register a parser for an extension (without the dot). Safe at + // static-init time — the underlying storage is a function-local + // Meyers singleton. + static void register_parser(std::string_view extension, ParseFn fn); + + // Lookup by lowercased extension. Returns nullptr if no match. + static ParseFn lookup(std::string_view extension_lower) noexcept; +}; + +// Unified 3D asset loader. 
// Accepts .fbx / .gltf / .glb / .obj / .ply / .stl / .off / .splat /
// .spz natively, plus .usd / .usda / .usdc / .usdz when
// score-addon-academy is loaded (it registers its UsdParser through
// AssetLoaderRegistry at module init).
//
// Dispatches by file extension to the appropriate parser:
//   .fbx                    → ufbx (FbxParser's static parser)
//   .gltf / .glb            → fastgltf (GltfParser's static parser)
//   .obj                    → tinyobjloader + sceneStateFromMeshes
//   .ply (mesh)             → miniply + sceneStateFromMeshes
//   .ply (splat-shaped)     → PrimitiveCloud::parse_ply + sceneStateFromCloud
//   .stl / .off             → vcglib + sceneStateFromMeshes
//   .splat                  → PrimitiveCloud::parse_splat_binary + sceneStateFromCloud
//   .spz                    → PrimitiveCloud::parse_spz + sceneStateFromCloud
//   .usd / .usda / .usdc    → OpenUSD (academy UsdParser, optional)
//   .usdz                   → OpenUSD (academy UsdParser, optional)
//   (others)                → AssetLoaderRegistry::lookup(ext)
//
// Position / Rotation / Scale controls wrap the loaded scene at a single
// root TRS via TransformHelper::wrapSceneWithTransform — same convention
// as FbxParser / GltfParser.
//
// For the geometry-only formats (OBJ/PLY/STL/OFF) the output is a scene
// with one scene_node per mesh part, each containing a mesh_component
// referencing a single shared CPU buffer. FBX/glTF retain their rich
// scene hierarchy (lights, cameras, materials, skeletons, animations).
//
// NOTE(review): the template arguments of the std::function /
// std::shared_ptr members below are missing in this copy of the source;
// restore them from upstream before compiling.
class AssetLoader
{
public:
  halp_meta(name, "Asset Loader")
  halp_meta(category, "Visuals/3D")
  halp_meta(c_name, "asset_loader")
  halp_meta(authors, "ossia team, ufbx / fastgltf / tinyobj / miniply / vcglib")
  halp_meta(
      manual_url,
      "https://ossia.io/score-docs/processes/asset-loader.html")
  halp_meta(uuid, "2f6a8c41-7d93-4e5b-b1c8-4e3f9a7d2c5b")

  struct ins
  {
    struct asset_t : halp::file_port<"Asset file">
    {
      halp_meta(
          extensions,
          "3D assets (*.fbx *.gltf *.glb *.obj *.ply *.stl *.off "
          "*.splat *.spz "
          "*.usd *.usda *.usdc *.usdz)");
      // Parses the file and returns a functor that installs the result on
      // the node; an empty functor means nothing was loaded.
      static std::function process(file_type data);
    } asset;

    PositionControl position;
    RotationControl rotation;
    ScaleControl scale;

    // Stamps every primitive_cloud_component emitted by this asset
    // with `format_id = value` when non-empty. Empty falls back to the
    // parser's autodetection (PLY column sniffing, .splat / .spz
    // hardcoded). Used to route unrecognised PLY columns or addon-
    // produced files through a FlattenedSceneFilterNode in mode 12.
    struct format_override_t : halp::lineedit<"Format override (auto if empty)", "">
    {
      // Re-derives the overridden and wrapped states when the field is
      // edited.
      void update(AssetLoader& n) { n.rebuild_format_state(); }
    } format_override;
  } inputs;

  struct outs
  {
    struct
    {
      halp_meta(name, "Scene");
      ossia::scene_spec scene;
      uint8_t dirty{0};
    } scene_out;
  } outputs;

  // Publishes m_wrapped_state, re-wrapping first when the transform
  // controls changed; emits a null scene while nothing is parsed.
  void operator()();

  // Render-thread hooks. init() claims a RawTransform slot for the
  // single root wrapping xform this node emits (TransformHelper's
  // scene-wrapping transform). update() uploads the current TRS.
  // release() frees the slot and drops the overridden / wrapped states.
  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
  void update(
      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
      score::gfx::Edge* e);
  void release(score::gfx::RenderList& r);

  // Raw scene as parsed from the file — stable as long as the file
  // doesn't change. The pipeline is:
  //   m_parsed_state     (parser output, never mutated)
  //     ↓ applyFormatOverride(format_override.value)
  //   m_overridden_state (format_id rewrites applied, or = parsed)
  //     ↓ wrapSceneWithTransform(position/rotation/scale)
  //   m_wrapped_state    (final, published downstream)
  std::shared_ptr m_parsed_state;
  std::shared_ptr m_overridden_state;
  std::shared_ptr m_wrapped_state;
  // Override string that m_overridden_state was last built with.
  std::string m_cached_format_override;
  // Last transform the scene was wrapped with; clearing `.valid` forces
  // the next wrap to rebuild.
  CachedTRS m_cached_xform;
  int64_t m_version_counter{0};

  // Re-runs applyFormatOverride from the parsed state, then invalidates
  // the cached transform and re-wraps. Triggered by the lineedit's
  // update() callback when the user edits the override field; also
  // called once after parsing.
  void rebuild_format_state();

  // Registry slot claimed in init() and the ossia-side reference to it
  // that is passed to wrapSceneWithTransform.
  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
  ossia::gpu_slot_ref m_xform_ref{};

private:
  // Rebuilds m_wrapped_state from m_overridden_state and the current
  // Position / Rotation / Scale inputs.
  void rebuild_wrapped_state();
};
+ halp::val_port<"Handle", int64_t> handle; + halp::val_port<"Changed", bool> changed; + // One-line, copy-pasteable summary for tooltips / log scraping. + halp::val_port<"Readable", std::string> readable; + } outputs; + + void operator()() + { + const auto& b = inputs.buffer.buffer; + outputs.byte_size.value = b.byte_size; + outputs.byte_offset.value = b.byte_offset; + outputs.handle.value = reinterpret_cast(b.handle); + outputs.changed.value = b.changed; + + auto& ret = outputs.readable.value; + ret.clear(); + fmt::format_to( + std::back_inserter(ret), + "handle=0x{:x}, byte_size={}, byte_offset={}, changed={}", + reinterpret_cast(b.handle), b.byte_size, b.byte_offset, + b.changed ? "yes" : "no"); + } +}; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp index 1827dff0b7..002abbc367 100644 --- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.cpp @@ -108,13 +108,12 @@ void BuffersToGeometry::operator()() // Check if anything changed bool meshChanged = false; bool buffersChanged = false; - bool transformChanged = false; - - // Check transform changes - // (Assuming PositionControl, RotationControl, ScaleControl have .value members) - // You'll need to compute the transform matrix and compare - // For now, mark as changed if any transform input changed - transformChanged = true; // Simplified - compute properly based on your controls + // Compute TRS matrix from position/rotation/scale controls. Returns + // true iff the matrix actually changed — the old code hard-coded + // `true`, firing a downstream transform rebuild every frame even + // when the knobs hadn't moved. 
+ const bool transformChanged + = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS); // Check mesh configuration changes if(inputs.vertices.value != m_prevVertices || inputs.topology.value != m_prevTopology diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp index 9343514709..d15f369f0d 100644 --- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry.hpp @@ -1,5 +1,6 @@ #pragma once #include "BufferToGeometryCommon.hpp" +#include "TransformHelper.hpp" #include #include @@ -109,6 +110,7 @@ class BuffersToGeometry PrimitiveTopology m_prevTopology{}; CullMode m_prevCullMode{}; FrontFace m_prevFrontFace{}; + CachedTRS m_cachedTRS{}; struct ui { diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp index b013c969e6..eaf2bfe49a 100644 --- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.cpp @@ -75,10 +75,11 @@ void BuffersToGeometry2::operator()() // Check if anything changed bool meshChanged = false; bool buffersChanged = false; - bool transformChanged = false; - - // Check transform changes - transformChanged = true; // Simplified - compute properly based on your controls + // Compute TRS matrix from position/rotation/scale controls; returns + // true iff the matrix actually changed (replaces the old hardcoded + // transformChanged=true which fired a downstream rebuild every frame). 
+ const bool transformChanged + = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS); // Check mesh configuration changes if(inputs.vertices.value != m_prevVertices || inputs.topology.value != m_prevTopology diff --git a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp index d9f03fc452..69ee796759 100644 --- a/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/BufferToGeometry2.hpp @@ -1,5 +1,6 @@ #pragma once #include "BufferToGeometryCommon.hpp" +#include "TransformHelper.hpp" #include #include @@ -108,6 +109,7 @@ class BuffersToGeometry2 PrimitiveTopology m_prevTopology{}; CullMode m_prevCullMode{}; FrontFace m_prevFrontFace{}; + CachedTRS m_cachedTRS{}; struct ui { diff --git a/src/plugins/score-plugin-threedim/Threedim/Camera.cpp b/src/plugins/score-plugin-threedim/Threedim/Camera.cpp new file mode 100644 index 0000000000..dc25a61a31 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Camera.cpp @@ -0,0 +1,104 @@ +#include "Camera.hpp" + +#include +#include + +namespace Threedim +{ + +// Order invariant: called by GfxRenderer::initState BEFORE the first +// operator()() and BEFORE processControlIn fires any rebuild() callback. +// m_camera_ref / m_xform_ref populated here are therefore safe to read +// in rebuild() without a guard. Adding prepare() to this node breaks the +// invariant — see CpuFilterNode.hpp for details. 
+void Camera::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_camera_slot.valid()) + { + raw_camera_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawCamera, + sizeof(score::gfx::RawCameraData)); + m_camera_ref = r.registry().toOssiaRef(raw_camera_slot); + } + if(raw_camera_slot.valid()) + { + score::gfx::RawCameraData seed{}; + r.registry().updateSlot(res, raw_camera_slot, &seed, sizeof(seed)); + } + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void Camera::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(raw_transform_slot.valid()) + { + // Local TRS of the scene_transform this camera emits. Translation + // mirrors the eye; rotation matches the quaternion built in + // operator()() (local -Z → (target - eye)); scale is identity. 
+ score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.eye.value.x; + xform.translation[1] = inputs.eye.value.y; + xform.translation[2] = inputs.eye.value.z; + QVector3D forward( + inputs.target.value.x - inputs.eye.value.x, + inputs.target.value.y - inputs.eye.value.y, + inputs.target.value.z - inputs.eye.value.z); + if(forward.lengthSquared() > 1e-8f) + { + forward.normalize(); + QQuaternion q = QQuaternion::fromDirection( + -forward, QVector3D(0.f, 1.f, 0.f)); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + } + else + { + // Degenerate case (eye == target): no viewing direction can be + // derived, so upload an explicit identity quaternion. This keeps + // the GPU-side TRS in agreement with the fallback used by + // Camera::rebuild() for the scene_transform payload. + xform.rotation[0] = 0.f; + xform.rotation[1] = 0.f; + xform.rotation[2] = 0.f; + xform.rotation[3] = 1.f; + } + xform.scale[0] = 1.f; + xform.scale[1] = 1.f; + xform.scale[2] = 1.f; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); + } + + if(!raw_camera_slot.valid()) + return; + + score::gfx::RawCameraData raw{}; + raw.eye[0] = inputs.eye.value.x; + raw.eye[1] = inputs.eye.value.y; + raw.eye[2] = inputs.eye.value.z; + raw.target[0] = inputs.target.value.x; + raw.target[1] = inputs.target.value.y; + raw.target[2] = inputs.target.value.z; + raw.up[0] = 0.f; + raw.up[1] = 1.f; + raw.up[2] = 0.f; + raw.yfov = inputs.fov.value * float(M_PI) / 180.f; + raw.znear = inputs.near_plane.value; + raw.zfar = inputs.far_plane.value; + raw.projection = 0u; // perspective + r.registry().updateSlot(res, raw_camera_slot, &raw, sizeof(raw)); +} + +void Camera::release(score::gfx::RenderList& r) +{ + if(raw_camera_slot.valid()) + r.registry().free(raw_camera_slot); + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_camera_ref = {}; + m_xform_ref = {}; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Camera.hpp b/src/plugins/score-plugin-threedim/Threedim/Camera.hpp new file mode 100644 index 0000000000..7939d533c8 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Camera.hpp @@ -0,0 +1,216 @@ +#pragma once +#include +#include + +#include + +#include + +#include +#include +#include + +#include +#include +#include 
+#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Scene-producing camera node. Emits a scene_spec containing: +// - a scene_node with an id derived from this node's uuid (so the flatten +// visitor can attribute the camera back to it), +// - a scene_transform placing the camera at eye looking at target, +// - a camera_component carrying yfov / znear / zfar. +// +// ScenePreprocessor packs every camera it collects into its Camera UBO +// output — when merged with a scene tree this camera becomes one entry in +// that array. active_camera_id defaults to this node's id so a single +// Camera is picked up automatically. +class Camera +{ +public: + halp_meta(name, "Camera") + halp_meta(c_name, "camera_avnd") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(authors, "ossia team") + halp_meta(uuid, "4c91b5e2-8d76-4ab3-9f14-6e0d8b3a2c57") + + struct ins + { + // Port-driven rebuild: every control carries an `update(Camera&)` + // callback that fires only when its value changes, triggering a + // `rebuild()` on the Camera. `operator()()` then just republishes + // the already-built m_state — no per-frame memcmp, no per-frame + // version bump, no merge_scenes / preprocessor thrash. + // + // halp::range only supports scalar inits (broadcast across x/y/z), so + // the non-uniform defaults are applied in the subclass constructor. 
+ struct Eye : halp::xyz_spinboxes_f32<"Eye", halp::range{-10000., 10000., 0.}> + { + Eye() { value = {0.f, 1.f, 3.f}; } + void update(Camera& n) { n.rebuild(); } + } eye; + struct : halp::xyz_spinboxes_f32<"Target", halp::range{-10000., 10000., 0.}> + { void update(Camera& n) { n.rebuild(); } } target; + struct : halp::hslider_f32<"FOV", halp::range{5., 170., 60.}> + { void update(Camera& n) { n.rebuild(); } } fov; + struct : halp::hslider_f32<"Near", halp::range{0.001, 10., 0.1}> + { void update(Camera& n) { n.rebuild(); } } near_plane; + struct : halp::hslider_f32<"Far", halp::range{1., 100000., 1000.}> + { void update(Camera& n) { n.rebuild(); } } far_plane; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + // Stable scene_node_id for this camera across frames. Set once in the + // first call. Used as scene_state::active_camera_id so ScenePreprocessor + // picks THIS camera even when other cameras show up in merged scenes. + ossia::scene_node_id m_id{}; + std::shared_ptr m_state; + int64_t m_version{0}; + // Dirty bits to stamp on the next emission. Accumulated in rebuild() + // and cleared after operator()() publishes them. When no control + // changed this frame, operator()() republishes the same m_state with + // dirty=0 so the preprocessor's pointer+version comparison short- + // circuits the rebuild path. + uint8_t m_pending_dirty{ossia::scene_port::dirty_transform}; + // Stable ids for the single scene_transform + camera_component this + // node emits (minted on first rebuild). + uint64_t m_xform_stable_id{}; + uint64_t m_camera_stable_id{}; + + // Rebuild m_state from current inputs. Called from every port's + // `update()` callback (fires only on control changes), and once from + // `operator()()` on the first tick to seed m_state. 
+ void rebuild() + { + if(!m_state) + { + m_state = std::make_shared(); + // Deterministic, non-zero id keyed on this node's address. Non-zero + // so merge_scenes' active_camera_id resolution treats it as "set". + m_id.value = reinterpret_cast(this) | 0x1u; + } + if(m_camera_stable_id == 0) m_camera_stable_id = ossia::mint_stable_id(); + if(m_xform_stable_id == 0) m_xform_stable_id = ossia::mint_stable_id(); + + // Rebuild as {scene_transform, camera_component} inside a scene_node. + auto cam = std::make_shared(); + cam->stable_id = m_camera_stable_id; + cam->projection = ossia::camera_projection::perspective; + cam->yfov = inputs.fov.value * float(M_PI) / 180.f; + cam->znear = inputs.near_plane.value; + cam->zfar = inputs.far_plane.value; + // Propagate the RawCamera arena slot ref (populated in init()). + cam->raw_slot = m_camera_ref; + + // Encode the world transform as TRS for the scene_transform payload. + ossia::scene_transform xform; + xform.stable_id = m_xform_stable_id; + xform.translation[0] = inputs.eye.value.x; + xform.translation[1] = inputs.eye.value.y; + xform.translation[2] = inputs.eye.value.z; + // Build a quaternion for the camera's world orientation. Qt's + // QQuaternion::fromDirection(direction, up) maps local +Z (NOT -Z) to + // `direction` — see QMatrix4x4::fromAxes in Qt source, which takes + // zAxis = direction. We want the camera's local +Z axis (the "back" + // axis of a GL camera) to point along (eye − target) so that local -Z + // (the GL viewing direction) points from eye toward target. Hence the + // -forward. Equivalently: the inverse of the TRS matches + // QMatrix4x4::lookAt(eye, target, up). 
+ QVector3D forward( + inputs.target.value.x - inputs.eye.value.x, + inputs.target.value.y - inputs.eye.value.y, + inputs.target.value.z - inputs.eye.value.z); + if(forward.lengthSquared() > 1e-8f) + { + forward.normalize(); + QQuaternion q = QQuaternion::fromDirection( + -forward, QVector3D(0.f, 1.f, 0.f)); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + } + else + { + xform.rotation[0] = 0.f; + xform.rotation[1] = 0.f; + xform.rotation[2] = 0.f; + xform.rotation[3] = 1.f; + } + xform.scale[0] = 1.f; + xform.scale[1] = 1.f; + xform.scale[2] = 1.f; + // Propagate the RawTransform slot ref (populated in init()). + xform.raw_slot = m_xform_ref; + + auto children = std::make_shared>(); + children->push_back(xform); + children->push_back(ossia::camera_component_ptr(std::move(cam))); + + auto node = std::make_shared(); + node->id = m_id; + node->children = std::move(children); + + auto roots + = std::make_shared>(); + roots->push_back(std::move(node)); + + m_state->roots = std::move(roots); + m_state->active_camera_id = m_id; + m_version++; + m_state->version = m_version; + m_pending_dirty = ossia::scene_port::dirty_transform; + } + + void operator()() + { + if(!m_state) + rebuild(); + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; + } + + // Render-thread hooks. init claims one RawCamera slot; update packs + // eye / target / up / yfov / znear / zfar into a RawCameraData and + // uploads; release returns the slot. The preprocessor will consume + // the slot in a later pass (aspect-ratio-aware matrix composition + // happens there); for now the scene_spec emission still drives + // packAndUploadCameras and this slot is a producer-half plumbing. 
+ void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + score::gfx::GpuResourceRegistry::Slot raw_camera_slot; + score::gfx::GpuResourceRegistry::Slot raw_transform_slot; + + // Ossia-facing snapshots, stamped on the emitted components' + // raw_slot fields so the preprocessor can locate this camera's + // GPU bytes via isLive() + offset. Written once in init(). + ossia::gpu_slot_ref m_camera_ref{}; + ossia::gpu_slot_ref m_xform_ref{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/CameraArray.cpp b/src/plugins/score-plugin-threedim/Threedim/CameraArray.cpp new file mode 100644 index 0000000000..4b3d2d397d --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/CameraArray.cpp @@ -0,0 +1,133 @@ +#include "CameraArray.hpp" + +#include +#include + +#include + +namespace Threedim +{ + +namespace +{ +// Same face layout as operator()() — keep the two definitions in sync; +// FlattenVisitor pulls scene_transform from the scene_spec emission, +// ScenePreprocessor will (later) consume the raw slots here. +struct Face +{ + float forward[3]; + float up[3]; +}; +constexpr std::array kFaces{{ + {{ 1.f, 0.f, 0.f}, {0.f, -1.f, 0.f}}, // +X + {{-1.f, 0.f, 0.f}, {0.f, -1.f, 0.f}}, // -X + {{ 0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}, // +Y + {{ 0.f, -1.f, 0.f}, {0.f, 0.f, -1.f}}, // -Y + {{ 0.f, 0.f, 1.f}, {0.f, -1.f, 0.f}}, // +Z + {{ 0.f, 0.f, -1.f}, {0.f, -1.f, 0.f}}, // -Z +}}; +} + +// Order invariant: called by GfxRenderer::initState BEFORE the first +// operator()() and BEFORE processControlIn fires any rebuild() callback. +// m_array_ref populated here is therefore safe to read in rebuild() +// without a guard. Adding prepare() to this node breaks the invariant — +// see CpuFilterNode.hpp for details. 
+void CameraArray::init( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_camera_slot.valid()) + { + raw_camera_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawCamera, + 6 * sizeof(score::gfx::RawCameraData)); + m_array_ref = r.registry().toOssiaRef(raw_camera_slot); + } + if(raw_camera_slot.valid()) + { + score::gfx::RawCameraData seed[6]{}; + r.registry().updateSlot(res, raw_camera_slot, &seed, sizeof(seed)); + } + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + 6 * sizeof(score::gfx::RawLocalTransform)); + m_xform_array_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed[6]{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void CameraArray::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!raw_camera_slot.valid()) + return; + + const float eye[3]{ + inputs.origin.value.x, inputs.origin.value.y, inputs.origin.value.z}; + const float znear = inputs.near_plane.value; + const float zfar = inputs.far_plane.value; + + score::gfx::RawCameraData raw[6]{}; + for(int i = 0; i < 6; ++i) + { + raw[i].eye[0] = eye[0]; + raw[i].eye[1] = eye[1]; + raw[i].eye[2] = eye[2]; + raw[i].target[0] = eye[0] + kFaces[i].forward[0]; + raw[i].target[1] = eye[1] + kFaces[i].forward[1]; + raw[i].target[2] = eye[2] + kFaces[i].forward[2]; + raw[i].up[0] = kFaces[i].up[0]; + raw[i].up[1] = kFaces[i].up[1]; + raw[i].up[2] = kFaces[i].up[2]; + raw[i].yfov = float(M_PI) / 2.f; // 90° per face + raw[i].znear = znear; + raw[i].zfar = zfar; + raw[i].projection = 0u; // perspective + } + r.registry().updateSlot(res, raw_camera_slot, &raw, sizeof(raw)); + + if(raw_transform_slot.valid()) + { + // Per-face scene_transform local TRS: translation = origin; + // rotation from -forward via 
QQuaternion::fromDirection (same as + // the scene_spec emission path). scale = identity. + score::gfx::RawLocalTransform xforms[6]{}; + for(int i = 0; i < 6; ++i) + { + xforms[i].translation[0] = eye[0]; + xforms[i].translation[1] = eye[1]; + xforms[i].translation[2] = eye[2]; + QVector3D fwd( + kFaces[i].forward[0], kFaces[i].forward[1], kFaces[i].forward[2]); + QVector3D up(kFaces[i].up[0], kFaces[i].up[1], kFaces[i].up[2]); + QQuaternion q = QQuaternion::fromDirection(-fwd, up); + xforms[i].rotation[0] = q.x(); + xforms[i].rotation[1] = q.y(); + xforms[i].rotation[2] = q.z(); + xforms[i].rotation[3] = q.scalar(); + xforms[i].scale[0] = 1.f; + xforms[i].scale[1] = 1.f; + xforms[i].scale[2] = 1.f; + } + r.registry().updateSlot( + res, raw_transform_slot, &xforms, sizeof(xforms)); + } +} + +void CameraArray::release(score::gfx::RenderList& r) +{ + if(raw_camera_slot.valid()) + r.registry().free(raw_camera_slot); + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_array_ref = {}; + m_xform_array_ref = {}; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/CameraArray.hpp b/src/plugins/score-plugin-threedim/Threedim/CameraArray.hpp new file mode 100644 index 0000000000..c357282a19 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/CameraArray.hpp @@ -0,0 +1,228 @@ +#pragma once +#include +#include + +#include + +#include +#include // sizeof(score::gfx::RawCameraData) in operator()() + +#include +#include + +#include +#include +#include +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Scene-producing node that emits a six-camera array laid out for cubemap +// / multiview rendering. 
Each camera is a scene_node with a +// scene_transform + camera_component payload; ScenePreprocessor's flatten +// visitor picks them up into FlatScene::cameras, and +// packAndUploadCameras packs them into the Camera UBO aux-buffer on +// Geometry Out. Multiview shaders (MULTIVIEW=6) then index camera[0..5] +// via gl_ViewIndex. +// +// Face convention follows the GL cubemap layout: +// camera[0] = +X, [1] = -X, [2] = +Y, [3] = -Y, [4] = +Z, [5] = -Z +// Each face uses a 90° square FOV with aspect 1:1 — consumers should +// render into a cube render target at any square resolution. +class CameraArray +{ +public: + halp_meta(name, "Camera Array") + halp_meta(c_name, "camera_array_avnd") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(authors, "ossia team") + halp_meta(uuid, "7a3e8d2f-1b94-4c6a-b7f5-8e2d0c1a4b93") + + // Six GL-ordered cubemap faces at 90° FoV, aspect 1:1. Suitable as + // both a reflection probe array and a point-shadow cube array — the + // distinction is downstream (which render target / depth-only flag), + // not in the camera math here. + struct ins + { + // Port-driven rebuild: each control's update() callback fires + // CameraArray::rebuild() on change. operator()() republishes. + struct : halp::xyz_spinboxes_f32<"Origin", halp::range{-10000., 10000., 0.}> + { void update(CameraArray& n) { n.rebuild(); } } origin; + struct : halp::hslider_f32<"Near", halp::range{0.001, 10., 0.1}> + { void update(CameraArray& n) { n.rebuild(); } } near_plane; + struct : halp::hslider_f32<"Far", halp::range{1., 100000., 1000.}> + { void update(CameraArray& n) { n.rebuild(); } } far_plane; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + // Canonical cubemap face orientations in the GL convention: + // { forward, up }. right = forward × up. 
+ struct Face + { + float forward[3]; + float up[3]; + }; + + // Six deterministic ids rooted at this node's address — each face + // needs a stable, distinct scene_node_id so merge_scenes treats them + // as six separate cameras (same-id camera entries would collapse). + std::array m_ids{}; + std::shared_ptr m_state; + int64_t m_version{0}; + uint8_t m_pending_dirty{ossia::scene_port::dirty_transform}; + + void rebuild() + { + if(!m_state) + { + m_state = std::make_shared(); + // Seed six distinct ids from this node's address: XOR + // (face index + 1) << 1 into the address, then set bit 0. The XOR + // only touches bits above bit 0, so the six values stay distinct + // after the final OR, and bit 0 guarantees they are all non-zero. + const auto base = reinterpret_cast(this); + for(int i = 0; i < 6; ++i) + m_ids[std::size_t(i)].value = (base ^ (std::uintptr_t(i + 1) << 1)) | 0x1u; + } + + static constexpr std::array kFaces{{ + {{ 1.f, 0.f, 0.f}, {0.f, -1.f, 0.f}}, // +X + {{-1.f, 0.f, 0.f}, {0.f, -1.f, 0.f}}, // -X + {{ 0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}, // +Y + {{ 0.f, -1.f, 0.f}, {0.f, 0.f, -1.f}}, // -Y + {{ 0.f, 0.f, 1.f}, {0.f, -1.f, 0.f}}, // +Z + {{ 0.f, 0.f, -1.f}, {0.f, -1.f, 0.f}}, // -Z + }}; + + const float near_f = inputs.near_plane.value; + const float far_f = inputs.far_plane.value; + const float eye[3] + = {inputs.origin.value.x, inputs.origin.value.y, + inputs.origin.value.z}; + + auto roots + = std::make_shared>(); + roots->reserve(6); + + for(int i = 0; i < 6; ++i) + { + auto cam = std::make_shared(); + cam->projection = ossia::camera_projection::perspective; + cam->yfov = float(M_PI) / 2.f; // 90° per face for a seamless cube + cam->aspect_ratio = 1.f; + cam->znear = near_f; + cam->zfar = far_f; + // Each face owns one RawCameraData inside our single 6-wide slot. + // Stamp a derived ref with the face's offset — same arena / + // internal_index / generation, offset bumped by i entries. 
+ if(m_array_ref.valid()) + { + cam->raw_slot = m_array_ref; + cam->raw_slot.offset = m_array_ref.offset + + uint32_t(i * sizeof(score::gfx::RawCameraData)); + cam->raw_slot.size = uint32_t(sizeof(score::gfx::RawCameraData)); + } + + ossia::scene_transform xform; + xform.translation[0] = eye[0]; + xform.translation[1] = eye[1]; + xform.translation[2] = eye[2]; + + // Same rationale as Camera.hpp: Qt's QQuaternion::fromDirection + // maps local +Z to `direction`, but GL cameras look along local -Z + // — pass the negated forward so local -Z ends up pointing along + // +forward (the face-direction). + QVector3D fwd( + kFaces[std::size_t(i)].forward[0], kFaces[std::size_t(i)].forward[1], + kFaces[std::size_t(i)].forward[2]); + QVector3D up( + kFaces[std::size_t(i)].up[0], kFaces[std::size_t(i)].up[1], + kFaces[std::size_t(i)].up[2]); + QQuaternion q = QQuaternion::fromDirection(-fwd, up); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = 1.f; + xform.scale[1] = 1.f; + xform.scale[2] = 1.f; + // Per-face RawTransform slot ref — same shape as the camera + // array ref, offset bumped to the i-th RawLocalTransform slot. + if(m_xform_array_ref.valid()) + { + xform.raw_slot = m_xform_array_ref; + xform.raw_slot.offset = m_xform_array_ref.offset + + uint32_t(i * sizeof(score::gfx::RawLocalTransform)); + xform.raw_slot.size + = uint32_t(sizeof(score::gfx::RawLocalTransform)); + } + + auto children + = std::make_shared>(); + children->push_back(xform); + children->push_back(ossia::camera_component_ptr(std::move(cam))); + + auto node = std::make_shared(); + node->id = m_ids[std::size_t(i)]; + node->children = std::move(children); + + roots->push_back(std::move(node)); + } + + m_state->roots = std::move(roots); + // Face 0 (+X) acts as the "active" camera for non-multiview consumers + // that only read the first entry. Multiview shaders ignore this and + // index all six via gl_ViewIndex. 
+ m_state->active_camera_id = m_ids[0]; + m_version++; + m_state->version = m_version; + m_pending_dirty = ossia::scene_port::dirty_transform; + } + + void operator()() + { + if(!m_state) + rebuild(); + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; + } + + // Render-thread hooks. A single RawCamera slot holds all six faces + // contiguously (6 × RawCameraData). The preprocessor will later + // consume this slot and compose view/projection matrices for each + // face with the target's aspect (1:1 for the cubemap case). + void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + score::gfx::GpuResourceRegistry::Slot raw_camera_slot; + score::gfx::GpuResourceRegistry::Slot raw_transform_slot; + + // Ossia-facing base refs for our 6-wide RawCamera + 6-wide + // RawTransform slots. Each emitted camera_component / scene_transform + // gets these refs with its per-face offset bumped. + ossia::gpu_slot_ref m_array_ref{}; + ossia::gpu_slot_ref m_xform_array_ref{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/CameraSwitch.hpp b/src/plugins/score-plugin-threedim/Threedim/CameraSwitch.hpp new file mode 100644 index 0000000000..a7ba217a54 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/CameraSwitch.hpp @@ -0,0 +1,346 @@ +#pragma once +#include +#include + +#include + +#include +#include + +#include +#include +#include + +namespace Threedim +{ + +// 4-way camera switch + weighted blender. +// +// Operates at the scene_spec level like SceneSwitch but specialised to a +// single purpose: select or blend between up to 4 Camera producers. Each +// input is expected to be the output of a `Threedim::Camera` node (or any +// scene_spec whose first root carries a scene_transform + camera_component +// pair). 
+// +// Modes: +// - Select: the `index` parameter picks one of the four inputs; the other +// three are ignored. Equivalent to dropping SceneSwitch in front +// of a camera, but avoids the caveat that non-camera scene data +// from the unselected inputs would get dropped too. +// - Blend : the `weights` (x,y,z,w) parameter linearly blends the +// positions + FOV + near/far of the four inputs, normalise-lerps +// (nlerp) the orientation quaternions. Weights are auto- +// normalised to sum=1 internally — users can pass raw +// envelopes / LFO outputs directly. +// +// Blend semantics chosen to match what TD's Camera Blend COMP does +// conceptually: treat each input camera as a "keyframe pose" and produce +// a smooth in-between. nlerp is fine for small angular deltas; when you +// need great-circle blending across wide angles, upgrade to slerp (two +// slerps for 4-way is the standard recipe). +// +// Unwired inputs fall back to a zero-weight contribution. When all wired +// inputs have zero effective weight the output is empty. +class CameraSwitch +{ +public: + halp_meta(name, "Camera Switch") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "camera_switch") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/camera-switch.html") + halp_meta(uuid, "d1e8c4b7-6a32-4f9e-b5d8-2c4f3a1e8b6d") + + struct ins + { + struct + { + halp_meta(name, "Camera 0"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } cam0; + struct + { + halp_meta(name, "Camera 1"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } cam1; + struct + { + halp_meta(name, "Camera 2"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } cam2; + struct + { + halp_meta(name, "Camera 3"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } cam3; + + enum CameraMode + { + Select, + Blend + }; + // Port-driven rebuild: controls trigger CameraSwitch::rebuild(). + // Upstream camera-input changes are detected in operator()(). 
+ struct Mode : halp::enum_t + { + struct range + { + std::string_view values[2]{"Select", "Blend"}; + CameraMode init{Select}; + }; + void update(CameraSwitch& n) { n.rebuild(); } + } mode; + + struct : halp::spinbox_i32<"Index", halp::irange{0, 3, 0}> + { void update(CameraSwitch& n) { n.rebuild(); } } index; + + // Four-channel blend weights. Negative values are clamped to zero. + struct : halp::xyzw_spinboxes_f32<"Weights", halp::range{-10000., 10000., 0.}> + { void update(CameraSwitch& n) { n.rebuild(); } } weights; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + // Stable id for the synthesised camera in Blend mode. One id kept for the + // whole life of the node so downstream preprocessor logic treats frames + // as updates to the same camera rather than a sequence of add/remove + // events. + ossia::scene_node_id m_id{}; + std::shared_ptr m_state; + int64_t m_version{0}; + uint8_t m_pending_dirty{ossia::scene_port::dirty_transform}; + // Cached upstream identity for detecting scene_in pointer/version + // changes from within the new 5-line operator()() republish path. + const ossia::scene_state* m_cached_cam_state[4]{}; + int64_t m_cached_cam_ver[4]{-1, -1, -1, -1}; + + // Locate the first (scene_transform, camera_component) pair in a scene. + // Returns false if the input has no camera (or is empty). 
+  // Extracts the camera pose carried by the first root node of `in`.
+  //
+  // Walks the children of (*in.state->roots)[0] and copies the transform
+  // payload into `xform` and the (non-null) camera payload into `cam`.
+  // If several children of the same kind exist, the last one wins.
+  // Returns true only when both a transform and a camera were found;
+  // on false the out-parameters may have been partially written.
+  //
+  // NOTE(review): the template arguments of the two ossia::get_if calls
+  // were lost in this copy of the patch. From the assignments below they
+  // presumably select the scene_transform and camera_component_ptr
+  // alternatives — confirm against the original file.
+  static bool extractCameraPose(
+      const ossia::scene_spec& in, ossia::scene_transform& xform,
+      ossia::camera_component& cam)
+  {
+    if(!in.state || !in.state->roots || in.state->roots->empty())
+      return false;
+    const auto& root = (*in.state->roots)[0];
+    if(!root || !root->children)
+      return false;
+
+    bool gotXform = false;
+    bool gotCam = false;
+    for(const auto& child : *root->children)
+    {
+      if(auto* t = ossia::get_if(&child))
+      {
+        xform = *t;
+        gotXform = true;
+      }
+      else if(auto* c = ossia::get_if(&child))
+      {
+        if(*c)
+        {
+          cam = **c;
+          gotCam = true;
+        }
+      }
+    }
+    return gotXform && gotCam;
+  }
+
+  // Recomputes the node's own output state.
+  //
+  // Select mode: nothing is built here (operator()() forwards the picked
+  // input); only a pending-dirty mark is queued.
+  // Blend mode: builds a single root node holding the weighted blend of
+  // the wired cameras' transforms and camera parameters, or clears the
+  // output when no wired camera has a positive weight.
+  void rebuild()
+  {
+    const int mode = inputs.mode.value;
+    if(mode == ins::CameraMode::Select)
+    {
+      // Select-mode: operator()() forwards the picked upstream
+      // scene_spec directly; rebuild() just marks pending dirty so
+      // downstream sees a transition event.
+      m_pending_dirty = 0xFF;
+      return;
+    }
+
+    // Blend mode. Negative weights are clamped to zero.
+    float w[4]{
+        inputs.weights.value.x, inputs.weights.value.y,
+        inputs.weights.value.z, inputs.weights.value.w};
+    for(float& x : w) x = x > 0.f ? x : 0.f;
+
+    const ossia::scene_spec* inputsArr[4]{
+        &inputs.cam0.scene, &inputs.cam1.scene,
+        &inputs.cam2.scene, &inputs.cam3.scene};
+
+    // Extract each input's pose; zero the weight of any missing one.
+    ossia::scene_transform xforms[4]{};
+    ossia::camera_component cams[4]{};
+    float effWeights[4]{};
+    float wsum = 0.f;
+    for(int i = 0; i < 4; ++i)
+    {
+      if(w[i] <= 0.f) continue;
+      if(!extractCameraPose(*inputsArr[i], xforms[i], cams[i]))
+        continue;
+      effWeights[i] = w[i];
+      wsum += w[i];
+    }
+
+    if(wsum <= 1e-6f)
+    {
+      // No wired-and-weighted camera to blend — emit empty.
+      if(m_state)
+      {
+        m_state->roots.reset();
+        m_state->active_camera_id = {};
+        m_version++;
+        m_state->version = m_version;
+      }
+      // Bump dirty so consumers (preprocessor cache, downstream
+      // SceneSelector) detect the empty-state transition. Without
+      // this they'd see the same shared_ptr identity + stale
+      // version + dirty=0 and keep rendering last frame's blend.
+      m_pending_dirty = 0xFF;
+      return;
+    }
+    for(float& x : effWeights) x /= wsum;
+
+    // The contributing camera with the largest normalised weight (lowest
+    // index on ties). wsum > 0 guarantees at least one contributor, so
+    // effWeights[lead] > 0. Used for the parameters that cannot be blended.
+    int lead = 0;
+    for(int i = 1; i < 4; ++i)
+      if(effWeights[i] > effWeights[lead])
+        lead = i;
+
+    // Blend transform: translation is weighted sum; rotation is nlerp
+    // (weighted sum of quaternions, then normalise); scale is weighted sum.
+    ossia::scene_transform outX{};
+    // The sums below use +=, so the accumulated fields must start at zero.
+    // Clear them explicitly instead of relying on scene_transform's
+    // defaults (declared elsewhere; a unit default scale would bias the
+    // blended scale by +1).
+    for(int k = 0; k < 3; ++k)
+    {
+      outX.translation[k] = 0;
+      outX.scale[k] = 0;
+    }
+    QQuaternion qSum(0, 0, 0, 0);
+    for(int i = 0; i < 4; ++i)
+    {
+      if(effWeights[i] <= 0.f) continue;
+      const float wi = effWeights[i];
+      for(int k = 0; k < 3; ++k)
+      {
+        outX.translation[k] += xforms[i].translation[k] * wi;
+        outX.scale[k] += xforms[i].scale[k] * wi;
+      }
+
+      // Quaternion double-cover handling: flip the sign of later quats if
+      // they point away from the running sum, to avoid interpolating the
+      // long way around.
+      // rotation[] is stored x, y, z, w; QQuaternion takes (scalar, x, y, z).
+      QQuaternion qi(
+          xforms[i].rotation[3], xforms[i].rotation[0],
+          xforms[i].rotation[1], xforms[i].rotation[2]);
+      if(QQuaternion::dotProduct(qSum, qi) < 0.f)
+        qi = -qi;
+      qSum += qi * wi;
+    }
+    // normalize() leaves a null quaternion untouched; fall back to the
+    // identity rotation rather than publishing an all-zero quaternion.
+    if(qSum.isNull())
+      qSum = QQuaternion(1, 0, 0, 0);
+    else
+      qSum.normalize();
+    outX.rotation[0] = qSum.x();
+    outX.rotation[1] = qSum.y();
+    outX.rotation[2] = qSum.z();
+    outX.rotation[3] = qSum.scalar();
+
+    // Blend camera parameters.
+    ossia::camera_component outCam{};
+    // Projection mode is not blendable: take it from the dominant
+    // contributing camera. (Reading cams[0] unconditionally picked up a
+    // default-constructed component whenever input 0 was unwired or had
+    // zero weight.)
+    outCam.projection = cams[lead].projection;
+    // Same reasoning as for outX: zero every accumulated field so the
+    // weighted sums do not start from camera_component's defaults.
+    outCam.yfov = 0;
+    outCam.aspect_ratio = 0;
+    outCam.xmag = 0;
+    outCam.ymag = 0;
+    outCam.znear = 0;
+    outCam.zfar = 0;
+    outCam.physical.focal_length = 0;
+    outCam.physical.focus_distance = 0;
+    outCam.physical.fstop = 0;
+    for(int i = 0; i < 4; ++i)
+    {
+      if(effWeights[i] <= 0.f) continue;
+      const float wi = effWeights[i];
+      outCam.yfov += cams[i].yfov * wi;
+      outCam.aspect_ratio += cams[i].aspect_ratio * wi;
+      outCam.xmag += cams[i].xmag * wi;
+      outCam.ymag += cams[i].ymag * wi;
+      outCam.znear += cams[i].znear * wi;
+      outCam.zfar += cams[i].zfar * wi;
+      outCam.physical.focal_length += cams[i].physical.focal_length * wi;
+      outCam.physical.focus_distance += cams[i].physical.focus_distance * wi;
+      outCam.physical.fstop += cams[i].physical.fstop * wi;
+    }
+
+    // Build the output scene_spec. m_state is allocated once and then
+    // mutated in place; m_version is bumped on every rebuild.
+    // NOTE(review): template arguments of make_shared / reinterpret_cast
+    // below were lost in this copy of the patch; left exactly as found.
+    if(!m_state)
+    {
+      m_state = std::make_shared();
+      m_id.value = reinterpret_cast(this) | 0x1u;
+    }
+
+    auto camPtr = std::make_shared(std::move(outCam));
+    auto children = std::make_shared>();
+    children->push_back(outX);
+    children->push_back(ossia::camera_component_ptr(std::move(camPtr)));
+
+    auto node = std::make_shared();
+    node->id = m_id;
+    node->children = std::move(children);
+
+    auto roots = std::make_shared>();
+    roots->push_back(std::move(node));
+
+    m_state->roots = std::move(roots);
+    m_state->active_camera_id = m_id;
+    m_version++;
+    m_state->version = m_version;
+    m_pending_dirty = ossia::scene_port::dirty_transform;
+  }
+
+  // Per-tick entry point: publishes the scene and its dirty mask on
+  // scene_out, rebuilding first when an upstream camera input changed.
+  void operator()()
+  {
+    // Detect upstream camera-input pointer/version changes so a
+    // scene_in that changed without a local control event still causes
+    // a rebuild. Controls themselves trigger rebuild via their
+    // update() callbacks.
+    const ossia::scene_spec* cams[4]{
+        &inputs.cam0.scene, &inputs.cam1.scene,
+        &inputs.cam2.scene, &inputs.cam3.scene};
+    bool upstream_changed = false;
+    for(int i = 0; i < 4; ++i)
+    {
+      const auto* s = cams[i]->state.get();
+      const int64_t v = s ? s->version : -1;
+      if(m_cached_cam_state[i] != s || m_cached_cam_ver[i] != v)
+      {
+        upstream_changed = true;
+        m_cached_cam_state[i] = s;
+        m_cached_cam_ver[i] = v;
+      }
+    }
+
+    if(inputs.mode.value == ins::CameraMode::Select)
+    {
+      // Forward the picked upstream scene directly — no local
+      // shared_ptr identity to preserve beyond what upstream already
+      // maintains. Out-of-range indices fall back to input 0.
+      const int idx = inputs.index.value;
+      const ossia::scene_spec* picked = nullptr;
+      switch(idx)
+      {
+        case 0: picked = &inputs.cam0.scene; break;
+        case 1: picked = &inputs.cam1.scene; break;
+        case 2: picked = &inputs.cam2.scene; break;
+        case 3: picked = &inputs.cam3.scene; break;
+        default: picked = &inputs.cam0.scene; break;
+      }
+      outputs.scene_out.scene.state = picked->state;
+      // Start from whatever rebuild() queued for a mode/index control
+      // edit — previously that mark was cleared here without ever being
+      // published, so switching cameras from the controls was never
+      // flagged — then force a full invalidation when a wired input
+      // changed upstream.
+      outputs.scene_out.dirty = m_pending_dirty;
+      if(upstream_changed && picked->state)
+        outputs.scene_out.dirty = 0xFF;
+      m_pending_dirty = 0;
+      return;
+    }
+
+    // NOTE(review): while no camera is wired m_state stays null, so this
+    // rebuilds (and reports dirty) on every tick — confirm that is intended.
+    if(!m_state || upstream_changed)
+      rebuild();
+    outputs.scene_out.scene.state = m_state;
+    outputs.scene_out.dirty = m_pending_dirty;
+    m_pending_dirty = 0;
+  }
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.cpp b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.cpp
new file mode 100644
index 0000000000..6dbafa8e4c
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.cpp
@@ -0,0 +1,236 @@
+#include "ConfigurePrimitive.hpp"
+
+#include
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Minimal glob matcher — shared logic with SceneGraphFilter.cpp, but
+// duplicated here to avoid pulling that TU's anonymous-namespace
+// contents. Move to a shared header if a third node needs it.
+// Matches `text` against a path glob.
+//
+//   `*`   any run (possibly empty) of characters other than '/'
+//   `**`  any run (possibly empty) of characters, '/' included
+//   `?`   exactly one character other than '/'
+//   anything else matches itself
+//
+// Iterative matcher with two backtrack points: the most recent `*` and
+// the most recent `**`. When a `*` cannot be extended (it would have to
+// swallow a '/'), matching falls back to the enclosing `**` instead of
+// failing outright — e.g. "**/a*b" must match "x/ay/ab".
+//
+// NOTE(review): the comment preceding this function says the logic is
+// duplicated from SceneGraphFilter.cpp; if that copy keeps a single
+// backtrack point it needs the same fix.
+bool configure_glob_match(std::string_view pattern, std::string_view text) noexcept
+{
+  constexpr std::size_t npos = std::string_view::npos;
+
+  std::size_t pi = 0, ti = 0;
+  // Resume points: pattern index just past the wildcard, and the text
+  // index the wildcard currently extends to.
+  std::size_t star_pi = npos, star_ti = 0;   // last `*`
+  std::size_t dstar_pi = npos, dstar_ti = 0; // last `**`
+
+  while(ti < text.size())
+  {
+    if(pi < pattern.size())
+    {
+      const char pc = pattern[pi];
+      if(pc == '*')
+      {
+        if(pi + 1 < pattern.size() && pattern[pi + 1] == '*')
+        {
+          pi += 2;
+          dstar_pi = pi;
+          dstar_ti = ti;
+          // A new `**` supersedes any earlier `*`: whatever that `*`
+          // could still absorb, the `**` can absorb as well.
+          star_pi = npos;
+        }
+        else
+        {
+          ++pi;
+          star_pi = pi;
+          star_ti = ti;
+        }
+        continue;
+      }
+      if((pc == '?' && text[ti] != '/') || pc == text[ti])
+      {
+        ++pi;
+        ++ti;
+        continue;
+      }
+    }
+
+    // Mismatch (or pattern exhausted): grow the innermost wildcard by one
+    // character and retry. A `*` may never grow over a '/'.
+    if(star_pi != npos && text[star_ti] != '/')
+    {
+      pi = star_pi;
+      ++star_ti;
+      ti = star_ti;
+      continue;
+    }
+    if(dstar_pi != npos)
+    {
+      // Rewinding the pattern to just after the `**` also rewinds past
+      // any later `*`; it is re-armed when the pattern reaches it again.
+      star_pi = npos;
+      pi = dstar_pi;
+      ++dstar_ti;
+      ti = dstar_ti;
+      continue;
+    }
+    return false;
+  }
+
+  // Text consumed: only trailing wildcards may remain in the pattern.
+  while(pi < pattern.size() && pattern[pi] == '*')
+    ++pi;
+  return pi == pattern.size();
+}
+
+// True when `text` matches at least one of the globs in `pats`.
+bool configure_any_match(
+    const std::vector& pats, std::string_view text) noexcept
+{
+  for(const auto& p : pats)
+    if(configure_glob_match(p, text))
+      return true;
+  return false;
+}
+
+// Applies one ConfigurePrimitive::Mode to every node whose slash-joined
+// path matches one of `paths`.
+struct PrimitiveWalker
+{
+  ConfigurePrimitive::Mode mode;
+  const std::vector& paths;
+
+  // Returns the updated node. Shares the original shared_ptr when no
+  // descendant needed a change, so pointer identity is preserved for
+  // un-touched branches (keeps downstream caches warm).
+  //
+  // `path` is the node's own path ("/root/child/..."); child paths are
+  // formed by appending '/' + child name.
+  ossia::scene_node_ptr
+  rewrite(const ossia::scene_node_ptr& src, const std::string& path) const
+  {
+    if(!src)
+      return src;
+
+    const bool matches = configure_any_match(paths, path);
+    bool need_self_update = false;
+    bool new_active = src->active;
+    bool new_visible = src->visible;
+
+    if(matches)
+    {
+      switch(mode)
+      {
+        case ConfigurePrimitive::SetActive:
+          new_active = true;
+          break;
+        case ConfigurePrimitive::SetInactive:
+          new_active = false;
+          break;
+        case ConfigurePrimitive::SetVisible:
+          new_visible = true;
+          break;
+        case ConfigurePrimitive::SetInvisible:
+          new_visible = false;
+          break;
+        case ConfigurePrimitive::SetActiveAndVisible:
+          new_active = true;
+          new_visible = true;
+          break;
+        case ConfigurePrimitive::SetInactiveAndInvisible:
+          new_active = false;
+          new_visible = false;
+          break;
+      }
+      // A match that already carries the requested flags is not a change.
+      need_self_update
+          = (new_active != src->active) || (new_visible != src->visible);
+    }
+
+    // Leaf: copy only when this node's own flags change.
+    if(!src->has_children())
+    {
+      if(!need_self_update)
+        return src;
+      auto copy = std::make_shared(*src);
+      copy->active = new_active;
+      copy->visible = new_visible;
+      copy->dirty_index = src->dirty_index + 1;
+      return copy;
+    }
+
+    // Interior node: rewrite node-typed children recursively; every other
+    // payload is carried over untouched.
+    auto new_children
+        = std::make_shared>();
+    new_children->reserve(src->children->size());
+    bool any_child_changed = false;
+    for(const auto& payload : *src->children)
+    {
+      if(auto* sub = ossia::get_if(&payload))
+      {
+        if(!*sub)
+        {
+          new_children->push_back(payload);
+          continue;
+        }
+        std::string childPath = path + '/' + (*sub)->name;
+        auto rw = rewrite(*sub, childPath);
+        // Pointer identity tells whether anything below changed.
+        if(rw.get() != sub->get())
+          any_child_changed = true;
+        new_children->push_back(rw ? rw : *sub);
+      }
+      else
+      {
+        new_children->push_back(payload);
+      }
+    }
+
+    if(!need_self_update && !any_child_changed)
+      return src;
+
+    auto copy = std::make_shared(*src);
+    copy->active = new_active;
+    copy->visible = new_visible;
+    copy->children = std::move(new_children);
+    copy->dirty_index = src->dirty_index + 1;
+    return copy;
+  }
+};
+
+} // namespace
+
+// Recomputes m_cached_out from the current input scene, mode and paths,
+// and queues the dirty mask that operator()() publishes on the next tick.
+void ConfigurePrimitive::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  if(!in.state)
+  {
+    // Input lost. Refresh the cache keys too — otherwise operator()()
+    // keeps seeing a stale (pointer, version) pair and rebuilds on every
+    // tick — and flag the transition once so consumers notice that the
+    // scene went away.
+    const bool had_output = static_cast<bool>(m_cached_out);
+    m_cached_out.reset();
+    m_cached_in_state = nullptr;
+    m_cached_in_version = -1;
+    m_pending_dirty = had_output ? 0xFF : 0;
+    return;
+  }
+
+  const auto* in_state = in.state.get();
+  const int64_t in_version = in.state->version;
+
+  // Empty pattern list = no-op passthrough. Skip the walk entirely.
+  if(inputs.paths.value.empty())
+  {
+    m_cached_out = in.state;
+    m_cached_in_state = in_state;
+    m_cached_in_version = in_version;
+    m_cached_mode = inputs.mode.value;
+    m_cached_paths = inputs.paths.value;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  PrimitiveWalker w{Mode(inputs.mode.value), inputs.paths.value};
+  auto new_roots
+      = std::make_shared>();
+  if(in.state->roots)
+  {
+    new_roots->reserve(in.state->roots->size());
+    for(const auto& r : *in.state->roots)
+    {
+      // Root paths are "/" + name; null roots are dropped.
+      if(auto rw = w.rewrite(r, r ? ("/" + r->name) : std::string{}))
+        new_roots->push_back(std::move(rw));
+    }
+  }
+
+  // Copy the input state wholesale, then swap in the rewritten roots.
+  auto new_state = std::make_shared(*in.state);
+  new_state->roots = std::move(new_roots);
+  new_state->version = ++m_version_counter;
+  new_state->dirty_index = in.state->dirty_index + 1;
+
+  m_cached_out = new_state;
+  m_cached_in_state = in_state;
+  m_cached_in_version = in_version;
+  m_cached_mode = inputs.mode.value;
+  m_cached_paths = inputs.paths.value;
+  m_pending_dirty = 0xFF;
+}
+
+void ConfigurePrimitive::operator()()
+{
+  // Detect upstream scene_in pointer/version change and rebuild.
+  // Control changes come through their update() callbacks.
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  const bool upstream_changed
+      = m_cached_in_state != in_state || m_cached_in_version != in_version;
+  if(upstream_changed || (!m_cached_out && in_state))
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.hpp b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.hpp
new file mode 100644
index 0000000000..508112f65a
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/ConfigurePrimitive.hpp
@@ -0,0 +1,108 @@
+#pragma once
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace Threedim
+{
+
+// Authors metadata flags on matching scene_nodes: active, visible.
+// Matches Solaris's "Configure Primitive" LOP, trimmed to the flags
+// that are meaningful for a live renderer. (USD also has `kind` and
+// `purpose` fields; we can add those later if needed — for now they
+// don't change rendering behaviour.)
+//
+// Usage pattern:
+//   glTF → ConfigurePrimitive(paths=["*/chairs/*"], active=false) → ScenePreprocessor
+// disables the entire `chairs` subtree non-destructively — flipping
+// the toggle re-activates it without reloading the glTF or rebuilding
+// any GPU state.
+//
+// `visible` acts at the leaf level (hides from rendering but keeps the
+// subtree composed); `active` is stronger (skips the subtree in the
+// flatten walk entirely — no transforms applied, no data uploaded).
+class ConfigurePrimitive
+{
+public:
+  halp_meta(name, "Configure Primitive")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "configure_primitive")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/configure-primitive.html")
+  halp_meta(uuid, "4b8e9d2a-7c5f-4e3a-9b1c-3d2f5e8a7b9c")
+
+  enum Mode
+  {
+    // Applies the flags to every matching node. Non-matching nodes
+    // keep their existing flags (no change).
+    SetActive,
+    SetInactive,
+    SetVisible,
+    SetInvisible,
+    // Apply both at once — useful for "this subtree is off right now".
+    SetActiveAndVisible,
+    SetInactiveAndInvisible
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    // Port-driven rebuild: controls trigger rebuild() via update().
+    // scene_in pointer/version changes detected in operator()().
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[6]{
+            "Set active", "Set inactive",
+            "Set visible", "Set invisible",
+            "Active + visible", "Inactive + invisible"};
+        int init{0};
+      };
+      void update(ConfigurePrimitive& n) { n.rebuild(); }
+    } mode;
+
+    // Path-glob list. Same syntax as SceneGraphFilter: `*` wildcards
+    // within a segment, `**` crosses slashes, `?` single char, literal
+    // names otherwise.
+    struct : halp::val_port<"Paths", std::vector>
+    { void update(ConfigurePrimitive& n) { n.rebuild(); } } paths;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  void rebuild();
+  void operator()();
+
+  std::shared_ptr m_cached_out;
+  uint8_t m_pending_dirty{0xFF};
+  const ossia::scene_state* m_cached_in_state{};
+  int64_t m_cached_in_version{-1};
+  int m_cached_mode{-1};
+  std::vector m_cached_paths;
+  int64_t m_version_counter{0};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/CreateCollection.cpp b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.cpp
new file mode 100644
index 0000000000..a340cabdea
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.cpp
@@ -0,0 +1,80 @@
+#include "CreateCollection.hpp"
+
+namespace Threedim
+{
+
+void CreateCollection::rebuild()
+{
+  const auto& in = inputs.scene_in.scene;
+  const ossia::scene_state* in_state = in.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+
+  const auto& cur_name = inputs.name.value;
+  const auto& cur_paths = inputs.paths.value;
+  const auto& cur_tags = inputs.tags.value;
+
+  m_cached_in_state = in_state;
+  m_cached_in_version = in_version;
+
+  // An empty name or an empty path list is a no-op — pass the input
+  // through so the node is safe to wire in before its fields are filled.
+  if(cur_name.empty() || cur_paths.empty())
+  {
+    m_cached_out = in.state;
+    m_pending_dirty = 0xFF;
+    return;
+  }
+
+  auto coll = std::make_shared();
+  coll->name = cur_name;
+  for(const auto& p : cur_paths)
+    coll->paths.push_back(p);
+  for(const auto& t : cur_tags)
+    coll->tags.push_back(t);
+
+  // Rebuild the collections vector: copy existing entries whose name
+  // doesn't collide with ours (overwriting duplicates keeps the
+  // interaction model simple — each CreateCollection "owns" its name),
+  // then append the new one.
+  auto merged = std::make_shared>();
+  if(in_state && in_state->collections)
+  {
+    for(const auto& c : *in_state->collections)
+      if(c && c->name != cur_name)
+        merged->push_back(c);
+  }
+  merged->push_back(std::move(coll));
+
+  auto state = std::make_shared();
+  if(in_state)
+  {
+    state->roots = in_state->roots;
+    state->materials = in_state->materials;
+    state->animations = in_state->animations;
+    state->cameras = in_state->cameras;
+    state->skeletons = in_state->skeletons;
+    state->environment = in_state->environment;
+    state->active_camera_id = in_state->active_camera_id;
+  }
+  state->collections = std::move(merged);
+  state->version = ++m_version_counter;
+  state->dirty_index = m_version_counter;
+
+  m_cached_out = state;
+  m_pending_dirty = 0xFF;
+}
+
+void CreateCollection::operator()()
+{
+  const auto* in_state = inputs.scene_in.scene.state.get();
+  const int64_t in_version = in_state ? in_state->version : -1;
+  const bool upstream_changed
+      = m_cached_in_state != in_state || m_cached_in_version != in_version;
+  if(!m_cached_out || upstream_changed)
+    rebuild();
+  outputs.scene_out.scene.state = m_cached_out;
+  outputs.scene_out.dirty = m_pending_dirty;
+  m_pending_dirty = 0;
+}
+
+} // namespace Threedim
diff --git a/src/plugins/score-plugin-threedim/Threedim/CreateCollection.hpp b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.hpp
new file mode 100644
index 0000000000..38a1055f92
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/CreateCollection.hpp
@@ -0,0 +1,79 @@
+#pragma once
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace Threedim
+{
+
+// Authors a named collection (Solaris-style reusable path set) and
+// stamps it onto the passthrough scene_spec's collections vector.
+//
+// Collections are addressable by name anywhere downstream — a consumer
+// node that takes a collection name (e.g. a future SceneGraphFilter
+// "by collection" mode) resolves the paths at consume-time. This
+// decouples "what is the set of things I care about?" from "what am I
+// doing to them?" — the classic Solaris LIVRPS composition win.
+//
+// Multiple CreateCollection nodes can chain: each contributes its own
+// named collection to the scene, and downstream consumers can pick any
+// of them by name. merge_scenes concatenates collections additively
+// across multi-producer merges.
+class CreateCollection +{ +public: + halp_meta(name, "Create Collection") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "create_collection") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/create-collection.html") + halp_meta(uuid, "6c2e9b7a-4d3f-4a1c-8f5e-2b7d9e4c3a1f") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: controls trigger rebuild(); upstream + // scene_in changes detected in operator()(). + struct : halp::lineedit<"Name", ""> + { void update(CreateCollection& n) { n.rebuild(); } } name; + struct : halp::val_port<"Paths", std::vector> + { void update(CreateCollection& n) { n.rebuild(); } } paths; + struct : halp::val_port<"Tags", std::vector> + { void update(CreateCollection& n) { n.rebuild(); } } tags; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp b/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp index c5783a3907..c6f4fcaf43 100644 --- a/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/CubemapComposer.hpp @@ -6,8 +6,13 @@ #include +#include + #include +#include +#include + namespace Threedim { @@ -34,12 +39,50 @@ class CubemapComposer struct { - halp::gpu_texture_output<"Cubemap"> cubemap; + halp::gpu_cubemap_output<"Cubemap"> cubemap; + // Scene-graph route: emits a scene_spec whose environment.skybox_texture + // points at our cube handle. See CubemapLoader for the same pattern. 
+ struct + { + halp_meta(name, "Scene"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; } outputs; + // Per-face shape cache. Drives texture-recreation when face size changes. + // Content-change detection uses the producer's `changed` flag instead of + // a bytes-pointer compare — pointer identity missed in-place buffer + // updates (video readback into a ring buffer reuses the same pointer + // address, so the old fingerprint check stayed equal across content + // changes and the cube never re-uploaded). + struct FaceFingerprint + { + int width{0}; + int height{0}; + }; + QRhiTexture* m_cubemapTex{}; int m_faceSize{0}; bool m_dirty{true}; + FaceFingerprint m_lastFaces[6]{}; + std::shared_ptr m_sceneState; + int64_t m_sceneVersion{0}; + void* m_lastPublishedHandle{}; + + // Dtor safety net — same rationale as CubemapLoader: guarantees the + // VkImage is deleteLater'd even if release(RenderList&) was skipped, + // so QRhi's destructor drains the pending-delete list before + // vkDestroyDevice. Without this, Vulkan validation reports a leaked + // VkImage on exit. + ~CubemapComposer() + { + if(m_cubemapTex) + { + m_cubemapTex->deleteLater(); + m_cubemapTex = nullptr; + } + } void operator()() { } @@ -52,9 +95,28 @@ class CubemapComposer score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, score::gfx::Edge* e) { - // Determine face size from the largest input + // Determine face size from the largest input; detect content changes + // by reading the producer's `changed` flag (set by halp::texture's + // update() — see avendish texture_formats.hpp). Resetting `changed` + // to false after consumption keeps the next frame's check fresh. + // Size changes are tracked separately so a producer that resizes the + // face still triggers a texture recreation even when it forgot to + // toggle `changed`. 
int maxSize = 0; - auto checkFace = [&](const auto& tex) { + int faceIdx = 0; + auto checkFace = [&](auto& tex) { + FaceFingerprint cur{tex.texture.width, tex.texture.height}; + const bool sizeChanged + = (cur.width != m_lastFaces[faceIdx].width + || cur.height != m_lastFaces[faceIdx].height); + const bool contentChanged = tex.texture.changed; + if(sizeChanged || contentChanged) + { + m_lastFaces[faceIdx] = cur; + m_dirty = true; + } + tex.texture.changed = false; // consumed; producer will set it on next update() + ++faceIdx; if(tex.texture.bytes && tex.texture.width > 0 && tex.texture.height > 0) { int s = std::max(tex.texture.width, tex.texture.height); @@ -94,6 +156,22 @@ class CubemapComposer outputs.cubemap.texture.handle = m_cubemapTex; m_dirty = true; } + + // Publish the cube on the Scene outlet (skybox_texture only — other + // environment fields are left for EnvironmentLoader / elsewhere to + // populate, merge_scenes overlays field-by-field). + if(!m_sceneState) + m_sceneState = std::make_shared(); + if(m_lastPublishedHandle != m_cubemapTex) + { + m_sceneState->environment = {}; + m_sceneState->environment.skybox_texture.native_handle = m_cubemapTex; + m_lastPublishedHandle = m_cubemapTex; + m_sceneVersion++; + m_sceneState->version = m_sceneVersion; + outputs.scene_out.scene.state = m_sceneState; + outputs.scene_out.dirty = ossia::scene_port::dirty_environment; + } } void release(score::gfx::RenderList& r) @@ -105,6 +183,14 @@ class CubemapComposer } m_faceSize = 0; outputs.cubemap.texture.handle = nullptr; + if(m_sceneState) + { + m_sceneState->environment = {}; + m_lastPublishedHandle = nullptr; + m_sceneVersion++; + m_sceneState->version = m_sceneVersion; + outputs.scene_out.dirty = ossia::scene_port::dirty_environment; + } } void runInitialPasses( diff --git a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp index b3d596e42e..73b6d32297 100644 --- 
a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.cpp @@ -2,17 +2,24 @@ #include -#include -#include +#include #include namespace Threedim { -// Fullscreen triangle vertex shader +// Fullscreen triangle vertex shader. Applies clipSpaceCorrMatrix + the +// non-GL conditional Y-flip — matches the engine-wide ossia convention +// (see isf.cpp's vertexInitFunc). Guarantees v_texcoord.y=1 is the top +// of the rendered face across GL / Vulkan / Metal / D3D. static const constexpr auto equirect_vs = R"_(#version 450 +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 RENDERSIZE; +} renderer; + layout(location = 0) out vec2 v_texcoord; out gl_PerVertex { vec4 gl_Position; }; @@ -22,25 +29,39 @@ void main() // Fullscreen triangle vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); v_texcoord = pos; - gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); + gl_Position = renderer.clipSpaceCorrMatrix * vec4(pos * 2.0 - 1.0, 0.0, 1.0); +#if defined(QSHADER_SPIRV) || defined(QSHADER_HLSL) || defined(QSHADER_MSL) + gl_Position.y = -gl_Position.y; +#endif } )_"; -// Fragment shader: sample equirectangular map for a specific cubemap face -// The face index is passed via UBO +// Fragment shader: sample equirectangular map for a specific cubemap face. +// renderer_t (binding 0) matches the engine convention; FaceInfo moves to +// binding 2 so it doesn't collide. 
static const constexpr auto equirect_fs = R"_(#version 450 +layout(std140, binding = 0) uniform renderer_t { + mat4 clipSpaceCorrMatrix; + vec2 RENDERSIZE; +} renderer; + layout(location = 0) in vec2 v_texcoord; layout(location = 0) out vec4 fragColor; -layout(std140, binding = 0) uniform FaceInfo { +layout(std140, binding = 2) uniform FaceInfo { int faceIndex; } face; -layout(binding = 1) uniform sampler2D equirectMap; +layout(binding = 3) uniform sampler2D equirectMap; const float PI = 3.14159265358979323846; +// Face direction — v_texcoord.y=1 is the TOP of the rendered face +// (after the vertex stage's clipSpaceCorrMatrix + non-GL flip). This +// maps to sampled UV.y=0 in QRhi's top-left-origin UV, which per cube +// spec corresponds to cube-spec t=-1 → direction biased toward +Y. +// Hence the signs on `v` (flipped vs. the legacy raw-NDC form). vec3 faceDirection(int faceIdx, vec2 uv) { // Map UV from [0,1] to [-1,1] @@ -50,12 +71,12 @@ vec3 faceDirection(int faceIdx, vec2 uv) // QRhi cubemap face order: +X, -X, +Y, -Y, +Z, -Z switch(faceIdx) { - case 0: return vec3( 1.0, -v, -u); // +X - case 1: return vec3(-1.0, -v, u); // -X - case 2: return vec3( u, 1.0, v); // +Y - case 3: return vec3( u, -1.0, -v); // -Y - case 4: return vec3( u, -v, 1.0); // +Z - case 5: return vec3( -u, -v, -1.0); // -Z + case 0: return vec3( 1.0, v, -u); // +X + case 1: return vec3(-1.0, v, u); // -X + case 2: return vec3( u, 1.0, -v); // +Y + case 3: return vec3( u, -1.0, v); // -Y + case 4: return vec3( u, v, 1.0); // +Z + case 5: return vec3( -u, v, -1.0); // -Z default: return vec3(0.0); } } @@ -64,44 +85,43 @@ void main() { vec3 dir = normalize(faceDirection(face.faceIndex, v_texcoord)); - // Convert direction to equirectangular UV - float theta = atan(dir.z, dir.x); // [-PI, PI] - float phi = asin(clamp(dir.y, -1.0, 1.0)); // [-PI/2, PI/2] + // Convert direction to equirectangular UV. + // Longitude: atan2(z, x) ∈ [-π, π] → u ∈ [0, 1]. + // Latitude: asin(y) ∈ [-π/2, π/2]. 
+ // + // Y flip: QRhi normalizes texture sampling to top-left-origin UV + // (UV.y = 0 at the top of the stored image — uniform across + // backends, see qrhi.cpp + QRhi::isYUpInFramebuffer). QImage + // uploads via uploadTexture(QImage) land scanline 0 at the + // texture's UV.y = 0, so sky (image top) is at UV.y = 0 and + // ground (image bottom) at UV.y = 1. The raw formula + // `v = phi/π + 0.5` would put sky at UV.y = 1 — wrong. Flip. + // + // LearnOpenGL uses the unflipped formula and works because GL's + // bottom-left-origin UV cancels the inversion — QRhi's top-left + // convention doesn't cancel it, so we flip explicitly. + // + // (Cube-face rendering side: this shader, like the rest of the + // IBL / test-cube shader family, writes raw NDC without + // clipSpaceCorrMatrix. That choice is backend-specific — the + // face-direction convention in `faceDirection()` above matches + // what Vulkan / Metal / D3D store after rasterization. Under + // OpenGL the whole cube content ends up vertically flipped — + // normalising that would require either applying + // clipSpaceCorrMatrix across every shader in the family OR + // conditionally flipping v_texcoord by isYUpInFramebuffer. + // Out of scope for this edit.) 
+ float theta = atan(dir.z, dir.x); + float phi = asin(clamp(dir.y, -1.0, 1.0)); vec2 equirectUV; equirectUV.x = theta / (2.0 * PI) + 0.5; - equirectUV.y = phi / PI + 0.5; + equirectUV.y = 0.5 - phi / PI; fragColor = texture(equirectMap, equirectUV); } )_"; -void CubemapLoader::loadImage() -{ - const auto& path = inputs.image.value; - if(path.empty()) - { - m_loadedImage = QImage{}; - return; - } - - QString qpath = QString::fromStdString(path); - if(!QFileInfo::exists(qpath)) - { - m_loadedImage = QImage{}; - return; - } - - QImage img(qpath); - if(img.isNull()) - { - m_loadedImage = QImage{}; - return; - } - - m_loadedImage = img.convertToFormat(QImage::Format_RGBA8888); -} - QImage CubemapLoader::extractFace(int faceIndex) const { if(m_loadedImage.isNull()) @@ -179,6 +199,23 @@ void CubemapLoader::createCubemapTexture(QRhi& rhi, int faceSize) m_cubemapTex->create(); outputs.cubemap.texture.handle = m_cubemapTex; + + // Publish the cube on the Scene outlet too: one shared_ptr-stable + // scene_state whose environment.skybox_texture.native_handle points at + // our QRhiTexture. Version bumps only when the handle actually changes + // so merge_scenes / ScenePreprocessor short-circuit unchanged frames. 
+ if(!m_sceneState) + m_sceneState = std::make_shared(); + if(m_lastPublishedHandle != m_cubemapTex) + { + m_sceneState->environment = {}; // only skybox_texture is ours to touch + m_sceneState->environment.skybox_texture.native_handle = m_cubemapTex; + m_lastPublishedHandle = m_cubemapTex; + m_sceneVersion++; + m_sceneState->version = m_sceneVersion; + outputs.scene_out.scene.state = m_sceneState; + outputs.scene_out.dirty = ossia::scene_port::dirty_environment; + } } void CubemapLoader::releaseCubemapTexture() @@ -204,9 +241,20 @@ void CubemapLoader::releaseCubemapTexture() } m_faceSize = 0; outputs.cubemap.texture.handle = nullptr; + + // Clear the scene outlet too: downstream merge_scenes will stop + // contributing a skybox_texture from us once the handle goes null. + if(m_sceneState) + { + m_sceneState->environment = {}; + m_lastPublishedHandle = nullptr; + m_sceneVersion++; + m_sceneState->version = m_sceneVersion; + outputs.scene_out.dirty = ossia::scene_port::dirty_environment; + } } -void CubemapLoader::releaseEquirectResources() +void CubemapLoader::releaseEquirectResources(score::gfx::RenderList* renderer) { if(m_equirectPipeline) { @@ -220,7 +268,10 @@ void CubemapLoader::releaseEquirectResources() } if(m_equirectUbo) { - m_equirectUbo->deleteLater(); + if(renderer) + renderer->releaseBuffer(m_equirectUbo); + else + m_equirectUbo->deleteLater(); m_equirectUbo = nullptr; } if(m_equirectSampler) @@ -235,7 +286,10 @@ void CubemapLoader::releaseEquirectResources() } if(m_quadVbuf) { - m_quadVbuf->deleteLater(); + if(renderer) + renderer->releaseBuffer(m_quadVbuf); + else + m_quadVbuf->deleteLater(); m_quadVbuf = nullptr; } } @@ -247,6 +301,7 @@ void CubemapLoader::setupEquirectPipeline(score::gfx::RenderList& renderer) // UBO for face index m_equirectUbo = rhi.newBuffer( QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, sizeof(int32_t) * 4); + m_equirectUbo->setName("CubemapLoader::equirect_ubo"); m_equirectUbo->create(); // Sampler for equirectangular 
source @@ -255,15 +310,25 @@ void CubemapLoader::setupEquirectPipeline(score::gfx::RenderList& renderer) QRhiSampler::Repeat, QRhiSampler::ClampToEdge); m_equirectSampler->create(); - // SRB + // SRB — matches the new shader layout: + // binding 0: renderer_t (shared engine UBO with clipSpaceCorrMatrix) + // binding 2: FaceInfo (our per-face index) + // binding 3: equirectangular source sampler + // Binding 1 is reserved for the engine's process_t UBO convention + // (not used here, but skipped to avoid future collisions). m_equirectSrb = rhi.newShaderResourceBindings(); m_equirectSrb->setBindings( {QRhiShaderResourceBinding::uniformBuffer( 0, + QRhiShaderResourceBinding::VertexStage + | QRhiShaderResourceBinding::FragmentStage, + &renderer.outputUBO()), + QRhiShaderResourceBinding::uniformBuffer( + 2, QRhiShaderResourceBinding::FragmentStage, m_equirectUbo), QRhiShaderResourceBinding::sampledTexture( - 1, + 3, QRhiShaderResourceBinding::FragmentStage, m_equirectTex, m_equirectSampler)}); @@ -312,14 +377,31 @@ void CubemapLoader::update( score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, score::gfx::Edge* e) { - if(!m_imageChanged) - return; - - loadImage(); + // No-op on the render thread. The decode runs on the halp file-port + // worker (see image_t::process in CubemapLoader.hpp) which delivers + // the decoded QImage to m_loadedImage and sets m_imageChanged. + // runInitialPasses() picks that up and uploads + transcodes the cube. } void CubemapLoader::release(score::gfx::RenderList& r) { + releaseEquirectResources(&r); + releaseCubemapTexture(); +} + +CubemapLoader::~CubemapLoader() +{ + // Safety net — idempotent. releaseEquirectResources() and + // releaseCubemapTexture() null each pointer after deleteLater(), so + // calling them again is a no-op if the framework already ran + // release(RenderList&). 
+ if(m_cubemapTex || m_equirectTex) + { + qDebug() << "[BUFTRACE] ~CubemapLoader FALLBACK this=" << (void*)this + << " m_cubemapTex=" << (void*)m_cubemapTex + << " m_equirectTex=" << (void*)m_equirectTex + << " (release(RenderList&) was never called — leaked textures)"; + } releaseEquirectResources(); releaseCubemapTexture(); } @@ -390,14 +472,21 @@ void CubemapLoader::renderEquirectangular( } else { - // Update SRB if equirect texture changed + // Update SRB if equirect texture changed. Mirror the slot layout + // from setupEquirectPipeline: binding 0 = engine renderer_t, + // binding 2 = FaceInfo, binding 3 = equirect sampler. m_equirectSrb->setBindings( {QRhiShaderResourceBinding::uniformBuffer( 0, + QRhiShaderResourceBinding::VertexStage + | QRhiShaderResourceBinding::FragmentStage, + &renderer.outputUBO()), + QRhiShaderResourceBinding::uniformBuffer( + 2, QRhiShaderResourceBinding::FragmentStage, m_equirectUbo), QRhiShaderResourceBinding::sampledTexture( - 1, + 3, QRhiShaderResourceBinding::FragmentStage, m_equirectTex, m_equirectSampler)}); diff --git a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp index 26d0a0ddf4..d1bdd65d19 100644 --- a/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/CubemapLoader.hpp @@ -1,15 +1,22 @@ #pragma once #include +#include #include #include #include +#include + #include #include +#include +#include +#include + namespace Threedim { @@ -35,9 +42,37 @@ class CubemapLoader struct ins { - struct : halp::lineedit<"Image", ""> + // File-port boilerplate — same pattern as ImageLoader. process() + // runs on the file-load worker thread, decodes the image off the + // render thread, returns a lambda that stages the decoded QImage + // onto the node from the execution thread. 
See diagnostic 041 — + // the previous lineedit<…> path called QImage(qpath) from update() + // on the render thread, blocking command recording for many frames + // on a large cube cross / equirect HDR. + struct image_t : halp::file_port<"Image", halp::mmap_file_view> { - void update(CubemapLoader& self) { self.m_imageChanged = true; } + halp_meta(extensions, + "Images (*.png *.jpg *.jpeg *.bmp *.tga *.webp *.tif *.tiff *.hdr *.exr)"); + static std::function process(file_type data) + { + QImage img; + if(!data.bytes.empty()) + { + img.loadFromData( + reinterpret_cast(data.bytes.data()), + (int)data.bytes.size()); + } + if(img.isNull() && !data.filename.empty()) + { + img = QImage(data.filename.data()); + } + if(!img.isNull() && img.format() != QImage::Format_RGBA8888) + img = img.convertToFormat(QImage::Format_RGBA8888); + return [img = std::move(img)](CubemapLoader& self) mutable { + self.m_loadedImage = std::move(img); + self.m_imageChanged = true; + }; + } } image; struct : halp::enum_t @@ -53,9 +88,32 @@ class CubemapLoader struct { - halp::gpu_texture_output<"Cubemap"> cubemap; + // Raw cube texture — kept for consumers that want the handle + // directly (e.g. a bare-skybox rendering shader). Tagged via the + // new halp::gpu_cubemap_output so sinks know to grab-from-source + // rather than allocate a 2D render target. + halp::gpu_cubemap_output<"Cubemap"> cubemap; + + // Scene-graph output: a scene_spec whose scene_environment has only + // skybox_texture.native_handle populated (no ambient / fog / etc., + // no roots). Lets users wire the cubemap into a scene without a + // side-channel cable — merge_scenes's per-field env overlay folds + // it together with an EnvironmentLoader's params independent of + // wiring order. 
+ struct + { + halp_meta(name, "Scene"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; } outputs; + // Stable scene_state identity so downstream scene-identity caches + // (ScenePreprocessor, merge_scenes passthrough) stay hot across frames. + std::shared_ptr m_sceneState; + int64_t m_sceneVersion{0}; + void* m_lastPublishedHandle{}; + // GPU resources QRhiTexture* m_cubemapTex{}; QRhiTexture* m_equirectTex{}; @@ -79,6 +137,16 @@ class CubemapLoader void operator()() { } + // Dtor safety net: if the renderer framework's release(RenderList&) + // path was skipped (e.g. a reconcile path that deletes the renderer + // without first calling release — or any future code that drops the + // GfxRenderer's shared_ptr without going through + // CpuFilterNode::releaseState), any still-live textures and GPU + // resources go to deleteLater here so QRhi's destructor can collect + // them before vkDestroyDevice. Without this the Vulkan validation + // layer flags "VkImage has not been destroyed" on app exit. + ~CubemapLoader(); + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res); void update( score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, @@ -89,10 +157,16 @@ class CubemapLoader QRhiResourceUpdateBatch*& res, score::gfx::Edge& edge); private: - void loadImage(); void createCubemapTexture(QRhi& rhi, int faceSize); void releaseCubemapTexture(); - void releaseEquirectResources(); + // `renderer` is optional: when non-null QRhiBuffers go through + // RenderList::releaseBuffer (the project-wide lifetime invariant); + // when null (dtor fallback, after the RenderList itself may have + // already been destroyed) we fall back to direct deleteLater. + // Textures always deleteLater directly — they're not tracked in + // RenderList::m_vertexBuffers, so the double-free risk only applies + // to buffers. 
+ void releaseEquirectResources(score::gfx::RenderList* renderer = nullptr); void uploadCrossOrStrip(QRhiResourceUpdateBatch* res); void renderEquirectangular( diff --git a/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.cpp new file mode 100644 index 0000000000..055bf9d0cb --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.cpp @@ -0,0 +1,153 @@ +#include "EnvironmentLoader.hpp" + +#include +#include + +#include + +namespace Threedim +{ + +void EnvironmentLoader::rebuild() +{ + if(!m_state) + { + m_state = std::make_shared(); + m_state->roots = std::make_shared>(); + } + + auto& env = m_state->environment; + // Reset: this node only sets the ambient / exposure / gamma / fog + // groups. It does NOT touch skybox_texture / IBL handles — those + // come from cube-texture producers (CubemapLoader, …) that emit + // their own scene_spec with only the relevant fields populated. + // merge_scenes overlays field-by-field using the params_set mask. + env = {}; + + env.ambient_color[0] = inputs.ambient_color.value.x; + env.ambient_color[1] = inputs.ambient_color.value.y; + env.ambient_color[2] = inputs.ambient_color.value.z; + env.ambient_intensity = inputs.ambient_intensity.value; + // Photographic exposure: EV100 is the scene anchor, exposure_stops is + // the user-facing fine-tune (analogous to a camera's ±EV dial). The + // standard formula is `mul = stops_gain / (K * 2^EV100)`; we use K=1 + // so EV100 = 0, stops = 0 leaves `env.exposure = 1` (preserving + // backward compat with scenes from before EV100 existed). Switch to + // the photometric K=1.2 (Frostbite/UE/Filament) once tone-mapping + // post-processes are the norm — at that point a non-unit default + // multiplier stops being surprising. 
+ constexpr float K = 1.0f; + env.exposure = std::exp2(inputs.exposure_stops.value) + / (K * std::exp2(inputs.ev100.value)); + env.gamma = inputs.gamma.value; + env.fog.enabled = inputs.fog_enabled.value; + env.fog.color[0] = inputs.fog_color.value.x; + env.fog.color[1] = inputs.fog_color.value.y; + env.fog.color[2] = inputs.fog_color.value.z; + env.fog.start = inputs.fog_start.value; + env.fog.end = inputs.fog_end.value; + + env.params_set = ossia::scene_environment::params_ambient + | ossia::scene_environment::params_exposure_gamma + | ossia::scene_environment::params_fog; + + // Render target size: only publish the overlay when both dimensions + // are positive. 0,0 (the default) means "let downstream fall back to + // the RenderList swap-chain size" — don't stamp the bit so other + // branches with legitimate sizes can still win the merge. + if(inputs.render_target_size.value.x > 0 + && inputs.render_target_size.value.y > 0) + { + env.render_target_size[0] = (uint32_t)inputs.render_target_size.value.x; + env.render_target_size[1] = (uint32_t)inputs.render_target_size.value.y; + env.params_set |= ossia::scene_environment::params_render_target_size; + } + + // Propagate the Env arena slot ref so the preprocessor can resolve + // our slot via ossia::gpu_slot_ref. m_env_ref is populated once in + // init() on the render thread — here on the execution thread we + // just copy the POD value. It stays zero (invalid) until init() runs, + // which is fine: preprocessor's isLive() will reject a zero ref. 
+ env.raw_slot = m_env_ref; + + m_version++; + m_state->version = m_version; + m_pending_dirty = ossia::scene_port::dirty_environment; +} + +void EnvironmentLoader::operator()() +{ + if(!m_state) + rebuild(); + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +void EnvironmentLoader::init( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + // Claim one slot in the Env arena for this node's lifetime. Kept in + // env_slot; released in release() below. The slot's offset + buffer + // are stable — consumer shaders bind r.registry().buffer(Env) with + // registry.slotOffset(env_slot) as the range base. + if(!env_slot.valid()) + { + env_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::Env, + sizeof(score::gfx::EnvParamsUBO)); + m_env_ref = r.registry().toOssiaRef(env_slot); + } + // Seed the slot with default-constructed bytes so downstream consumers + // that sample the slot before operator()() has ever run see a sane + // neutral environment rather than undefined memory. + if(env_slot.valid()) + { + score::gfx::EnvParamsUBO seed{}; + r.registry().updateSlot(res, env_slot, &seed, sizeof(seed)); + } +} + +void EnvironmentLoader::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge*) +{ + // Render-thread path: pack the current CPU-side scene_environment into + // the EnvParamsUBO layout and upload to our slot. CpuFilterNode runs + // processControlIn before calling us, so `inputs.*.value` already + // reflects the latest control state — and operator()() has already + // run this frame, so m_state->environment holds the freshest data. 
+ if(!env_slot.valid() || !m_state) + return; + + const auto& env = m_state->environment; + score::gfx::EnvParamsUBO gpu{}; + gpu.ambient[0] = env.ambient_color[0]; + gpu.ambient[1] = env.ambient_color[1]; + gpu.ambient[2] = env.ambient_color[2]; + gpu.ambient[3] = env.ambient_intensity; + gpu.fog_color_density[0] = env.fog.color[0]; + gpu.fog_color_density[1] = env.fog.color[1]; + gpu.fog_color_density[2] = env.fog.color[2]; + gpu.fog_color_density[3] = env.fog.density; + gpu.fog_range[0] = env.fog.start; + gpu.fog_range[1] = env.fog.end; + gpu.fog_range[2] = float(env.fog.mode); + gpu.fog_range[3] = env.fog.enabled ? 1.f : 0.f; + gpu.exposure_gamma[0] = env.exposure; + gpu.exposure_gamma[1] = env.gamma; + gpu.exposure_gamma[2] = 0.f; + gpu.exposure_gamma[3] = 0.f; + r.registry().updateSlot(res, env_slot, &gpu, sizeof(gpu)); +} + +void EnvironmentLoader::release(score::gfx::RenderList& r) +{ + if(env_slot.valid()) + r.registry().free(env_slot); + m_env_ref = {}; + // Producer-state-drift Option A — see Light::release. + m_state.reset(); +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.hpp new file mode 100644 index 0000000000..5cf8e8d5a1 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/EnvironmentLoader.hpp @@ -0,0 +1,146 @@ +#pragma once +#include +#include + +#include + +#include + +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Scene-producing node that defines the environment of a scene: +// ambient light, exposure, gamma, fog. +// +// Pairs with the project-wide scene_spec merge rule: environment is +// merged field-by-field using the params_set bitmask. 
This node sets +// only ambient / exposure-gamma / fog bits — skybox texture and IBL +// handles are owned by CubemapLoader / CubemapComposer (and a future +// EnvironmentPrecompute for real IBL) and overlay cleanly via +// merge_scenes. +// +// Downstream pipeline: +// - `ossia::merge_scenes` overlays this environment onto the merged +// scene_state — field groups without matching bits pass through +// from whichever producer set them. +// - ScenePreprocessor packs scene_environment fields into an Environment +// Params UBO (auto-bound as aux buffer on Geometry Out). +// - classic_pbr_ibl shaders read the UBO for ambient / exposure / fog. +class EnvironmentLoader +{ +public: + halp_meta(name, "Environment") + halp_meta(c_name, "environment_loader") + halp_meta(category, "Visuals/3D") + halp_meta(authors, "ossia team") + halp_meta(uuid, "d3f5a8c1-8b47-4e91-9c2d-6f1a9b5e3c82") + + struct ins + { + // Port-driven rebuild: each control's update() callback fires only + // on real change, triggering EnvironmentLoader::rebuild(). + struct : halp::xyz_spinboxes_f32<"Ambient Color", halp::range{0., 1., 0.03}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } ambient_color; + struct : halp::hslider_f32<"Ambient Intensity", halp::range{0., 8., 1.}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } ambient_intensity; + + // Photographic exposure value at ISO 100. Describes the scene's + // expected brightness in photometric terms; downstream shaders + // compensate so brighter scenes (higher EV100) display darker + // without manual rebalancing. Reference values: + // EV100 ≈ -3 moonlit night + // EV100 ≈ 3 indoor lighting + // EV100 ≈ 12 midday outdoor + // EV100 ≈ 16 direct sunlight + // EV100 = 0 leaves the linear multiplier at 1× (combined with the + // default exposure_stops below it), preserving backward + // compatibility with scenes authored before EV100 existed. 
+ struct : halp::hslider_f32<"Exposure EV100", halp::range{-6., 18., 0.}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } ev100; + + // Fine-tune compensation atop EV100, in stops (±EV). Same role as + // a photographer's "exposure compensation" dial: ev100 sets the + // photographic anchor, exposure_stops biases above/below. + struct : halp::hslider_f32<"Exposure (stops)", halp::range{-8., 8., 0.}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } exposure_stops; + struct : halp::hslider_f32<"Gamma", halp::range{1., 3., 2.2}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } gamma; + + struct : halp::toggle<"Fog"> + { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_enabled; + struct : halp::xyz_spinboxes_f32<"Fog Color", halp::range{0., 1., 0.8}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_color; + struct : halp::hslider_f32<"Fog Start", halp::range{0., 1000., 10.}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_start; + struct : halp::hslider_f32<"Fog End", halp::range{0., 10000., 100.}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } fog_end; + + // Downstream render-target dimensions (width, height). Stamped on + // scene_environment::render_target_size + params_render_target_size + // bit when both values > 0. Overrides the preprocessor's default + // derivation from the RenderList swap chain. + struct : halp::xy_spinboxes_i32<"Render target size", halp::range{0, 16384, 0}> + { void update(EnvironmentLoader& n) { n.rebuild(); } } render_target_size; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + // Rebuild m_state from current inputs. Invoked by each port's + // update() callback on real control changes. operator()() just + // republishes m_state, so the emitted shared_ptr + version stay + // stable when nothing changed — keeps every downstream cache hot. 
+ void rebuild(); + void operator()(); + + // Render-thread GPU hooks, invoked by CpuFilterNode. init allocates a + // slot in the Env arena once; update rebuilds the EnvParamsUBO bytes + // and uploads them into the slot (ScenePreprocessor will later pick + // these up directly instead of repacking the CPU struct — producer + // half only for now); release returns the slot. + void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + // Invariant identity for the shared scene_environment struct we emit — + // holding one stable scene_state across frames lets downstream + // `scene.state.get()` comparisons short-circuit the no-op case. We + // mutate the state's environment in place on parameter changes. + std::shared_ptr m_state; + int64_t m_version{0}; + uint8_t m_pending_dirty{ossia::scene_port::dirty_environment}; + + // Slot in RenderList::registry().buffer(Env). Allocated in init(), + // written in update(), freed in release(). + score::gfx::GpuResourceRegistry::Slot env_slot; + + // Ossia-facing snapshot of env_slot, stamped on scene_state:: + // environment.raw_slot by rebuild() so the preprocessor can + // resolve our slot via isLive(). Written in init() and cleared in + // release() on the render thread, read by rebuild() on the execution + // thread (trivially-copyable POD, initialised to zero so pre-init + // reads look like an invalid ref). + // NOTE(review): operator()() only calls rebuild() while m_state is + // null, so a ref assigned by init() after that first rebuild is not + // stamped until a control changes or release() resets m_state — + // confirm this is intended. 
+ ossia::gpu_slot_ref m_env_ref{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.cpp b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.cpp new file mode 100644 index 0000000000..7046e3ebf2 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.cpp @@ -0,0 +1,407 @@ +#include "ExtractBuffer2.hpp" + +#include + +#include + +#include +#include + +namespace Threedim +{ +namespace +{ +// Tiny helper: parse `n` as a non-negative integer. Returns -1 on miss. +[[nodiscard]] int parseInt(std::string_view n) noexcept +{ + int v{}; + const auto* first = n.data(); + const auto* last = n.data() + n.size(); + auto [ptr, ec] = std::from_chars(first, last, v); + if(ec != std::errc{} || ptr != last || v < 0) + return -1; + return v; +} + +// Map a user-supplied name to a halp::attribute_semantic. Returns +// nullopt for unknown names (the caller then falls back to the +// custom-name lookup against geometry_attribute::name). +[[nodiscard]] std::optional +nameToSemantic(std::string_view n) noexcept +{ + using S = halp::attribute_semantic; + // FIXME add all the others + if(n == "position" || n == "pos") + return S::position; + if(n == "normal" || n == "norm") + return S::normal; + if(n == "tangent") + return S::tangent; + if(n == "bitangent") + return S::bitangent; + if(n == "uv" || n == "texcoord" || n == "texcoord0") + return S::texcoord0; + if(n == "texcoord1") + return S::texcoord1; + if(n == "texcoord2") + return S::texcoord2; + if(n == "texcoord3") + return S::texcoord3; + if(n == "color" || n == "color0") + return S::color0; + if(n == "color1") + return S::color1; + if(n == "color2") + return S::color2; + if(n == "color3") + return S::color3; + if(n == "joints" || n == "joints0") + return S::joints0; + if(n == "joints1") + return S::joints1; + if(n == "weights" || n == "weights0") + return S::weights0; + if(n == "weights1") + return S::weights1; + if(n == "velocity") + return S::velocity; + return 
std::nullopt; +} +} + +ExtractBuffer2::ExtractBuffer2() = default; + +std::optional ExtractBuffer2::resolveAttribute( + const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept +{ + if(n.empty()) + return std::nullopt; + + // Numeric -> Nth attribute slot. + if(const int idx = parseInt(n); idx >= 0) + return findAttribute(mesh, idx); + + // Well-known semantic name. + if(const auto sem = nameToSemantic(n)) + return findAttribute(mesh, *sem); + + // Custom-name lookup against geometry_attribute::name. + for(int i = 0; i < (int)mesh.attributes.size(); ++i) + { + if(mesh.attributes[i].name == n) + return findAttribute(mesh, i); + } + + return std::nullopt; +} + +ExtractBuffer2::BufferRef ExtractBuffer2::resolveBuffer( + const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept +{ + if(n.empty()) + return {}; + + // "index" -> the index buffer + if(n == "index") + { + if(mesh.index.buffer < 0 || mesh.index.buffer >= (int)mesh.buffers.size()) + return {}; + // NOTE(review): the byte size below is derived from mesh.vertices; + // confirm that field holds the index count for indexed meshes. + int64_t bytes = 0; + switch(mesh.index.format) + { + case halp::index_format::uint16: + bytes = (int64_t)mesh.vertices * 2; + break; + case halp::index_format::uint32: + bytes = (int64_t)mesh.vertices * 4; + break; + } + return { + .buffer_index = mesh.index.buffer, + .byte_offset = mesh.index.byte_offset, + .byte_size = bytes}; + } + + // Numeric -> Nth buffer in mesh.buffers[] + if(const int idx = parseInt(n); idx >= 0) + { + if(idx >= (int)mesh.buffers.size()) + return {}; + return { + .buffer_index = idx, + .byte_offset = 0, + .byte_size = mesh.buffers[idx].byte_size}; + } + + // Named auxiliary buffer (scene_lights, scene_materials, model_matrices, ...). + // ScenePreprocessor and other producers attach scene-level data here. Checked + // before the attribute fallback (but after "index" and numeric names) because + // aux names are user-chosen and may shadow attribute names. 
+ for(const auto& aux : mesh.auxiliary) + { + if(aux.name == n) + { + if(aux.buffer < 0 || aux.buffer >= (int)mesh.buffers.size()) + return {}; + const int64_t size + = aux.byte_size > 0 ? aux.byte_size : mesh.buffers[aux.buffer].byte_size; + return { + .buffer_index = aux.buffer, + .byte_offset = aux.byte_offset, + .byte_size = size}; + } + } + + // Otherwise: try to resolve as an attribute name and walk to the + // backing buffer. + if(const auto lk = resolveAttribute(mesh, n); lk && lk->input) + { + const int bidx = lk->input->buffer; + if(bidx >= 0 && bidx < (int)mesh.buffers.size()) + { + return { + .buffer_index = bidx, + .byte_offset = 0, + .byte_size = mesh.buffers[bidx].byte_size}; + } + } + + return {}; +} + +void ExtractBuffer2::initStrategy(score::gfx::RenderList& renderer) +{ + const auto& mesh = inputs.geometry.mesh; + if(mesh.vertices == 0) + { + m_strategy = std::monostate{}; + return; + } + + QRhi& rhi = *renderer.state.rhi; + + m_currentMode = inputs.mode.value; + m_currentName = inputs.name.value; + m_currentPadToVec4 = inputs.pad_to_vec4.value; + + if(inputs.mode.value == Attribute) + { + const auto lookup = resolveAttribute(mesh, m_currentName); + if(!lookup) + { + qWarning() << this << "ExtractBuffer2: attribute not found:" + << QString::fromStdString(m_currentName); + m_strategy = std::monostate{}; + return; + } + if(!lookup->buffer || !lookup->buffer->handle) + { + qWarning() << this << "ExtractBuffer2: source buffer is null"; + m_strategy = std::monostate{}; + return; + } + + const bool hasIndexBuffer = mesh.index.buffer >= 0; + const bool canDirectRef = lookup->canDirectReference() && !hasIndexBuffer; + + bool ok = false; + if(hasIndexBuffer) + { + auto& s = m_strategy.emplace(); + ok = s.init(renderer.state, rhi, mesh, *lookup, m_currentPadToVec4); + } + else if(canDirectRef) + { + auto& s = m_strategy.emplace(); + ok = s.init(renderer.state, rhi, mesh, *lookup, m_currentPadToVec4); + } + else + { + auto& s = m_strategy.emplace(); + ok = 
s.init(renderer.state, rhi, mesh, *lookup, m_currentPadToVec4); + } + if(!ok) + { + qWarning() << this << "ExtractBuffer2: strategy init failed"; + m_strategy = std::monostate{}; + } + } + else // Buffer + { + const auto ref = resolveBuffer(mesh, m_currentName); + if(ref.buffer_index < 0 || ref.byte_size <= 0) + { + qWarning() << this << "ExtractBuffer2: buffer not found:" + << QString::fromStdString(m_currentName); + m_strategy = std::monostate{}; + return; + } + auto& s = m_strategy.emplace(); + if(!s.init(renderer.state, rhi, mesh, ref.buffer_index, ref.byte_offset, ref.byte_size)) + { + qWarning() << this << "ExtractBuffer2: DirectBufferReferenceStrategy failed"; + m_strategy = std::monostate{}; + } + } +} + +void ExtractBuffer2::init( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + initStrategy(renderer); + updateOutput(); +} + +void ExtractBuffer2::update( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, + score::gfx::Edge* /*e*/) +{ + const auto& mesh = inputs.geometry.mesh; + if(mesh.vertices == 0) + return; + + // Selector or pad change -> tear down and rebuild from scratch. The + // strategies are cheap to recreate (they own at most one compute + // pipeline) so this keeps the update path simple. + const bool modeChanged = (inputs.mode.value != m_currentMode); + const bool nameChanged = (inputs.name.value != m_currentName); + const bool padChanged = (inputs.pad_to_vec4.value != m_currentPadToVec4); + if(modeChanged || nameChanged || padChanged) + { + release(renderer); + initStrategy(renderer); + updateOutput(); + return; + } + + // Drain dirty flags so the upstream knows we picked them up. We + // always re-check the source buffer pointers below regardless. 
+ bool any_dirty = inputs.geometry.dirty_mesh; + for(auto& buf : inputs.geometry.mesh.buffers) + { + any_dirty |= buf.dirty; + buf.dirty = false; + } + inputs.geometry.dirty_mesh = false; + + if(inputs.mode.value == Attribute) + { + const auto lookup = resolveAttribute(mesh, m_currentName); + if(!lookup) + return; + + // Strategy class may need to change if the upstream changed its + // index/binding layout (e.g. went from non-indexed to indexed). + const bool hasIndexBuffer = mesh.index.buffer >= 0; + const bool canDirectRef = lookup->canDirectReference() && !hasIndexBuffer; + + const bool needsIndexed = hasIndexBuffer; + const bool needsDirect = canDirectRef && !hasIndexBuffer; + const bool needsCompute = !canDirectRef && !hasIndexBuffer; + const bool isIndexed = std::holds_alternative(m_strategy); + const bool isDirect = std::holds_alternative(m_strategy); + const bool isCompute = std::holds_alternative(m_strategy); + + if((needsIndexed && !isIndexed) || (needsDirect && !isDirect) + || (needsCompute && !isCompute)) + { + release(renderer); + initStrategy(renderer); + updateOutput(); + return; + } + + QRhi& rhi = *renderer.state.rhi; + std::visit( + [&](auto& strategy) { + using T = std::decay_t; + if constexpr(!std::is_same_v) + strategy.update(rhi, mesh, *lookup, m_currentPadToVec4); + }, + m_strategy); + } + else // Buffer + { + auto* strat = std::get_if(&m_strategy); + if(!strat) + { + release(renderer); + initStrategy(renderer); + updateOutput(); + return; + } + + // Re-resolve and re-init in place: even if the user-visible name + // hasn't changed, the upstream may have rebuilt the QRhiBuffer* + // (resize, format change). DirectBufferReferenceStrategy is + // pointer-only state, so this is effectively just a re-fetch. 
+ const auto ref = resolveBuffer(mesh, m_currentName); + if(ref.buffer_index < 0 || ref.byte_size <= 0) + { + release(renderer); + return; + } + QRhi& rhi = *renderer.state.rhi; + if(!strat->init( + renderer.state, rhi, mesh, ref.buffer_index, ref.byte_offset, + ref.byte_size)) + { + qWarning() << this << "ExtractBuffer2: re-init failed in update"; + release(renderer); + return; + } + } + + updateOutput(); +} + +void ExtractBuffer2::release(score::gfx::RenderList& /*renderer*/) +{ + std::visit( + [](auto& strategy) { + using T = std::decay_t; + if constexpr(!std::is_same_v) + strategy.release(); + }, + m_strategy); + m_strategy = std::monostate{}; +} + +void ExtractBuffer2::runInitialPasses( + score::gfx::RenderList& renderer, QRhiCommandBuffer& commands, + QRhiResourceUpdateBatch*& res, score::gfx::Edge& /*edge*/) +{ + QRhi& rhi = *renderer.state.rhi; + std::visit( + [&](auto& strategy) { + using T = std::decay_t; + if constexpr(!std::is_same_v) + { + if constexpr(T::needsCompute()) + strategy.runCompute(rhi, commands, res); + } + }, + m_strategy); +} + +void ExtractBuffer2::updateOutput() +{ + std::visit( + [this](const auto& strategy) { + using T = std::decay_t; + if constexpr(!std::is_same_v) + { + gpu_buffer_view out = strategy.output(); + outputs.buffer.buffer.handle = out.buffer; + outputs.buffer.buffer.byte_size = out.size; + outputs.buffer.buffer.byte_offset = out.offset; + } + else + { + outputs.buffer.buffer = {}; + } + }, + m_strategy); +} + +void ExtractBuffer2::operator()() { } +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.hpp b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.hpp new file mode 100644 index 0000000000..9eda1a5c62 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ExtractBuffer2.hpp @@ -0,0 +1,134 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace Threedim +{ +// Name-based version of Threedim::ExtractBuffer. 
+// +// The original ExtractBuffer enumerates a fixed list of attribute slots +// (Position / TexCoord / Normal / ... / Buffer_0..Buffer_8) and selects +// one through a combobox. The Buffer_i path was wrong (the source buffer +// pointer was not refreshed in update(), and there was no way to refer +// to a buffer through anything other than its raw index in the mesh's +// buffer list, which is brittle whenever the upstream geometry rebuilds +// its buffer layout). +// +// This version takes: +// * a Mode enum -- Attribute or Buffer +// * a name -- a free-form string interpreted differently per mode +// +// Mode == Attribute: extract a single per-vertex attribute (one vec lane) +// * "position" / "normal" / "tangent" / "bitangent" / +// "texcoord" or "texcoord0".."texcoord3" / "uv" (alias for texcoord0) / +// "color" or "color0".."color3" : +// match against halp::attribute_semantic +// * "<N>" : Nth entry in mesh.attributes[] +// * anything else: custom-name lookup in mesh.attributes[].name +// The output is one of the existing extraction strategies +// (Direct / Compute / Indexed) just like ExtractBuffer. +// +// Mode == Buffer: extract a whole raw buffer (all bytes) +// * "<N>" : the Nth entry in mesh.buffers[] (the index path +// ExtractBuffer's combobox tried to expose) +// * "index" : the buffer mesh.index points at +// * a name matching one of `mesh.auxiliary[].name` (checked before +// attributes, since user-chosen aux names may shadow attribute names): +// returns the auxiliary's backing buffer + its byte_offset / +// byte_size. This is how ScenePreprocessor's per-frame auxiliaries +// (camera, camera_prev, env, scene_lights, scene_materials, +// per_draws, indirect_draw_cmds, scene_counts, and every +// scene_data_ptr name) can be pulled out onto a standalone +// gpu_buffer outlet for downstream consumers that don't want to +// auto-bind via try_bind_from_geometry. 
+// * anything else: look up an attribute by semantic / custom name +// and return the buffer it lives in (via attribute -> input -> +// buffer). +// +// On every update() the source buffer handle is re-fetched from the +// mesh, so an upstream that rebuilds its QRhiBuffer (resize / new +// allocation) is reflected on the next frame instead of leaving us +// holding a stale pointer. +class ExtractBuffer2 +{ +public: + halp_meta(name, "Extract buffer (by name)") + halp_meta(category, "Visuals/Utilities") + halp_meta(c_name, "extract_buffer_by_name") + halp_meta( + manual_url, "https://ossia.io/score-docs/processes/extract-buffer.html") + halp_meta(uuid, "3c9d6c2b-1f04-4f7d-9bc2-a4b1d7c8e5f0") + + enum Mode + { + Attribute, + Buffer + }; + + struct ins + { + struct + { + halp_meta(name, "Geometry"); + halp::dynamic_gpu_geometry mesh; + float transform[16]{}; + bool dirty_mesh = false; + bool dirty_transform = false; + } geometry; + + halp::combobox_t<"Mode", Mode> mode; + struct : halp::lineedit<"Name / index", "position"> + { + halp_meta(symbol, "name") + } name; + halp::toggle<"Pad vec3 to vec4"> pad_to_vec4; + } inputs; + + struct + { + halp::gpu_buffer_output<"Buffer"> buffer; + } outputs; + + ExtractBuffer2(); + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + void runInitialPasses( + score::gfx::RenderList& renderer, QRhiCommandBuffer& commands, + QRhiResourceUpdateBatch*& res, score::gfx::Edge& edge); + void operator()(); + +private: + // Resolve the user's name string to an attribute_lookup, taking the + // active mesh into account. Returns nullopt on miss. 
+ [[nodiscard]] static std::optional + resolveAttribute(const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept; + + // Resolve the user's name string to a (buffer index, byte_offset, byte_size) + // triple suitable for DirectBufferReferenceStrategy; buffer_index is -1 on miss. + struct BufferRef + { + int buffer_index{-1}; + int64_t byte_offset{}; + int64_t byte_size{}; + }; + [[nodiscard]] static BufferRef + resolveBuffer(const halp::dynamic_gpu_geometry& mesh, std::string_view n) noexcept; + + // (Re)initialise m_strategy based on the current inputs and mesh. + void initStrategy(score::gfx::RenderList& renderer); + void updateOutput(); + + ExtractionStrategyVariant m_strategy; + Mode m_currentMode{Attribute}; + std::string m_currentName{}; + bool m_currentPadToVec4{false}; +}; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.cpp b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.cpp new file mode 100644 index 0000000000..2aeea8b4f5 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.cpp @@ -0,0 +1,124 @@ +#include "ExtractSceneBuffer.hpp" + +#include + +namespace Threedim +{ + +namespace +{ +// Resolve the slot ref from the scene + kind + index selectors. Returns +// an all-zero (invalid) ref on miss; the caller's isLive() call will +// reject it without a separate null check. 
+inline ossia::gpu_slot_ref +pickSlotRef(const ossia::scene_state& state, ExtractSceneBuffer::Kind kind, + int index) noexcept +{ + switch(kind) + { + case ExtractSceneBuffer::Environment: + return state.environment.raw_slot; + + case ExtractSceneBuffer::Camera: + { + if(!state.cameras) + return {}; + const auto& cams = *state.cameras; + if(index < 0 || std::size_t(index) >= cams.size()) + return {}; + if(!cams[index]) + return {}; + return cams[index]->raw_slot; + } + + case ExtractSceneBuffer::Material: + { + if(!state.materials) + return {}; + const auto& mats = *state.materials; + if(index < 0 || std::size_t(index) >= mats.size()) + return {}; + if(!mats[index]) + return {}; + return mats[index]->raw_slot; + } + } + return {}; +} + +// Convert a slot ref's numeric arena tag to the registry's Arena type. +inline score::gfx::GpuResourceRegistry::Arena arenaOf(uint32_t tag) noexcept +{ + return static_cast<score::gfx::GpuResourceRegistry::Arena>(tag); +} +} + +void ExtractSceneBuffer::operator()() +{ + // Execution thread — no GPU work here. The port's scene_spec is what + // update() reads. Drain the dirty flag so upstream knows the tick + // was observed. + inputs.scene_in.dirty = 0; +} + +void ExtractSceneBuffer::init( + score::gfx::RenderList&, QRhiResourceUpdateBatch&) +{ + // Nothing to allocate — the node only reads through the registry. + outputs.buffer.buffer = {}; +} + +void ExtractSceneBuffer::update( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch&, + score::gfx::Edge*) +{ + // No scene → clear outlet. Downstream consumers see buffer.handle == + // nullptr and fall back to whatever default they define. + if(!inputs.scene_in.scene.state) + { + outputs.buffer.buffer = {}; + return; + } + + const auto ref = pickSlotRef( + *inputs.scene_in.scene.state, + Kind(inputs.kind.value), inputs.index.value); + + // Liveness is the one authoritative check: catches stale refs + // (producer released), default-constructed refs (no slot stamped), + // refs from a different registry (different RenderList), and + // mismatched-arena refs in one compare. 
+ if(!renderer.registry().isLive(ref)) + { + outputs.buffer.buffer = {}; + return; + } + + QRhiBuffer* buf = renderer.registry().buffer(arenaOf(ref.arena)); + if(!buf) + { + outputs.buffer.buffer = {}; + return; + } + + const void* prev_handle = outputs.buffer.buffer.handle; + const int64_t prev_offset = outputs.buffer.buffer.byte_offset; + const int64_t prev_size = outputs.buffer.buffer.byte_size; + + outputs.buffer.buffer.handle = buf; + outputs.buffer.buffer.byte_offset = (int64_t)ref.offset; + outputs.buffer.buffer.byte_size = (int64_t)ref.size; + // Flip `changed` only when something downstream-observable actually + // moved — most frames the slot is stable and we want downstream + // rebinds to short-circuit on identity. + outputs.buffer.buffer.changed + = (prev_handle != buf) + || (prev_offset != (int64_t)ref.offset) + || (prev_size != (int64_t)ref.size); +} + +void ExtractSceneBuffer::release(score::gfx::RenderList&) +{ + outputs.buffer.buffer = {}; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.hpp b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.hpp new file mode 100644 index 0000000000..86ba08eb13 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ExtractSceneBuffer.hpp @@ -0,0 +1,116 @@ +#pragma once +#include +#include +#include + +#include + +#include + +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Scene-level buffer extractor. Takes a scene_spec in, picks one of the +// GPU arena slots stamped on the scene's components, and republishes +// the backing `{QRhiBuffer*, byte_offset, byte_size}` triple on a +// halp::gpu_buffer outlet. +// +// Unlike Threedim::ExtractBuffer2 — which extracts from a flattened +// geometry's aux list downstream of a ScenePreprocessor — this node +// works directly on a raw scene_spec. 
Useful when: +// +// - A custom compute shader wants to consume a producer's Raw arena +// slot without paying the cost of a preprocessor flatten. +// - The downstream pipeline has no preprocessor (e.g. a pure +// data-probing tool inspecting the environment's bytes). +// +// Source resolution uses the `raw_slot` field on each component type: +// +// - Environment: scene.state->environment.raw_slot +// - Camera(N): (*scene.state->cameras)[N]->raw_slot +// - Material(N): (*scene.state->materials)[N]->raw_slot +// +// The registry's isLive() check guards every read. Stale refs (producer +// released, mismatched generation) clear the outlet rather than handing +// a dangling QRhiBuffer* downstream. +// +// Lights aren't exposed here because the light tree isn't a flat +// scene_state.lights vector (lights live as scene_payload children). +// Walking the tree to find the Nth light by preorder index is a +// reasonable future addition if the use case shows up; for now, +// extract light data downstream of a ScenePreprocessor via +// ExtractBuffer2(name="scene_lights"). +class ExtractSceneBuffer +{ +public: + halp_meta(name, "Extract Scene Buffer") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "extract_scene_buffer") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/extract-scene-buffer.html") + halp_meta(uuid, "5f2b8e1c-4a7d-4e9b-b0f1-3c6e8d2a5b74") + + enum Kind + { + Environment, + Camera, + Material + }; + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + struct : halp::combobox_t<"Kind", Kind> + { + struct range + { + std::string_view values[3]{"Environment", "Camera", "Material"}; + int init{0}; + }; + } kind; + + // Index inside scene.state->cameras / ->materials. Ignored when + // Kind == Environment (the environment is a singleton on scene_state). 
+ halp::spinbox_i32<"Index", halp::irange{0, 1024, 0}> index; + } inputs; + + struct outs + { + halp::gpu_buffer_output<"Buffer"> buffer; + } outputs; + + // Execution-thread tick. No heavy work here — it only clears the + // scene inlet's dirty flag. Slot resolution needs the + // registry (render thread) so it happens in update(). + void operator()(); + + // Render-thread hooks. update() resolves the slot ref against the + // renderer's GpuResourceRegistry, validates via isLive(), and + // publishes the buffer handle + offset + size on the outlet. init() + // and release() only reset the outlet — the node owns no GPU state. + void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.cpp b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.cpp new file mode 100644 index 0000000000..d7b288c977 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.cpp @@ -0,0 +1,173 @@ +#include "ExtractTexture.hpp" + +#include + +#include + +namespace Threedim +{ + +void ExtractTexture::init( + score::gfx::RenderList& /*renderer*/, QRhiResourceUpdateBatch& /*res*/) +{ +} + +void ExtractTexture::update( + score::gfx::RenderList& /*renderer*/, QRhiResourceUpdateBatch& /*res*/, + score::gfx::Edge* /*e*/) +{ + const auto& mesh = inputs.geometry.mesh; + const auto& target_name = inputs.name.value; + + // Resolve by name. aux names are producer-chosen (ScenePreprocessor + // uses "skybox", "irradiance_map", "camera", "base_color_array", + // …); if the target is missing we hand out a null handle so the + // downstream binding drops to its empty-placeholder. 
+ void* resolved = nullptr; + void* resolved_sampler = nullptr; + for(const auto& aux : mesh.auxiliary_textures) + { + if(aux.name == target_name) + { + resolved = aux.handle; + resolved_sampler = aux.sampler_handle; + break; + } + } + + // Short-circuit identical-state updates. Texture metadata re-emission + // trips downstream SRB rebuilds, so we only publish when the handle + // pointer or the target name actually changed. + if(resolved == m_lastHandle && target_name == m_lastName) + return; + m_lastHandle = resolved; + m_lastName = target_name; + + outputs.texture.texture.handle = resolved; + // Forward the producer-side sampler if any. ScenePreprocessor's per- + // bucket sampler split (per-glTF wrap/filter mode) ships a sampler + // alongside each material texture array — passing it through here + // lets downstream sampler-config-sensitive nodes (anisotropy, custom + // wrap mode) honour it. Null = downstream falls back to its own. + outputs.texture.texture.sampler_handle = resolved_sampler; + + if(!resolved) + { + outputs.texture.texture.width = 0; + outputs.texture.texture.height = 0; + outputs.texture.texture.layers_or_depth = 1; + outputs.texture.texture.kind = halp::texture_kind::texture_2d; + return; + } + + // Detect the texture shape from the live QRhiTexture's flags + + // dimensions. Order matters: CubeMap and ThreeDimensional are + // mutually exclusive by construction, but check CubeMap first as + // some backends may happen to set both bits on edge-case allocations. 
+ // `resolved` is the type-erased aux handle; view it as the QRhiTexture whose flags / size are queried below. + auto* tex = static_cast<QRhiTexture*>(resolved); + const auto flags = tex->flags(); + const QSize px = tex->pixelSize(); + + outputs.texture.texture.width = px.width(); + outputs.texture.texture.height = px.height(); + + if(flags.testFlag(QRhiTexture::CubeMap)) + { + outputs.texture.texture.kind = halp::texture_kind::cubemap; + outputs.texture.texture.layers_or_depth = 6; + } + else if(flags.testFlag(QRhiTexture::ThreeDimensional)) + { + outputs.texture.texture.kind = halp::texture_kind::texture_3d; + // QRhiTexture::depth() is 0 for non-3D textures, set on allocation + // for 3D. Default to 1 when the backend returns 0 on a 3D texture + // that hasn't been filled yet — avoids an illegal 0-depth probe + // binding downstream. + outputs.texture.texture.layers_or_depth = std::max(1, tex->depth()); + } + else if(flags.testFlag(QRhiTexture::TextureArray)) + { + outputs.texture.texture.kind = halp::texture_kind::texture_array; + outputs.texture.texture.layers_or_depth = std::max(1, tex->arraySize()); + } + else + { + outputs.texture.texture.kind = halp::texture_kind::texture_2d; + outputs.texture.texture.layers_or_depth = 1; + } + + // Format reporting — halp's gpu_texture format taxonomy now mirrors + // QRhi's color + integer set, so downstream nodes that branch on + // format (HDR-ness, integer-vs-float for atomic-image consumers, + // sRGB inference) get a faithful answer instead of the previous + // "everything not in the float subset → RGBA8" silent miscast. + // + // QRhi version availability: + // - RGBA8 / BGRA8 / R8 / RG8 / R16 / RG16 / float family / depth → + // present since QRhi went public-ish (Qt 6.2 private API). + // - RGB10A2 added in Qt 6.4. + // - Integer family (R8UI / R32UI / RG32UI / RGBA32UI / *SI variants) + // added in Qt 6.10. Guard so older Qt builds compile. 
+ switch(tex->format()) + { + // 8-bit unorm — Qt 6.2+ + case QRhiTexture::RGBA8: outputs.texture.texture.format = halp::gpu_texture::RGBA8; break; + case QRhiTexture::BGRA8: outputs.texture.texture.format = halp::gpu_texture::BGRA8; break; + case QRhiTexture::R8: outputs.texture.texture.format = halp::gpu_texture::R8; break; + case QRhiTexture::RG8: outputs.texture.texture.format = halp::gpu_texture::RG8; break; + + // 16-bit unorm — Qt 6.2+ + case QRhiTexture::R16: outputs.texture.texture.format = halp::gpu_texture::R16; break; + case QRhiTexture::RG16: outputs.texture.texture.format = halp::gpu_texture::RG16; break; + + // float — Qt 6.2+ + case QRhiTexture::RGBA16F: outputs.texture.texture.format = halp::gpu_texture::RGBA16F; break; + case QRhiTexture::RGBA32F: outputs.texture.texture.format = halp::gpu_texture::RGBA32F; break; + case QRhiTexture::R16F: outputs.texture.texture.format = halp::gpu_texture::R16F; break; + case QRhiTexture::R32F: outputs.texture.texture.format = halp::gpu_texture::R32F; break; + +#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0) + // 10/10/10/2 packed — Qt 6.4+ + case QRhiTexture::RGB10A2: outputs.texture.texture.format = halp::gpu_texture::RGB10A2; break; +#endif + +#if QT_VERSION >= QT_VERSION_CHECK(6, 10, 0) + // Unsigned integer — Qt 6.10+. REQUIRED to be reported as such for + // atomic-image consumers (voxelizer occupancy grids, histogram + // targets, …). A miscast here would tell downstream "this is RGBA8, + // sample as float" and break uimage / usampler bindings on Vulkan + // validation. 
+ case QRhiTexture::R8UI: outputs.texture.texture.format = halp::gpu_texture::R8UI; break; + case QRhiTexture::R32UI: outputs.texture.texture.format = halp::gpu_texture::R32UI; break; + case QRhiTexture::RG32UI: outputs.texture.texture.format = halp::gpu_texture::RG32UI; break; + case QRhiTexture::RGBA32UI: outputs.texture.texture.format = halp::gpu_texture::RGBA32UI; break; + + // Signed integer — Qt 6.10+ + case QRhiTexture::R8SI: outputs.texture.texture.format = halp::gpu_texture::R8SI; break; + case QRhiTexture::R32SI: outputs.texture.texture.format = halp::gpu_texture::R32SI; break; + case QRhiTexture::RG32SI: outputs.texture.texture.format = halp::gpu_texture::RG32SI; break; + case QRhiTexture::RGBA32SI: outputs.texture.texture.format = halp::gpu_texture::RGBA32SI; break; +#endif + + default: + // Depth, compressed, or anything halp's enum doesn't cover — + // safest fallback is RGBA8 so the downstream sampler binding + // doesn't trip a type-mismatch validation error. Downstream + // explicit consumers should branch on `kind` first. + outputs.texture.texture.format = halp::gpu_texture::RGBA8; + break; + } +} + +void ExtractTexture::release(score::gfx::RenderList& /*r*/) +{ + m_lastHandle = nullptr; + m_lastName.clear(); + outputs.texture.texture.handle = nullptr; + outputs.texture.texture.width = 0; + outputs.texture.texture.height = 0; + outputs.texture.texture.layers_or_depth = 1; + outputs.texture.texture.kind = halp::texture_kind::texture_2d; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.hpp b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.hpp new file mode 100644 index 0000000000..3373fd8cf1 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ExtractTexture.hpp @@ -0,0 +1,79 @@ +#pragma once +#include +#include +#include +#include + +#include + +namespace Threedim +{ + +// Sibling to ExtractBuffer2 (name-based buffer extractor) but for +// texture auxiliaries. 
Reads `inputs.geometry.mesh.auxiliary_textures` +// (populated by the halp/ossia bridge from `ossia::geometry:: +// auxiliary_textures` — which ScenePreprocessor fills with skybox, +// irradiance_map, prefiltered_map, brdf_lut, shadow_map_array, +// base_color_array, metal_rough_array, normal_array, emissive_array, +// *_Dyn0..N, and any producer-injected texture) and re-publishes the +// named entry on a standalone gpu_texture_output. +// +// Runtime-detects the texture shape (2D / TextureArray / Cubemap / +// 3D) from QRhiTexture::flags() and stamps it into the output port's +// `kind` field so downstream nodes / shader bindings know how to bind +// (sampler2D / sampler2DArray / samplerCube / sampler3D). Width, +// height, and layer-or-depth count come along from pixelSize() / +// arraySize() / depth(). +// +// Primary use case: post-processing shaders that depend on scene +// aux textures without going through the scene cable themselves. E.g. +// the shaderlib/depth set wants `camera` + `camera_prev` UBOs +// (extract via ExtractBuffer2) and sometimes a depth-texture aux +// (this node). 
+class ExtractTexture +{ +public: + halp_meta(name, "Extract texture (by name)") + halp_meta(category, "Visuals/Utilities") + halp_meta(c_name, "extract_texture_by_name") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, "https://ossia.io/score-docs/processes/extract-texture.html") + halp_meta(uuid, "4d8f2a6b-7c19-4e05-a3d8-1b6f5e9c2a48") + + struct ins + { + struct + { + halp_meta(name, "Geometry"); + halp::dynamic_gpu_geometry mesh; + float transform[16]{}; + bool dirty_mesh = false; + bool dirty_transform = false; + } geometry; + + struct : halp::lineedit<"Name", "skybox"> + { + halp_meta(symbol, "name") + } name; + } inputs; + + struct + { + halp::gpu_texture_output<"Texture"> texture; + } outputs; + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + void operator()() { } + +private: + // Last-known resolved values — used to skip work when nothing changed. 
+ void* m_lastHandle{}; + std::string m_lastName; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/FbxParser.cpp b/src/plugins/score-plugin-threedim/Threedim/FbxParser.cpp new file mode 100644 index 0000000000..10a0f2885c --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/FbxParser.cpp @@ -0,0 +1,1073 @@ +#include "FbxParser.hpp" + +#include "TangentUtils.hpp" + +#include + +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Transform a position by a ufbx 3x4 matrix (double -> float) +static void transform_point( + const ufbx_matrix& m, const ufbx_vec3& v, float& ox, float& oy, float& oz) +{ + ox = float(m.m00 * v.x + m.m01 * v.y + m.m02 * v.z + m.m03); + oy = float(m.m10 * v.x + m.m11 * v.y + m.m12 * v.z + m.m13); + oz = float(m.m20 * v.x + m.m21 * v.y + m.m22 * v.z + m.m23); +} + +// Transform a direction by a ufbx 3x4 matrix (no translation), then normalize +static void transform_normal( + const ufbx_matrix& m, const ufbx_vec3& v, float& ox, float& oy, float& oz) +{ + float rx = float(m.m00 * v.x + m.m01 * v.y + m.m02 * v.z); + float ry = float(m.m10 * v.x + m.m11 * v.y + m.m12 * v.z); + float rz = float(m.m20 * v.x + m.m21 * v.y + m.m22 * v.z); + float len = std::sqrt(rx * rx + ry * ry + rz * rz); + if(len > 1e-8f) + { + float inv = 1.0f / len; + ox = rx * inv; oy = ry * inv; oz = rz * inv; + } + else + { + ox = 0.0f; oy = 1.0f; oz = 0.0f; + } +} + +// ============================================================================= +// Scene extractor — builds FbxParser::m_scene_nodes (hierarchical) using the +// node's local_transform (NOT pre-transformed to world). Vertex data lives in +// per-attribute shared buffers owned by ScenePart. 
+// ============================================================================= +struct FbxSceneExtractor +{ + std::vector& nodes; + std::vector>& materials; + std::shared_ptr& skeleton; + std::unordered_map material_index; + // bone_node → joint index within the global skeleton. + std::unordered_map joint_index_of; + std::vector tri_indices; + + // Return the joint index for a ufbx bone node, registering a new entry in + // the global skeleton on first sight. Parent chain is resolved later in a + // second pass (link_joint_parents). + int register_joint(const ufbx_node* bone) + { + if(!bone) + return -1; + auto it = joint_index_of.find(bone); + if(it != joint_index_of.end()) + return it->second; + + if(!skeleton) + skeleton = std::make_shared(); + + ossia::skeleton_joint j; + j.name = std::string(bone->name.data, bone->name.length); + + // Local TRS from the bone node itself. + const auto& lt = bone->local_transform; + j.translation[0] = float(lt.translation.x); + j.translation[1] = float(lt.translation.y); + j.translation[2] = float(lt.translation.z); + j.rotation[0] = float(lt.rotation.x); + j.rotation[1] = float(lt.rotation.y); + j.rotation[2] = float(lt.rotation.z); + j.rotation[3] = float(lt.rotation.w); + j.scale[0] = float(lt.scale.x); + j.scale[1] = float(lt.scale.y); + j.scale[2] = float(lt.scale.z); + + // Parent linked later. Identity IBM as placeholder; cluster fills it in. + j.parent_index = -1; + for(int k = 0; k < 16; ++k) + j.inverse_bind_matrix[k] = (k % 5 == 0) ? 1.f : 0.f; + + const int idx = (int)skeleton->joints.size(); + skeleton->joints.push_back(j); + joint_index_of.emplace(bone, idx); + return idx; + } + + // After all bones are registered, fill in parent_index for each joint by + // walking the ufbx parent chain until we find another registered bone. 
+ void link_joint_parents() + { + if(!skeleton) + return; + for(auto& [node, idx] : joint_index_of) + { + const ufbx_node* p = node->parent; + while(p) + { + auto it = joint_index_of.find(p); + if(it != joint_index_of.end()) + { + skeleton->joints[idx].parent_index = it->second; + break; + } + p = p->parent; + } + } + } + + // Convert a ufbx_material to a material_component (factors only — Stage 1b). + // Returns the index in `materials`, registering it on first sight. + int register_material(const ufbx_material* m) + { + if(!m) + return -1; + auto it = material_index.find(m); + if(it != material_index.end()) + return it->second; + + auto mc = std::make_shared(); + mc->tag = std::string(m->name.data, m->name.length); + + // ufbx exposes both classical (Phong/Lambert) and PBR maps. Prefer PBR + // values when present; fall back to FBX classical fields otherwise. + const auto& pbr = m->pbr; + const auto& fbx = m->fbx; + + // Base color + if(pbr.base_color.has_value) + { + mc->base_color_factor[0] = float(pbr.base_color.value_vec4.x); + mc->base_color_factor[1] = float(pbr.base_color.value_vec4.y); + mc->base_color_factor[2] = float(pbr.base_color.value_vec4.z); + mc->base_color_factor[3] = float(pbr.base_color.value_vec4.w); + } + else if(fbx.diffuse_color.has_value) + { + mc->base_color_factor[0] = float(fbx.diffuse_color.value_vec3.x); + mc->base_color_factor[1] = float(fbx.diffuse_color.value_vec3.y); + mc->base_color_factor[2] = float(fbx.diffuse_color.value_vec3.z); + mc->base_color_factor[3] = 1.0f; + } + + // Apply scalar diffuse factor as multiplier on RGB if present. + if(pbr.base_factor.has_value) + { + const float k = float(pbr.base_factor.value_real); + mc->base_color_factor[0] *= k; + mc->base_color_factor[1] *= k; + mc->base_color_factor[2] *= k; + } + + // Metallic / Roughness + mc->metallic_factor + = pbr.metalness.has_value ? float(pbr.metalness.value_real) : 0.0f; + mc->roughness_factor + = pbr.roughness.has_value ? 
float(pbr.roughness.value_real) : 0.5f; + + // Emissive + if(pbr.emission_color.has_value) + { + mc->emissive_factor[0] = float(pbr.emission_color.value_vec3.x); + mc->emissive_factor[1] = float(pbr.emission_color.value_vec3.y); + mc->emissive_factor[2] = float(pbr.emission_color.value_vec3.z); + } + else if(fbx.emission_color.has_value) + { + mc->emissive_factor[0] = float(fbx.emission_color.value_vec3.x); + mc->emissive_factor[1] = float(fbx.emission_color.value_vec3.y); + mc->emissive_factor[2] = float(fbx.emission_color.value_vec3.z); + } + mc->emissive_strength = pbr.emission_factor.has_value + ? float(pbr.emission_factor.value_real) : 1.0f; + + // Alpha / opacity + if(pbr.opacity.has_value) + { + const float op = float(pbr.opacity.value_real); + mc->base_color_factor[3] *= op; + if(op < 0.999f) + mc->alpha = ossia::alpha_mode::blend; + } + + // Material features. Two-sided shading from FBX is uncommon; default false. + mc->double_sided = false; + mc->unlit = false; + + // Texture extraction. ufbx_material_map.texture (when non-null) carries + // either an absolute filename, a relative one (resolved against the FBX + // file dir), or an embedded blob (`content`). We populate texture_ref + // with `source` so the renderer's TextureCache can lazily upload on the + // render thread. The `source` member is never null when a texture is + // present, even if the file/blob is later unreadable. + auto fill_texture + = [](ossia::texture_ref& tr, const ufbx_material_map& map) { + if(!map.texture) + return; + const ufbx_texture* tex = map.texture; + auto src = std::make_shared(); + // Prefer absolute filename when present (more robust); fall back + // to relative + the original "filename" field. 
+ if(tex->absolute_filename.length > 0) + src->file_path = std::string( + tex->absolute_filename.data, tex->absolute_filename.length); + else if(tex->filename.length > 0) + src->file_path = std::string(tex->filename.data, tex->filename.length); + else if(tex->relative_filename.length > 0) + src->file_path = std::string( + tex->relative_filename.data, tex->relative_filename.length); + + if(tex->content.size > 0) + { + auto blob = std::make_shared>( + reinterpret_cast(tex->content.data), + reinterpret_cast(tex->content.data) + tex->content.size); + src->embedded_data = blob; + // ufbx exposes the file extension via the texture name path — + // best-effort sniff for a MIME hint. The TextureLoader uses + // QImage::loadFromData with this hint and falls back to header + // sniffing when empty/wrong. + auto ext_hint = [&](std::string_view path) -> std::string { + auto dot = path.rfind('.'); + if(dot == std::string_view::npos) + return {}; + std::string e(path.substr(dot + 1)); + for(auto& c : e) c = (char)std::tolower((unsigned char)c); + if(e == "jpg" || e == "jpeg") return "image/jpeg"; + if(e == "png") return "image/png"; + if(e == "tga") return "image/tga"; + if(e == "tif" || e == "tiff") return "image/tiff"; + if(e == "bmp") return "image/bmp"; + return {}; + }; + src->mime_type = ext_hint(src->file_path); + } + + // Plan 09 S1: stamp the content hash so the preprocessor's + // decode cache (Gfx::AssetTable) can skip re-decoding the + // same image across multiple outputs / scene reloads. + // Prefer embedded bytes (authoritative) over path (stable + // fallback when the file is an external reference). 
+ if(src->embedded_data && !src->embedded_data->empty()) + { + src->content_hash = ossia::hash_bytes( + src->embedded_data->data(), + src->embedded_data->size()); + } + else if(!src->file_path.empty()) + { + src->content_hash = ossia::hash_bytes( + src->file_path.data(), src->file_path.size()); + } + + tr.source = std::move(src); + tr.texcoord_set = 0; + }; + + fill_texture(mc->base_color_texture, + pbr.base_color.texture ? pbr.base_color : fbx.diffuse_color); + fill_texture(mc->metallic_roughness_texture, pbr.metalness); + fill_texture(mc->normal_texture, + pbr.normal_map.texture ? pbr.normal_map : fbx.normal_map); + fill_texture(mc->occlusion_texture, pbr.ambient_occlusion); + fill_texture(mc->emissive_texture, + pbr.emission_color.texture ? pbr.emission_color : fbx.emission_color); + + // --- OpenPBR / Arnold StandardSurface extensions -------------------- + // ufbx exposes the full Arnold-family PBR parameter set (coat / sheen + // / transmission / subsurface / thin-film / anisotropic specular) on + // ufbx_material_pbr_maps — the same fields OpenPBR aggregates under + // its coat / fuzz / transmission / subsurface / thin-film lobes. The + // FBX PBR extension (Autodesk Standard Surface) is the predecessor of + // OpenPBR, so the mapping is 1:1 name-wise. + // + // Each `ufbx_material_map.has_value` tells us whether the DCC + // actually wrote that channel; if not we leave the material_component + // field at its spec default. + + auto scalar = [](const ufbx_material_map& map, float fallback) -> float { + return map.has_value ? float(map.value_real) : fallback; + }; + auto color3 = [](const ufbx_material_map& map, float (&out)[3], + float fx, float fy, float fz) { + if(map.has_value) + { + out[0] = float(map.value_vec3.x); + out[1] = float(map.value_vec3.y); + out[2] = float(map.value_vec3.z); + } + else + { + out[0] = fx; out[1] = fy; out[2] = fz; + } + }; + + // Coat (KHR_materials_clearcoat equivalent). 
+ mc->clearcoat.factor = scalar(pbr.coat_factor, 0.0f); + mc->clearcoat.roughness_factor = scalar(pbr.coat_roughness, 0.0f); + fill_texture(mc->clearcoat.texture, pbr.coat_factor); + fill_texture(mc->clearcoat.roughness_texture, pbr.coat_roughness); + fill_texture(mc->clearcoat.normal_texture, pbr.coat_normal); + + // Sheen (fuzz in OpenPBR; KHR_materials_sheen). + mc->sheen.roughness_factor = scalar(pbr.sheen_roughness, 0.0f); + color3(pbr.sheen_color, mc->sheen.color_factor, 0.f, 0.f, 0.f); + fill_texture(mc->sheen.color_texture, pbr.sheen_color); + fill_texture(mc->sheen.roughness_texture, pbr.sheen_roughness); + + // Transmission (KHR_materials_transmission). The FBX path tracks + // thick-walled volume via transmission_depth / scatter / dispersion + // which we don't carry yet on material_component (see usd-openpbr + // analysis — volume-depth / scatter / dispersion are listed as the + // missing fields for full OpenPBR coverage). + mc->transmission.factor = scalar(pbr.transmission_factor, 0.0f); + fill_texture(mc->transmission.texture, pbr.transmission_factor); + + // Volume (KHR_materials_volume) — attenuation color ≈ transmission_color. + // ufbx has no direct thicknessFactor; infer from transmission_depth. + mc->volume.thickness_factor = scalar(pbr.transmission_depth, 0.0f); + color3( + pbr.transmission_color, mc->volume.attenuation_color, 1.f, 1.f, 1.f); + + // Specular (KHR_materials_specular) — Arnold specular_factor + + // specular_color; anisotropy separately. + mc->specular.factor = scalar(pbr.specular_factor, 1.0f); + color3(pbr.specular_color, mc->specular.color_factor, 1.f, 1.f, 1.f); + fill_texture(mc->specular.texture, pbr.specular_factor); + fill_texture(mc->specular.color_texture, pbr.specular_color); + + // IOR (KHR_materials_ior). Falls back to the spec default 1.5 when + // the FBX didn't write one. + mc->ior = scalar(pbr.specular_ior, 1.5f); + + // Anisotropy (KHR_materials_anisotropy). 
ufbx splits anisotropy + // magnitude (specular_anisotropy) and rotation (specular_rotation). + mc->anisotropy.strength = scalar(pbr.specular_anisotropy, 0.0f); + mc->anisotropy.rotation = scalar(pbr.specular_rotation, 0.0f); + fill_texture(mc->anisotropy.texture, pbr.specular_anisotropy); + + // Iridescence (KHR_materials_iridescence). ufbx's thin_film_* + // covers the same physics; min == max when ufbx provides only a + // single thickness value. + mc->iridescence.factor = scalar(pbr.thin_film_factor, 0.0f); + const float tf_thickness = scalar(pbr.thin_film_thickness, 400.0f); + mc->iridescence.thickness_min = tf_thickness; + mc->iridescence.thickness_max = tf_thickness; + mc->iridescence.ior = scalar(pbr.thin_film_ior, 1.3f); + fill_texture(mc->iridescence.texture, pbr.thin_film_factor); + + // Subsurface as diffuse_transmission approximation. OpenPBR-style + // subsurface fields (weight / color / radius) aren't on our + // material_component yet, but we map the scalar factor + + // subsurface_color into diffuse_transmission as the closest + // available representation so the glTF-side KHR_materials_diffuse_ + // transmission and FBX-side subsurface_factor land in the same slot. + mc->diffuse_transmission.factor = scalar(pbr.subsurface_factor, 0.0f); + color3( + pbr.subsurface_color, mc->diffuse_transmission.color_factor, + 1.f, 1.f, 1.f); + fill_texture(mc->diffuse_transmission.texture, pbr.subsurface_factor); + fill_texture(mc->diffuse_transmission.color_texture, pbr.subsurface_color); + + // Thin-walled flag — Arnold exposes this as a material feature on + // the FBX side; mirror it to material_component for consumer + // shaders that want to switch back-side transmission on / off. + if(m->features.thin_walled.enabled) + { + // No dedicated `thin_walled` bool on material_component today; + // surface it via the generic property map so downstream shaders + // can opt-in. Key kept stable to match OpenPBR_ResolvedInputs + // field name. 
+ mc->properties["thin_walled"] = true; + } + + // Stable id — deterministic within this FBX load (keyed on the ufbx + // material's element_id when non-zero, else a freshly minted id). + // Re-reads of the same asset may still mint different ids, but + // within-session fingerprinting stays pointer-independent. + mc->stable_id = (m && m->element.element_id) + ? (uint64_t)m->element.element_id + : ossia::mint_stable_id(); + const int idx = (int)materials.size(); + materials.push_back(mc); + material_index.emplace(m, idx); + return idx; + } + + // Pull a single attribute stream into a freshly-allocated shared buffer. + // `floats_per_vertex` controls stride. The lambda is called per vertex with + // (dst_floats, source_index_in_mesh). + template + static std::shared_ptr> extract_attribute( + const ufbx_mesh* umesh, const ufbx_mesh_part& part, + int floats_per_vertex, std::vector& tris, + Read&& read) + { + const int64_t num_verts = int64_t(part.num_triangles) * 3; + auto out = std::make_shared>(size_t(num_verts) * floats_per_vertex); + float* dst = out->data(); + for(size_t fi = 0; fi < part.num_faces; fi++) + { + const uint32_t face_idx = part.face_indices.data[fi]; + const ufbx_face face = umesh->faces.data[face_idx]; + tris.resize(face.num_indices * 3); + uint32_t num_tris = ufbx_triangulate_face(tris.data(), tris.size(), umesh, face); + for(uint32_t ti = 0; ti < num_tris; ti++) + { + for(int vi = 0; vi < 3; vi++) + { + uint32_t idx = tris[ti * 3 + vi]; + read(dst, idx); + dst += floats_per_vertex; + } + } + } + return out; + } + + // Build a ScenePart for one (mesh, material_part) pair. Vertex data is in + // mesh-local space — node hierarchy carries the transform. 
+ FbxParser::ScenePart extract_part( + const ufbx_node* node, const ufbx_mesh* umesh, + const ufbx_mesh_part& part) + { + FbxParser::ScenePart sp; + sp.vertex_count = uint32_t(part.num_triangles) * 3; + if(sp.vertex_count == 0) + return sp; + + const bool has_normals = umesh->vertex_normal.exists; + const bool has_uv = umesh->vertex_uv.exists; + const bool has_colors = umesh->vertex_color.exists; + const bool has_tangents = umesh->vertex_tangent.exists; + + sp.positions = extract_attribute( + umesh, part, 3, tri_indices, [umesh](float* dst, uint32_t idx) { + ufbx_vec3 p = umesh->vertex_position.values.data[ + umesh->vertex_position.indices.data[idx]]; + dst[0] = float(p.x); dst[1] = float(p.y); dst[2] = float(p.z); + }); + // Local-space AABB for per-draw GPU culling. Walk the just-extracted + // positions once. ~10 ns/vertex — negligible at load time. + if(sp.positions && !sp.positions->empty()) + sp.bounds = ossia::compute_aabb_from_positions( + sp.positions->data(), sp.vertex_count); + + if(has_normals) + { + sp.normals = extract_attribute( + umesh, part, 3, tri_indices, [umesh](float* dst, uint32_t idx) { + ufbx_vec3 n = umesh->vertex_normal.values.data[ + umesh->vertex_normal.indices.data[idx]]; + float len = float(std::sqrt(n.x * n.x + n.y * n.y + n.z * n.z)); + if(len > 1e-8f) + { + float inv = 1.f / len; + dst[0] = float(n.x) * inv; + dst[1] = float(n.y) * inv; + dst[2] = float(n.z) * inv; + } + else + { + dst[0] = 0.f; dst[1] = 1.f; dst[2] = 0.f; + } + }); + } + + if(has_uv) + { + sp.texcoords = extract_attribute( + umesh, part, 2, tri_indices, [umesh](float* dst, uint32_t idx) { + ufbx_vec2 uv = umesh->vertex_uv.values.data[ + umesh->vertex_uv.indices.data[idx]]; + dst[0] = float(uv.x); dst[1] = float(uv.y); + }); + } + + if(has_colors) + { + sp.colors = extract_attribute( + umesh, part, 4, tri_indices, [umesh](float* dst, uint32_t idx) { + ufbx_vec4 c = umesh->vertex_color.values.data[ + umesh->vertex_color.indices.data[idx]]; + dst[0] = float(c.x); 
dst[1] = float(c.y); + dst[2] = float(c.z); dst[3] = float(c.w); + }); + } + + if(has_tangents) + { + sp.tangents = extract_attribute( + umesh, part, 4, tri_indices, [umesh](float* dst, uint32_t idx) { + ufbx_vec3 t = umesh->vertex_tangent.values.data[ + umesh->vertex_tangent.indices.data[idx]]; + float len = float(std::sqrt(t.x * t.x + t.y * t.y + t.z * t.z)); + if(len > 1e-8f) + { + float inv = 1.f / len; + dst[0] = float(t.x) * inv; + dst[1] = float(t.y) * inv; + dst[2] = float(t.z) * inv; + } + else + { + dst[0] = 1.f; dst[1] = 0.f; dst[2] = 0.f; + } + // Compute handedness from bitangent if present + if(umesh->vertex_bitangent.exists) + { + ufbx_vec3 n = umesh->vertex_normal.values.data[ + umesh->vertex_normal.indices.data[idx]]; + ufbx_vec3 b = umesh->vertex_bitangent.values.data[ + umesh->vertex_bitangent.indices.data[idx]]; + float cx = float(n.y * t.z - n.z * t.y); + float cy = float(n.z * t.x - n.x * t.z); + float cz = float(n.x * t.y - n.y * t.x); + float d = cx * float(b.x) + cy * float(b.y) + cz * float(b.z); + dst[3] = d < 0.f ? -1.f : 1.f; + } + else + { + dst[3] = 1.f; + } + }); + } + else if(has_normals && has_uv) + { + // FBX mesh has no TANGENT channel — synthesize tangents from + // position / normal / UV via mikktspace so normal maps work. + // Extracted attributes here are already triangle-unindexed + // (each triangle has 3 unique vertices), so no index buffer is + // needed and mikktspace's contract is satisfied naturally. + sp.tangents = Threedim::generate_tangents_mikktspace( + sp.positions, sp.normals, sp.texcoords, + /*indices=*/nullptr, sp.vertex_count); + } + + // Skinning: if the mesh has a skin deformer, pull top-4 (cluster, weight) + // pairs per vertex. ufbx sorts weights descending, so we can truncate to + // 4 safely. Joint indices map through register_joint into the global + // skeleton. The per-triangle expansion mirrors the position walk: one + // output entry per (face_index, triangulated_vertex). 
+ if(umesh->skin_deformers.count > 0) + { + const ufbx_skin_deformer* skin = umesh->skin_deformers.data[0]; + + // Register all clusters' bones up front so register_joint is a plain + // lookup in the hot per-vertex loop below. + std::vector cluster_to_joint(skin->clusters.count, -1); + for(size_t ci = 0; ci < skin->clusters.count; ci++) + { + const ufbx_skin_cluster* cl = skin->clusters.data[ci]; + if(!cl || !cl->bone_node) + continue; + int j = register_joint(cl->bone_node); + cluster_to_joint[ci] = j; + + // The cluster's geometry_to_bone IS the inverse-bind matrix (glTF + // convention): vertices in geometry-local space → bone-local. Store + // as column-major 4x4 by padding each 3-row ufbx column to 4 floats. + const ufbx_matrix& m = cl->geometry_to_bone; + float* ibm = skeleton->joints[j].inverse_bind_matrix; + // Column 0: (m00, m10, m20, 0), col 1, col 2, col 3 (translation) + ibm[0] = float(m.m00); ibm[1] = float(m.m10); ibm[2] = float(m.m20); ibm[3] = 0.f; + ibm[4] = float(m.m01); ibm[5] = float(m.m11); ibm[6] = float(m.m21); ibm[7] = 0.f; + ibm[8] = float(m.m02); ibm[9] = float(m.m12); ibm[10] = float(m.m22); ibm[11] = 0.f; + ibm[12] = float(m.m03); ibm[13] = float(m.m13); ibm[14] = float(m.m23); ibm[15] = 1.f; + } + + // Allocate joints0/weights0 per-triangle-vertex buffers. ufbx indexes + // skin_vertices by the base vertex (not the triangulated index), so + // we resolve via umesh->vertex_position.indices — same pattern as the + // attribute extraction above. 
+ const int64_t num_verts = int64_t(part.num_triangles) * 3; + auto joints_buf = std::make_shared>(size_t(num_verts) * 4); + auto weights_buf = std::make_shared>(size_t(num_verts) * 4); + uint16_t* jdst = joints_buf->data(); + float* wdst = weights_buf->data(); + + for(size_t fi = 0; fi < part.num_faces; fi++) + { + const uint32_t face_idx = part.face_indices.data[fi]; + const ufbx_face face = umesh->faces.data[face_idx]; + tri_indices.resize(face.num_indices * 3); + uint32_t num_tris = ufbx_triangulate_face( + tri_indices.data(), tri_indices.size(), umesh, face); + for(uint32_t ti = 0; ti < num_tris; ti++) + { + for(int vi = 0; vi < 3; vi++) + { + uint32_t idx = tri_indices[ti * 3 + vi]; + uint32_t base_vtx = umesh->vertex_position.indices.data[idx]; + const ufbx_skin_vertex sv = skin->vertices.data[base_vtx]; + + // Pick up to 4 weights (already sorted descending by weight). + float w[4] = {0, 0, 0, 0}; + uint16_t j[4] = {0, 0, 0, 0}; + const uint32_t n = std::min(sv.num_weights, 4); + for(uint32_t k = 0; k < n; ++k) + { + const ufbx_skin_weight sw = skin->weights.data[sv.weight_begin + k]; + if(sw.cluster_index < cluster_to_joint.size() + && cluster_to_joint[sw.cluster_index] >= 0) + { + j[k] = uint16_t(cluster_to_joint[sw.cluster_index]); + w[k] = float(sw.weight); + } + } + // Renormalise — ufbx doesn't guarantee the top-4 sum to 1. + float sum = w[0] + w[1] + w[2] + w[3]; + if(sum > 1e-6f) + { + float inv = 1.f / sum; + w[0] *= inv; w[1] *= inv; w[2] *= inv; w[3] *= inv; + } + jdst[0] = j[0]; jdst[1] = j[1]; jdst[2] = j[2]; jdst[3] = j[3]; + wdst[0] = w[0]; wdst[1] = w[1]; wdst[2] = w[2]; wdst[3] = w[3]; + jdst += 4; + wdst += 4; + } + } + } + + sp.joints0 = std::move(joints_buf); + sp.weights0 = std::move(weights_buf); + sp.skin_joint_count = int(skeleton ? 
skeleton->joints.size() : 0); + } + + // Material assignment — prefer the per-instance node->materials list + // (FBX allows different node instances to override mesh materials), fall + // back to the mesh's own materials list; otherwise mat stays null. + const ufbx_material* mat = nullptr; + if(part.index < node->materials.count) + mat = node->materials.data[part.index]; + if(!mat && part.index < umesh->materials.count) + mat = umesh->materials.data[part.index]; + sp.material_index = register_material(mat); + + return sp; + } + + // Convert a ufbx_light to a populated light_component. Caller takes + // ownership. Returns nullptr for a null input, a non-emitting light + // (cast_light off), or an unrepresentable type (e.g. ufbx VOLUME). + static std::shared_ptr to_light(const ufbx_light* l) + { + if(!l) + return {}; + auto lc = std::make_shared(); + switch(l->type) + { + case UFBX_LIGHT_DIRECTIONAL: + lc->type = ossia::light_type::directional; break; + case UFBX_LIGHT_POINT: + lc->type = ossia::light_type::point; break; + case UFBX_LIGHT_SPOT: + lc->type = ossia::light_type::spot; break; + case UFBX_LIGHT_AREA: + // ufbx exposes either rectangle or sphere area shape; map rectangle + // to rect_area and anything else to sphere_area (close enough at v1). + lc->type = (l->area_shape == UFBX_LIGHT_AREA_SHAPE_RECTANGLE) + ? ossia::light_type::rect_area + : ossia::light_type::sphere_area; + break; + default: // UFBX_LIGHT_VOLUME and any future types — skip. 
+ return {}; + } + switch(l->decay) + { + case UFBX_LIGHT_DECAY_NONE: lc->decay = ossia::light_decay::none; break; + case UFBX_LIGHT_DECAY_LINEAR: lc->decay = ossia::light_decay::linear; break; + case UFBX_LIGHT_DECAY_QUADRATIC: lc->decay = ossia::light_decay::quadratic; break; + case UFBX_LIGHT_DECAY_CUBIC: lc->decay = ossia::light_decay::cubic; break; + default: break; + } + lc->color[0] = float(l->color.x); + lc->color[1] = float(l->color.y); + lc->color[2] = float(l->color.z); + lc->intensity = float(l->intensity); + lc->inner_cone_angle = float(l->inner_angle) * float(M_PI) / 180.f; + lc->outer_cone_angle = float(l->outer_angle) * float(M_PI) / 180.f; + lc->shadow.enabled = l->cast_shadows; + + // Range: FBX doesn't expose falloff distance as a first-class + // ufbx_light field, but the underlying FBX property `FarAttenuationEnd` + // (the distance past which the light contributes nothing) maps + // cleanly onto score's `range`. 0 = infinite, which is the ossia + // light_component convention for "no cutoff." Read via the generic + // props accessor since ufbx pins it in `l->props`, not in the + // ufbx_light struct fields. + lc->range = float(ufbx_find_real(&l->props, "FarAttenuationEnd", 0.0)); + + // Area-light dimensions: FBX has no standard area_width / area_height + // fields in ufbx_light. Authoring tools encode area size through + // the node's own scale; we leave lc->width / height / radius at + // their defaults and let a future shader-side area sampler derive + // effective dimensions from the node transform when needed. + + // `l->cast_light` (bool) is the "is this light emitting at all" + // gate in FBX. ossia::light_component has no direct equivalent — + // a disabled light would be culled upstream (scene_filter by visibility + // or a dedicated filter). Dropping a non-emitting light here keeps + // the RawLight arena from accumulating dead slots. 
+ if(!l->cast_light) + return {}; + + return lc; + } + + // Convert a ufbx_camera to a camera_component. ufbx reports field-of-view + // in degrees; the vertical (.y) component is stored as yfov in radians. + static std::shared_ptr to_camera(const ufbx_camera* c) + { + if(!c) + return {}; + auto cc = std::make_shared(); + cc->projection = (c->projection_mode == UFBX_PROJECTION_MODE_ORTHOGRAPHIC) + ? ossia::camera_projection::orthographic + : ossia::camera_projection::perspective; + cc->yfov = float(c->field_of_view_deg.y) * float(M_PI) / 180.f; + cc->aspect_ratio = float(c->aspect_ratio > 0 ? c->aspect_ratio : 1.0); + cc->xmag = float(c->orthographic_size.x); + cc->ymag = float(c->orthographic_size.y); + cc->znear = float(c->near_plane); + cc->zfar = float(c->far_plane); + cc->physical.focal_length = float(c->focal_length_mm); + cc->physical.horizontal_aperture = float(c->aperture_size_inch.x * 25.4); + cc->physical.vertical_aperture = float(c->aperture_size_inch.y * 25.4); + return cc; + } + + void extract_node(const ufbx_node* node, int parent_index) + { + FbxParser::SceneNode sn; + sn.name = std::string(node->name.data, node->name.length); + sn.parent_index = parent_index; + sn.light = to_light(node->light); + sn.camera = to_camera(node->camera); + + // Decompose local_transform — ufbx already gives us TRS. + const auto& lt = node->local_transform; + sn.local_transform.translation[0] = float(lt.translation.x); + sn.local_transform.translation[1] = float(lt.translation.y); + sn.local_transform.translation[2] = float(lt.translation.z); + sn.local_transform.rotation[0] = float(lt.rotation.x); + sn.local_transform.rotation[1] = float(lt.rotation.y); + sn.local_transform.rotation[2] = float(lt.rotation.z); + sn.local_transform.rotation[3] = float(lt.rotation.w); + sn.local_transform.scale[0] = float(lt.scale.x); + sn.local_transform.scale[1] = float(lt.scale.y); + sn.local_transform.scale[2] = float(lt.scale.z); + + // Extract mesh parts if this node holds a mesh. 
+ if(node->mesh) + { + const ufbx_mesh* umesh = node->mesh; + if(umesh->material_parts.count > 0) + { + for(size_t pi = 0; pi < umesh->material_parts.count; pi++) + { + auto sp = extract_part(node, umesh, umesh->material_parts.data[pi]); + if(sp.vertex_count > 0) + sn.parts.push_back(std::move(sp)); + } + } + else + { + ufbx_mesh_part whole{}; + whole.num_faces = umesh->num_faces; + whole.num_triangles = umesh->num_triangles; + std::vector all_faces(umesh->num_faces); + for(size_t i = 0; i < umesh->num_faces; i++) + all_faces[i] = uint32_t(i); + whole.face_indices.data = all_faces.data(); + whole.face_indices.count = all_faces.size(); + auto sp = extract_part(node, umesh, whole); + if(sp.vertex_count > 0) + sn.parts.push_back(std::move(sp)); + } + } + + const int self_index = (int)nodes.size(); + nodes.push_back(std::move(sn)); + + // Recurse into children. + for(size_t ci = 0; ci < node->children.count; ci++) + extract_node(node->children.data[ci], self_index); + } + + void extract_scene(const ufbx_scene* scene) + { + // Skip the synthetic root node; emit its children as actual roots. + if(!scene->root_node) + return; + for(size_t ci = 0; ci < scene->root_node->children.count; ci++) + extract_node(scene->root_node->children.data[ci], -1); + } +}; + +// ============================================================================= +// rebuild_scene — walk m_scene_nodes, build hierarchical scene_spec with +// mesh_primitive[] (modern path; ScenePreprocessor handles both this and the +// legacy_geometry path). +// ============================================================================= + +// Wrap a per-attribute float buffer as a buffer_resource_ptr suitable for +// mesh_primitive::vertex_buffers. The data lifetime is held by the shared +// pointer aliasing — no extra copy. 
+static ossia::buffer_resource_ptr make_buffer_resource( + std::shared_ptr> floats) +{ + if(!floats || floats->empty()) + return {}; + auto br = std::make_shared(); + ossia::buffer_data bd; + // Aliasing constructor: the resulting shared_ptr keeps `floats` alive but + // exposes a `const void*` pointing at the contiguous data. + bd.data = std::shared_ptr(floats, floats->data()); + bd.byte_size = int64_t(floats->size() * sizeof(float)); + bd.usage_hint = ossia::buffer_data::usage::vertex_buffer; + br->resource = std::move(bd); + br->dirty_index = 1; + return br; +} + +// Build one mesh_primitive from a ScenePart. Each present attribute lives in +// its own buffer (one buffer_index per attribute, one binding per attribute). +static ossia::mesh_primitive part_to_primitive( + const FbxParser::ScenePart& part, + const std::vector>& mats) +{ + ossia::mesh_primitive mp; + mp.stable_id = ossia::mint_stable_id(); + mp.topology = ossia::primitive_topology::triangles; + mp.index_type = ossia::index_format::none; + mp.vertex_count = part.vertex_count; + mp.index_count = 0; + mp.first_vertex = 0; + mp.first_index = 0; + mp.vertex_offset = 0; + mp.bounds = part.bounds; + if(part.material_index >= 0 + && std::size_t(part.material_index) < mats.size()) + mp.material = mats[part.material_index]; + + uint32_t buffer_idx = 0; + auto add = [&](std::shared_ptr> data, int floats_per_vertex, + ossia::attribute_semantic sem, ossia::vertex_format fmt) { + if(!data || data->empty()) + return; + mp.vertex_buffers.push_back(make_buffer_resource(std::move(data))); + ossia::vertex_attribute attr; + attr.semantic = sem; + attr.format = fmt; + attr.buffer_index = buffer_idx; + attr.byte_offset = 0; + attr.byte_stride = uint32_t(floats_per_vertex) * sizeof(float); + attr.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(attr); + ++buffer_idx; + }; + + add(part.positions, 3, + ossia::attribute_semantic::position, ossia::vertex_format::float3); + add(part.normals, 3, 
+ ossia::attribute_semantic::normal, ossia::vertex_format::float3); + add(part.texcoords, 2, + ossia::attribute_semantic::texcoord0, ossia::vertex_format::float2); + add(part.colors, 4, + ossia::attribute_semantic::color0, ossia::vertex_format::float4); + add(part.tangents, 4, + ossia::attribute_semantic::tangent, ossia::vertex_format::float4); + + // Skinning attributes. joints0 is uint16x4 (halves per-vertex storage vs + // uint32x4); weights0 is float4. Only emitted when the mesh has skinning. + if(part.joints0 && !part.joints0->empty()) + { + auto joint_br = std::make_shared(); + ossia::buffer_data bd; + bd.data = std::shared_ptr(part.joints0, part.joints0->data()); + bd.byte_size = int64_t(part.joints0->size() * sizeof(uint16_t)); + bd.usage_hint = ossia::buffer_data::usage::vertex_buffer; + joint_br->resource = std::move(bd); + joint_br->dirty_index = 1; + mp.vertex_buffers.push_back(joint_br); + + ossia::vertex_attribute attr; + attr.semantic = ossia::attribute_semantic::joints0; + attr.format = ossia::vertex_format::uint16x4; + attr.buffer_index = buffer_idx++; + attr.byte_offset = 0; + attr.byte_stride = 4 * sizeof(uint16_t); + attr.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(attr); + } + if(part.weights0 && !part.weights0->empty()) + { + mp.vertex_buffers.push_back(make_buffer_resource(part.weights0)); + ossia::vertex_attribute attr; + attr.semantic = ossia::attribute_semantic::weights0; + attr.format = ossia::vertex_format::float4; + attr.buffer_index = buffer_idx++; + attr.byte_offset = 0; + attr.byte_stride = 4 * sizeof(float); + attr.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(attr); + } + + return mp; +} + +void FbxParser::rebuild_scene() +{ + if(m_scene_nodes.empty()) + return; + + // Allocate scene_node + children list shells in flat arrays first, then + // wire children using parent_index. 
Two-pass keeps the code simple and + // avoids any std::shared_ptr circular-ownership concerns. + const std::size_t N = m_scene_nodes.size(); + std::vector> nodes; + std::vector>> children_lists; + nodes.reserve(N); + children_lists.reserve(N); + for(std::size_t i = 0; i < N; ++i) + { + auto n = std::make_shared(); + n->name = m_scene_nodes[i].name; + n->visible = true; + nodes.push_back(std::move(n)); + children_lists.push_back( + std::make_shared>()); + } + + // Per-node payload list: first the local transform (so it applies to all + // subsequent siblings, matching FlattenVisitor's convention), then the + // mesh_component (if any). Child nodes are pushed in the second pass. + for(std::size_t i = 0; i < N; ++i) + { + auto& src = m_scene_nodes[i]; + auto& lst = *children_lists[i]; + + lst.push_back(src.local_transform); + + if(!src.parts.empty()) + { + auto mc = std::make_shared(); + mc->primitives.reserve(src.parts.size()); + bool any_skinned = false; + for(const auto& part : src.parts) + { + mc->primitives.push_back(part_to_primitive(part, m_materials)); + if(part.skin_joint_count > 0) + any_skinned = true; + } + // Attach the global skeleton when any part of this mesh is skinned. + if(any_skinned && m_skeleton) + mc->skin = ossia::skeleton_component_ptr(m_skeleton); + mc->dirty_index = 1; + lst.push_back(ossia::mesh_component_ptr(std::move(mc))); + } + if(src.light) + lst.push_back(ossia::light_component_ptr(src.light)); + if(src.camera) + lst.push_back(ossia::camera_component_ptr(src.camera)); + } + + // Wire children (parent_index references earlier entries). 
+ for(std::size_t i = 0; i < N; ++i) + { + int p = m_scene_nodes[i].parent_index; + if(p >= 0 && p < int(N)) + children_lists[p]->push_back(ossia::scene_node_ptr(nodes[i])); + } + for(std::size_t i = 0; i < N; ++i) + nodes[i]->children = children_lists[i]; + + auto roots = std::make_shared>(); + for(std::size_t i = 0; i < N; ++i) + if(m_scene_nodes[i].parent_index < 0) + roots->push_back(ossia::scene_node_ptr(nodes[i])); + + // Materials: publish the registered list. Const conversion happens via + // material_component_ptr (shared_ptr). + auto mat_list = std::make_shared>(); + mat_list->reserve(m_materials.size()); + for(auto& m : m_materials) + mat_list->push_back(ossia::material_component_ptr(m)); + + auto state = std::make_shared(); + state->roots = std::move(roots); + state->materials = std::move(mat_list); + if(m_skeleton && !m_skeleton->joints.empty()) + { + auto skins = std::make_shared>(); + skins->push_back(ossia::skeleton_component_ptr(m_skeleton)); + state->skeletons = std::move(skins); + } + state->version = 1; + state->dirty_index = 1; + + // AssetLoader wraps m_raw_state in a TRS payload externally; we + // publish only the raw scene here. + m_raw_state = std::move(state); +} + +std::function FbxParser::ins::fbx_t::process(file_type tv) +{ + if(tv.filename.empty()) + return {}; + + ufbx_load_opts opts{}; + opts.generate_missing_normals = true; + opts.normalize_normals = true; + opts.normalize_tangents = true; + + // Convert to OpenGL coordinate system: +X right, +Y up, +Z front (= -Z forward) + opts.target_axes.right = UFBX_COORDINATE_AXIS_POSITIVE_X; + opts.target_axes.up = UFBX_COORDINATE_AXIS_POSITIVE_Y; + opts.target_axes.front = UFBX_COORDINATE_AXIS_POSITIVE_Z; + opts.target_unit_meters = 1.0; + + // Bake "geometric transforms" (the non-inherited per-attachment offset) into + // the vertex data. 
This means node->geometry_transform is identity afterward + // and the meshes' vertex positions are in the node's local frame — exactly + // what we want for the hierarchical scene_spec output. + opts.geometry_transform_handling = UFBX_GEOMETRY_TRANSFORM_HANDLING_MODIFY_GEOMETRY; + opts.space_conversion = UFBX_SPACE_CONVERSION_ADJUST_TRANSFORMS; + opts.use_blender_pbr_material = true; + + ufbx_error error{}; + ufbx_scene* scene = ufbx_load_file(tv.filename.data(), &opts, &error); + if(!scene) + return {}; + + // Extract hierarchical scene (drives rebuild_scene). + std::vector scene_nodes; + std::vector> materials; + std::shared_ptr skeleton; + FbxSceneExtractor scene_ex{scene_nodes, materials, skeleton, {}, {}, {}}; + scene_ex.extract_scene(scene); + scene_ex.link_joint_parents(); + + ufbx_free_scene(scene); + + if(scene_nodes.empty()) + return {}; + + return [scene_nodes = std::move(scene_nodes), + materials = std::move(materials), + skeleton = std::move(skeleton)](FbxParser& o) mutable { + std::swap(o.m_scene_nodes, scene_nodes); + std::swap(o.m_materials, materials); + o.m_skeleton = std::move(skeleton); + o.rebuild_scene(); + }; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/FbxParser.hpp b/src/plugins/score-plugin-threedim/Threedim/FbxParser.hpp new file mode 100644 index 0000000000..3cd0bc8349 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/FbxParser.hpp @@ -0,0 +1,95 @@ +#pragma once +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Internal FBX parsing class — drives ufbx + builds an ossia::scene_spec +// out of an FBX file's bytes. Not a halp node in its own right (the +// user-facing entry point is AssetLoader). AssetLoader calls the static +// `ins::fbx_t::process` to obtain an apply-lambda, applies it against a +// throwaway FbxParser instance, then copies out `m_raw_state`. 
+class FbxParser +{ +public: + struct ins + { + struct fbx_t : halp::file_port<"FBX file"> + { + static std::function process(file_type data); + } fbx; + } inputs; + + void rebuild_scene(); + + // -- Rich scene staging (drives rebuild_scene) ----------------------------- + // Built once per `process()` call. Lives on the execution thread; rebuilt + // into ossia::scene_spec by rebuild_scene(). + struct ScenePart + { + // Per-attribute CPU buffers, one shared_ptr per stream. Each spans + // vertex_count elements of the matching format. Empty pointers indicate + // the attribute is absent on this part. + std::shared_ptr> positions; // 3 floats per vertex (always present) + std::shared_ptr> normals; // 3 floats per vertex + std::shared_ptr> texcoords; // 2 floats per vertex + std::shared_ptr> colors; // 4 floats per vertex (RGBA) + std::shared_ptr> tangents; // 4 floats per vertex + + // Skinning: top-4 joints + weights per vertex. joints holds uint16 per + // component (4 per vertex); weights holds float (4 per vertex). Both + // are populated iff the mesh has a skin deformer. + std::shared_ptr> joints0; + std::shared_ptr> weights0; + + uint32_t vertex_count{0}; + + // Index into FbxParser::m_materials. -1 = no material assigned. + int material_index{-1}; + + // Joint count of the global skeleton when this part was extracted — + // rebuild_scene attaches the skeleton to the mesh_component when any + // part has a non-zero count. 0 = no skin. + int skin_joint_count{0}; + + // Local-space AABB over `positions`. Computed once by extract_part + // (or whoever fills ScenePart) and carried into mesh_primitive by + // part_to_primitive. Empty aabb = "not yet computed"; downstream + // GPU culling treats empty as infinite. 
+ ossia::aabb bounds{}; + }; + + struct SceneNode + { + std::string name; + ossia::scene_transform local_transform; // node's local TRS + int parent_index{-1}; // index into m_scene_nodes (-1 = root) + std::vector parts; // 0..N mesh parts (one per material) + + // Optional attached components — populated during extraction when the + // ufbx_node carries them. `rebuild_scene` adds them as scene_payloads. + std::shared_ptr light; + std::shared_ptr camera; + }; + + std::vector m_scene_nodes; + std::vector> m_materials; + + // One global skeleton built from all skin clusters encountered. Published + // to scene_state.skeletons[0]; mesh_component::skin_index is 0 for any + // mesh that uses skinning. Empty if the FBX has no skinning. + std::shared_ptr m_skeleton; + + // Rich scene state emitted by rebuild_scene — full hierarchy with + // materials, lights, cameras, skeletons. AssetLoader consumes this + // via the apply-lambda returned by ins::fbx_t::process. + std::shared_ptr m_raw_state; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.cpp new file mode 100644 index 0000000000..42f2b35f4d --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.cpp @@ -0,0 +1,81 @@ +#include "Executor.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace Gfx::FlattenedSceneFilter +{ +class flattened_scene_filter_exec_node final : public gfx_exec_node +{ +public: + flattened_scene_filter_exec_node(GfxExecutionAction& ctx) + : gfx_exec_node{ctx} + { + } + + void init() + { + auto node = std::make_unique(); + id = exec_context->ui->register_node(std::move(node)); + } + + ~flattened_scene_filter_exec_node() + { + exec_context->ui->unregister_node(id); + } + + std::string label() const noexcept override + { + return "Gfx::FlattenedSceneFilter_node"; + } +}; + 
+ProcessExecutorComponent::ProcessExecutorComponent( + Gfx::FlattenedSceneFilter::Model& element, + const Execution::Context& ctx, + QObject* parent) + : ProcessComponent_T{element, ctx, "flattenedSceneFilterComponent", parent} +{ + auto n = ossia::make_node( + *ctx.execState, ctx.doc.plugin().exec); + + // Port 0: geometry input + n->add_geometry(); + + // Ports 1-3: Mode + Match (int) + Match (string) controls + for(std::size_t i = 1; i <= 3; i++) + { + auto ctrl = qobject_cast(element.inlets()[i]); + auto& p = n->add_control(); + ctrl->setupExecution(*n->root_inputs().back(), this); + p->value = ctrl->value(); + QObject::connect( + ctrl, + &Process::ControlInlet::valueChanged, + this, + con_unvalidated{ctx, i, 0, n}); + } + + // Port 0: geometry output + n->add_geometry_out(); + + n->init(); + + this->node = n; + m_ossia_process = std::make_shared(n); +} + +void ProcessExecutorComponent::cleanup() +{ + ProcessComponent_T::cleanup(); +} +} diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.hpp new file mode 100644 index 0000000000..922d1060fa --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Executor.hpp @@ -0,0 +1,22 @@ +#pragma once +#include + +#include + +namespace Gfx::FlattenedSceneFilter +{ +class Model; +class ProcessExecutorComponent final + : public Execution:: + ProcessComponent_T +{ + COMPONENT_METADATA("b6c8e2d4-9a1f-4e7b-8d3c-2f5a1b7e9c4d") +public: + ProcessExecutorComponent( + Model& element, const Execution::Context& ctx, QObject* parent); + void cleanup() override; +}; + +using ProcessExecutorComponentFactory + = Execution::ProcessComponentFactory_T; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Metadata.hpp new file mode 100644 index 0000000000..6f144e83f7 --- /dev/null +++ 
b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Metadata.hpp @@ -0,0 +1,22 @@ +#pragma once +#include + +namespace Gfx::FlattenedSceneFilter +{ +class Model; +} + +PROCESS_METADATA( + , Gfx::FlattenedSceneFilter::Model, "7a1b3c5d-2e4f-4a6b-8c9d-1e2f3a4b5c6e", + "flattenedscenefilter", + "Flattened Scene Filter", + Process::ProcessCategory::Visual, + "Visuals/3D/Scene", + "Filter a flattened scene by tag or material index, per pass", + "ossia team", + (QStringList{"gfx", "scene", "filter", "3d"}), + {}, + {}, + QUrl{}, + Process::ProcessFlags::SupportsAll +) diff --git a/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.cpp new file mode 100644 index 0000000000..d552fd9852 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.cpp @@ -0,0 +1,97 @@ +#include "Process.hpp" + +#include + +#include +#include + +#include +#include + +#include + +W_OBJECT_IMPL(Gfx::FlattenedSceneFilter::Model) +namespace Gfx::FlattenedSceneFilter +{ + +Model::Model( + const TimeVal& duration, const Id& id, QObject* parent) + : Process::ProcessModel{duration, id, "gfxProcess", parent} +{ + metadata().setInstanceName(*this); + init(); +} + +Model::~Model() = default; + +void Model::init() +{ + if(m_inlets.empty() && m_outlets.empty()) + { + m_inlets.push_back(new GeometryInlet{"Geometry In", Id(0), this}); + + m_inlets.push_back(new Process::ComboBox{ + std::vector>{ + {QStringLiteral("tag == match"), 0}, + {QStringLiteral("tag != match"), 1}, + {QStringLiteral("material_index == match"), 2}, + {QStringLiteral("material_index != match"), 3}, + {QStringLiteral("blend == match"), 4}, + {QStringLiteral("blend != match"), 5}, + {QStringLiteral("depth_write == match"), 6}, + {QStringLiteral("depth_write != match"), 7}, + {QStringLiteral("cull_mode == match"), 8}, + {QStringLiteral("cull_mode != match"), 9}, + {QStringLiteral("topology == 
match"), 10}, + {QStringLiteral("topology != match"), 11}, + {QStringLiteral("format_id == match_str"), 12}, + {QStringLiteral("format_id != match_str"), 13}}, + 0, "Mode", Id(1), this}); + + m_inlets.push_back(new Process::IntSpinBox{ + -1, 2147483647, 0, "Match", Id(2), this}); + + // Modes 12/13 read this string; other modes ignore it. + m_inlets.push_back(new Process::LineEdit{ + QString{}, "Format ID", Id(3), this}); + + m_outlets.push_back(new GeometryOutlet{"Geometry Out", Id(0), this}); + } +} + +QString Model::prettyName() const noexcept +{ + return tr("Flattened Scene Filter"); +} + +} + +template <> +void DataStreamReader::read(const Gfx::FlattenedSceneFilter::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); + insertDelimiter(); +} + +template <> +void DataStreamWriter::write(Gfx::FlattenedSceneFilter::Model& proc) +{ + writePorts( + *this, components.interfaces(), proc.m_inlets, + proc.m_outlets, &proc); + checkDelimiter(); +} + +template <> +void JSONReader::read(const Gfx::FlattenedSceneFilter::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); +} + +template <> +void JSONWriter::write(Gfx::FlattenedSceneFilter::Model& proc) +{ + writePorts( + *this, components.interfaces(), proc.m_inlets, + proc.m_outlets, &proc); +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.hpp similarity index 52% rename from src/plugins/score-plugin-threedim/Threedim/Splat/Process.hpp rename to src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.hpp index 38d68691d4..9efaec1896 100644 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/FlattenedSceneFilter/Process.hpp @@ -1,25 +1,22 @@ #pragma once -#include +#include +#include #include #include -#include -#include -#include - -#include - -#include -namespace Gfx::Splat +namespace Gfx::FlattenedSceneFilter { 
class Model final : public Process::ProcessModel { SCORE_SERIALIZE_FRIENDS - PROCESS_METADATA_IMPL(Gfx::Splat::Model) + PROCESS_METADATA_IMPL(Gfx::FlattenedSceneFilter::Model) W_OBJECT(Model) public: - Model(const TimeVal& duration, const Id& id, QObject* parent); + Model( + const TimeVal& duration, + const Id& id, + QObject* parent); template Model(Impl& vis, QObject* parent) @@ -36,6 +33,5 @@ class Model final : public Process::ProcessModel QString prettyName() const noexcept override; }; -using ProcessFactory = Process::ProcessFactory_T; - +using ProcessFactory = Process::ProcessFactory_T; } diff --git a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.cpp similarity index 80% rename from src/plugins/score-plugin-threedim/Threedim/ObjLoader.cpp rename to src/plugins/score-plugin-threedim/Threedim/GeometryLoader.cpp index ed2f199f05..2196a2dc2d 100644 --- a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.cpp @@ -1,15 +1,16 @@ -#include "ObjLoader.hpp" +#include "GeometryLoader.hpp" #include #include #include #include +#include namespace Threedim { -void ObjLoader::rebuild_geometry() +void GeometryLoader::rebuild_geometry() { std::vector& new_meshes = this->meshinfo; @@ -211,14 +212,21 @@ static bool check_file_extension(std::string_view filename, std::string_view exp return true; } -std::function ObjLoader::ins::obj_t::process(file_type tv) +std::function GeometryLoader::ins::geom_t::process(file_type tv) { + // Dispatch by extension. Each branch returns a pair of + // (vector, float_vec). Empty pair = unsupported / failed + // parse → we return {} so the halp runtime leaves the current geometry + // intact rather than wiping it. 
+ // + // The returned lambda (captured mesh list + flat float buffer) runs on + // the execution thread and swaps into the loader instance's members, + // then triggers rebuild_geometry to populate the dynamic_geometry + // output. auto upload = [](auto&& mesh, auto&& buf) { - return [mesh = std::move(mesh), buf = std::move(buf)](ObjLoader& o) mutable { - // This part happens in the execution thread + return [mesh = std::move(mesh), buf = std::move(buf)](GeometryLoader& o) mutable { std::swap(o.meshinfo, mesh); std::swap(o.complete, buf); - o.rebuild_geometry(); }; }; @@ -226,20 +234,35 @@ std::function ObjLoader::ins::obj_t::process(file_type tv) Threedim::float_vec buf; if(check_file_extension(tv.filename, "obj")) { - // This part happens in a separate thread if(auto mesh = Threedim::ObjFromString(tv.bytes, buf); !mesh.empty()) - { return upload(std::move(mesh), std::move(buf)); - } } else if(check_file_extension(tv.filename, "ply")) { - // This part happens in a separate thread if(auto mesh = Threedim::PlyFromFile(tv.filename, buf); !mesh.empty()) - { return upload(std::move(mesh), std::move(buf)); - } + } + else if(check_file_extension(tv.filename, "stl")) + { + if(auto mesh = Threedim::StlFromFile(tv.filename, buf); !mesh.empty()) + return upload(std::move(mesh), std::move(buf)); + } + else if(check_file_extension(tv.filename, "off")) + { + if(auto mesh = Threedim::OffFromFile(tv.filename, buf); !mesh.empty()) + return upload(std::move(mesh), std::move(buf)); } return {}; } + +void GeometryLoader::operator()() +{ + // Compute TRS matrix from position/rotation/scale into + // halp::mesh::transform[16]. dirty_transform fires only on actual + // change so downstream's transform binding rebuild is skipped on + // idle frames. 
+ outputs.geometry.dirty_transform + = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS); +} + } diff --git a/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.hpp new file mode 100644 index 0000000000..278619d183 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/GeometryLoader.hpp @@ -0,0 +1,72 @@ +#pragma once +#include "TransformHelper.hpp" + +#include +#include +#include +#include +#include +#include + +namespace Threedim +{ + +// Geometry-only file loader. Dispatches by extension to the right parser +// and emits a halp::dynamic_geometry output — one draw-ready mesh per +// file part, no scene graph, no materials, no lights. Use AssetLoader +// for the full-scene variant (FBX / glTF also go through a +// geometry+materials+hierarchy scene_spec pipeline there). +// +// Supported extensions: .obj, .ply, .stl, .off. STL + OFF go through +// the vcglib importers; OBJ + PLY through tinyobj / miniply. All four +// funnel into the same `Threedim::mesh` + `float_vec` representation +// so `rebuild_geometry` sees one uniform input format. +// +// This is the TD-equivalent of a geometry-specific SOP-style loader — +// simpler output, no material / skeleton / animation carry-along. When +// users want the full content (PBR materials, skeletons, anim clips) +// they reach for AssetLoader instead. 
+class GeometryLoader +{ +public: + halp_meta(name, "Geometry Loader") + halp_meta(category, "Visuals/Meshes") + halp_meta(c_name, "geometry_loader") + halp_meta( + authors, + "Jean-Michaël Celerier, TinyOBJ authors, miniPLY authors, vcglib authors, Eigen authors") + halp_meta(manual_url, "https://ossia.io/score-docs/processes/meshes.html#geometry-loader") + halp_meta(uuid, "5df71765-505f-4ab7-98c1-f305d10a01ef") + + struct ins + { + struct geom_t : halp::file_port<"3D file"> + { + halp_meta(extensions, "3D files (*.obj *.ply *.stl *.off)"); + static std::function process(file_type data); + } geom; + PositionControl position; + RotationControl rotation; + ScaleControl scale; + } inputs; + + struct + { + struct : halp::mesh + { + halp_meta(name, "Geometry"); + std::vector mesh; + } geometry; + } outputs; + + void rebuild_geometry(); + void operator()(); + + std::vector meshinfo{}; + float_vec complete; + + // Per-frame TRS matrix cache (see TransformHelper.hpp). + CachedTRS m_cachedTRS{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/GltfParser.cpp b/src/plugins/score-plugin-threedim/Threedim/GltfParser.cpp new file mode 100644 index 0000000000..2cb1775d45 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/GltfParser.cpp @@ -0,0 +1,1041 @@ +#include "GltfParser.hpp" + +#include "TangentUtils.hpp" + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace Threedim +{ + +namespace +{ + +// glTF TRS decomposition. With Options::DecomposeNodeMatrices we get TRS +// directly; otherwise we'd need to decompose the 4x4. fastgltf gives us a +// std::variant — handle both paths. 
+static ossia::scene_transform to_transform(const fastgltf::Node& n) +{ + ossia::scene_transform t{}; + + if(const auto* trs = std::get_if(&n.transform)) + { + t.translation[0] = float(trs->translation[0]); + t.translation[1] = float(trs->translation[1]); + t.translation[2] = float(trs->translation[2]); + t.rotation[0] = float(trs->rotation[0]); + t.rotation[1] = float(trs->rotation[1]); + t.rotation[2] = float(trs->rotation[2]); + t.rotation[3] = float(trs->rotation[3]); + t.scale[0] = float(trs->scale[0]); + t.scale[1] = float(trs->scale[1]); + t.scale[2] = float(trs->scale[2]); + } + else if(const auto* m = std::get_if(&n.transform)) + { + // Matrix form — full TRS decomposition. We pass + // Options::DecomposeNodeMatrices so fastgltf SHOULD have already + // converted to TRS upfront, but this branch still fires for + // matrices that fastgltf flags as non-decomposable (negative + // scale, near-degenerate, library version differences). The + // previous translation-only fallback silently dropped rotation + // and scale, which broke any glTF authored matrix-only — like + // VirtualCity (193/234 nodes use matrix form encoding rotation + // and uniform scale). + // + // Algorithm: T = column 3; per-column lengths give scale; reflect + // one axis when det < 0; normalised 3×3 → quaternion via the + // standard branch-on-trace method. + const auto& M = *m; + t.translation[0] = M[3][0]; + t.translation[1] = M[3][1]; + t.translation[2] = M[3][2]; + + QVector3D c0(M[0][0], M[0][1], M[0][2]); + QVector3D c1(M[1][0], M[1][1], M[1][2]); + QVector3D c2(M[2][0], M[2][1], M[2][2]); + + float sx = c0.length(); + float sy = c1.length(); + float sz = c2.length(); + + // Flip one axis when determinant is negative (reflection encoded + // as negative scale on one axis). Without this, the quaternion + // extraction below trips on a left-handed basis and yields garbage. 
+ const float det + = c0.x() * (c1.y() * c2.z() - c1.z() * c2.y()) + - c0.y() * (c1.x() * c2.z() - c1.z() * c2.x()) + + c0.z() * (c1.x() * c2.y() - c1.y() * c2.x()); + if(det < 0.f) + { + sx = -sx; + c0 = -c0; + } + + t.scale[0] = sx; + t.scale[1] = sy; + t.scale[2] = sz; + + if(sx > 1e-6f) c0 /= sx; + if(sy > 1e-6f) c1 /= sy; + if(sz > 1e-6f) c2 /= sz; + + QMatrix3x3 R; + R(0, 0) = c0.x(); R(1, 0) = c0.y(); R(2, 0) = c0.z(); + R(0, 1) = c1.x(); R(1, 1) = c1.y(); R(2, 1) = c1.z(); + R(0, 2) = c2.x(); R(1, 2) = c2.y(); R(2, 2) = c2.z(); + QQuaternion q = QQuaternion::fromRotationMatrix(R); + t.rotation[0] = q.x(); + t.rotation[1] = q.y(); + t.rotation[2] = q.z(); + t.rotation[3] = q.scalar(); + } + return t; +} + +// Translate a glTF Material into material_component (factors + base color +// texture path). `dir` is the glTF file's parent directory — external +// image URIs are relative to it. +static std::shared_ptr to_material( + const fastgltf::Asset& asset, const fastgltf::Material& m, + const std::filesystem::path& dir) +{ + auto mc = std::make_shared(); + mc->tag = std::string(m.name); + + // Base color (pbrMetallicRoughness factor + texture) + mc->base_color_factor[0] = float(m.pbrData.baseColorFactor[0]); + mc->base_color_factor[1] = float(m.pbrData.baseColorFactor[1]); + mc->base_color_factor[2] = float(m.pbrData.baseColorFactor[2]); + mc->base_color_factor[3] = float(m.pbrData.baseColorFactor[3]); + mc->metallic_factor = float(m.pbrData.metallicFactor); + mc->roughness_factor = float(m.pbrData.roughnessFactor); + + mc->emissive_factor[0] = float(m.emissiveFactor[0]); + mc->emissive_factor[1] = float(m.emissiveFactor[1]); + mc->emissive_factor[2] = float(m.emissiveFactor[2]); + mc->emissive_strength = float(m.emissiveStrength); + + switch(m.alphaMode) + { + case fastgltf::AlphaMode::Opaque: mc->alpha = ossia::alpha_mode::opaque_; break; + case fastgltf::AlphaMode::Mask: mc->alpha = ossia::alpha_mode::mask; break; + case fastgltf::AlphaMode::Blend: mc->alpha 
= ossia::alpha_mode::blend; break; + } + mc->alpha_cutoff = float(m.alphaCutoff); + mc->double_sided = m.doubleSided; + mc->unlit = m.unlit; + + // Resolve a glTF texture slot to an ossia texture_ref with source populated + // (filesystem path or embedded blob). The image may be external (URI), a + // buffer view into the main glTF buffer, or an inline array. + auto fill_tex = [&](ossia::texture_ref& tr, const fastgltf::TextureInfo& ti) { + if(ti.textureIndex >= asset.textures.size()) + return; + const auto& tex = asset.textures[ti.textureIndex]; + if(!tex.imageIndex.has_value()) + return; + const auto& img = asset.images[tex.imageIndex.value()]; + auto src = std::make_shared(); + std::visit( + [&](const auto& data) { + using T = std::decay_t; + if constexpr(std::is_same_v) + { + // Relative URI → join with the glTF file's parent dir. + auto p = dir / std::filesystem::path(std::string_view( + data.uri.path())); + src->file_path = p.lexically_normal().string(); + } + else if constexpr(std::is_same_v) + { + auto blob = std::make_shared>( + (const uint8_t*)data.bytes.data(), + (const uint8_t*)data.bytes.data() + data.bytes.size()); + src->embedded_data = blob; + src->mime_type = std::string(fastgltf::getMimeTypeString(data.mimeType)); + } + else if constexpr(std::is_same_v) + { + if(data.bufferViewIndex >= asset.bufferViews.size()) + return; + const auto& bv = asset.bufferViews[data.bufferViewIndex]; + if(bv.bufferIndex >= asset.buffers.size()) + return; + const auto& buf = asset.buffers[bv.bufferIndex]; + const auto* arr = std::get_if(&buf.data); + if(!arr) + return; + auto blob = std::make_shared>( + (const uint8_t*)arr->bytes.data() + bv.byteOffset, + (const uint8_t*)arr->bytes.data() + bv.byteOffset + bv.byteLength); + src->embedded_data = blob; + src->mime_type = std::string(fastgltf::getMimeTypeString(data.mimeType)); + } + // sources::Vector / sources::Fallback / sources::CustomBuffer not + // handled in v1 — most files use one of the three above. 
+ }, + img.data); + + // Plan 09 S1: content-hash for cross-output / cross-reload decode + // dedup. Prefer hashing the embedded bytes — it's the decoded + // payload contents that matter, not the file path (two different + // files can embed the same JPEG). Fall back to hashing the path + // string when no embedded data (URI → we'll read the file on + // demand inside the preprocessor, hashing the path is a stable + // proxy for session-scope dedup). + if(src->embedded_data && !src->embedded_data->empty()) + { + src->content_hash = ossia::hash_bytes( + src->embedded_data->data(), src->embedded_data->size()); + } + else if(!src->file_path.empty()) + { + src->content_hash = ossia::hash_bytes( + src->file_path.data(), src->file_path.size()); + } + + tr.source = std::move(src); + tr.texcoord_set = uint32_t(ti.texCoordIndex); + + // KHR_texture_transform: per-texture-info UV transform. The + // extension overrides the texture-info texCoordIndex when set + // (spec) — honour that. Defaults are identity (offset=0, scale=1, + // rot=0), so leaving uv_transform at default for textures without + // the extension is correct. + if(ti.transform) + { + tr.uv_transform.offset[0] = float(ti.transform->uvOffset.x()); + tr.uv_transform.offset[1] = float(ti.transform->uvOffset.y()); + tr.uv_transform.scale[0] = float(ti.transform->uvScale.x()); + tr.uv_transform.scale[1] = float(ti.transform->uvScale.y()); + tr.uv_transform.rotation = float(ti.transform->rotation); + if(ti.transform->texCoordIndex.has_value()) + tr.texcoord_set = uint32_t(*ti.transform->texCoordIndex); + } + + // glTF per-texture sampler. Each texture optionally references a + // sampler index in `asset.samplers`. Default (when absent or + // unreferenced) is REPEAT/REPEAT/LINEAR/LINEAR/LINEAR_MIPMAP per + // glTF spec — which matches the texture_sampler_config defaults. 
+ auto wrap_to_ossia = [](fastgltf::Wrap w) { + switch(w) + { + case fastgltf::Wrap::ClampToEdge: return ossia::CLAMP_TO_EDGE; + case fastgltf::Wrap::MirroredRepeat: return ossia::MIRROR; + case fastgltf::Wrap::Repeat: return ossia::REPEAT; + } + return ossia::REPEAT; + }; + auto filter_to_ossia = [](fastgltf::Filter f, ossia::texture_filter& base, + ossia::texture_filter& mip) { + // glTF combined min-filter encodes both the base filter and the + // mipmap mode (e.g. LinearMipMapNearest = LINEAR base + NEAREST + // mipmap). Decode both axes. + switch(f) + { + case fastgltf::Filter::Nearest: + base = ossia::NEAREST; mip = ossia::NONE; break; + case fastgltf::Filter::Linear: + base = ossia::LINEAR; mip = ossia::NONE; break; + case fastgltf::Filter::NearestMipMapNearest: + base = ossia::NEAREST; mip = ossia::NEAREST; break; + case fastgltf::Filter::LinearMipMapNearest: + base = ossia::LINEAR; mip = ossia::NEAREST; break; + case fastgltf::Filter::NearestMipMapLinear: + base = ossia::NEAREST; mip = ossia::LINEAR; break; + case fastgltf::Filter::LinearMipMapLinear: + base = ossia::LINEAR; mip = ossia::LINEAR; break; + } + }; + if(tex.samplerIndex.has_value() + && *tex.samplerIndex < asset.samplers.size()) + { + const auto& s = asset.samplers[*tex.samplerIndex]; + tr.sampler.wrap_s = wrap_to_ossia(s.wrapS); + tr.sampler.wrap_t = wrap_to_ossia(s.wrapT); + ossia::texture_filter mag_base = ossia::LINEAR, mag_mip = ossia::NONE; + ossia::texture_filter min_base = ossia::LINEAR, min_mip = ossia::LINEAR; + if(s.magFilter.has_value()) + filter_to_ossia(*s.magFilter, mag_base, mag_mip); + if(s.minFilter.has_value()) + filter_to_ossia(*s.minFilter, min_base, min_mip); + tr.sampler.mag_filter = mag_base; + tr.sampler.min_filter = min_base; + tr.sampler.mipmap_mode = min_mip; // mip mode comes from minFilter + } + }; + + if(m.pbrData.baseColorTexture) + fill_tex(mc->base_color_texture, *m.pbrData.baseColorTexture); + if(m.pbrData.metallicRoughnessTexture) + 
fill_tex(mc->metallic_roughness_texture, *m.pbrData.metallicRoughnessTexture); + if(m.normalTexture) + fill_tex(mc->normal_texture, *m.normalTexture); + if(m.occlusionTexture) + fill_tex(mc->occlusion_texture, *m.occlusionTexture); + if(m.emissiveTexture) + fill_tex(mc->emissive_texture, *m.emissiveTexture); + + // --- KHR material extensions ------------------------------------------ + // fastgltf parses every extension we've enabled in the Extensions mask + // at parse time (see loadFromFile() below). What was missing here is NOT + // the parse — fastgltf already gave us the fields — but the copy into + // ossia::material_component. Each KHR_* that sets material_component + // fields gets a matching block below. + // + // Each material. is a unique_ptr; nullptr means the file + // didn't declare that extension on this material. We leave the + // material_component sub-struct at its spec defaults (factor=0 / + // factor=1 depending on the field) in that case. + + // KHR_materials_ior — scalar IOR override; default 1.5 matches spec. + mc->ior = float(m.ior); + + // KHR_materials_clearcoat — second thin dielectric specular lobe. + if(m.clearcoat) + { + mc->clearcoat.factor = float(m.clearcoat->clearcoatFactor); + mc->clearcoat.roughness_factor = float(m.clearcoat->clearcoatRoughnessFactor); + if(m.clearcoat->clearcoatTexture) + fill_tex(mc->clearcoat.texture, *m.clearcoat->clearcoatTexture); + if(m.clearcoat->clearcoatRoughnessTexture) + fill_tex( + mc->clearcoat.roughness_texture, + *m.clearcoat->clearcoatRoughnessTexture); + if(m.clearcoat->clearcoatNormalTexture) + fill_tex( + mc->clearcoat.normal_texture, *m.clearcoat->clearcoatNormalTexture); + } + + // KHR_materials_sheen — fabric / velvet / brushed surfaces. 
+ if(m.sheen) + { + mc->sheen.color_factor[0] = float(m.sheen->sheenColorFactor.x()); + mc->sheen.color_factor[1] = float(m.sheen->sheenColorFactor.y()); + mc->sheen.color_factor[2] = float(m.sheen->sheenColorFactor.z()); + mc->sheen.roughness_factor = float(m.sheen->sheenRoughnessFactor); + if(m.sheen->sheenColorTexture) + fill_tex(mc->sheen.color_texture, *m.sheen->sheenColorTexture); + if(m.sheen->sheenRoughnessTexture) + fill_tex(mc->sheen.roughness_texture, *m.sheen->sheenRoughnessTexture); + } + + // KHR_materials_transmission — thin-walled refraction weight. + if(m.transmission) + { + mc->transmission.factor = float(m.transmission->transmissionFactor); + if(m.transmission->transmissionTexture) + fill_tex(mc->transmission.texture, *m.transmission->transmissionTexture); + } + + // KHR_materials_volume — thick-walled absorption + attenuation. + if(m.volume) + { + mc->volume.thickness_factor = float(m.volume->thicknessFactor); + mc->volume.attenuation_distance = float(m.volume->attenuationDistance); + mc->volume.attenuation_color[0] = float(m.volume->attenuationColor.x()); + mc->volume.attenuation_color[1] = float(m.volume->attenuationColor.y()); + mc->volume.attenuation_color[2] = float(m.volume->attenuationColor.z()); + if(m.volume->thicknessTexture) + fill_tex(mc->volume.thickness_texture, *m.volume->thicknessTexture); + } + + // KHR_materials_specular — dielectric F0 override + tint. + if(m.specular) + { + mc->specular.factor = float(m.specular->specularFactor); + mc->specular.color_factor[0] = float(m.specular->specularColorFactor.x()); + mc->specular.color_factor[1] = float(m.specular->specularColorFactor.y()); + mc->specular.color_factor[2] = float(m.specular->specularColorFactor.z()); + if(m.specular->specularTexture) + fill_tex(mc->specular.texture, *m.specular->specularTexture); + if(m.specular->specularColorTexture) + fill_tex(mc->specular.color_texture, *m.specular->specularColorTexture); + } + + // KHR_materials_iridescence — thin-film interference. 
+ if(m.iridescence) + { + mc->iridescence.factor = float(m.iridescence->iridescenceFactor); + mc->iridescence.ior = float(m.iridescence->iridescenceIor); + mc->iridescence.thickness_min + = float(m.iridescence->iridescenceThicknessMinimum); + mc->iridescence.thickness_max + = float(m.iridescence->iridescenceThicknessMaximum); + if(m.iridescence->iridescenceTexture) + fill_tex(mc->iridescence.texture, *m.iridescence->iridescenceTexture); + if(m.iridescence->iridescenceThicknessTexture) + fill_tex( + mc->iridescence.thickness_texture, + *m.iridescence->iridescenceThicknessTexture); + } + + // KHR_materials_anisotropy — directional specular stretch. + if(m.anisotropy) + { + mc->anisotropy.strength = float(m.anisotropy->anisotropyStrength); + mc->anisotropy.rotation = float(m.anisotropy->anisotropyRotation); + if(m.anisotropy->anisotropyTexture) + fill_tex(mc->anisotropy.texture, *m.anisotropy->anisotropyTexture); + } + + // KHR_materials_diffuse_transmission — translucent surfaces (paper, + // leaves, lampshades). + if(m.diffuseTransmission) + { + mc->diffuse_transmission.factor + = float(m.diffuseTransmission->diffuseTransmissionFactor); + mc->diffuse_transmission.color_factor[0] + = float(m.diffuseTransmission->diffuseTransmissionColorFactor.x()); + mc->diffuse_transmission.color_factor[1] + = float(m.diffuseTransmission->diffuseTransmissionColorFactor.y()); + mc->diffuse_transmission.color_factor[2] + = float(m.diffuseTransmission->diffuseTransmissionColorFactor.z()); + if(m.diffuseTransmission->diffuseTransmissionTexture) + fill_tex( + mc->diffuse_transmission.texture, + *m.diffuseTransmission->diffuseTransmissionTexture); + if(m.diffuseTransmission->diffuseTransmissionColorTexture) + fill_tex( + mc->diffuse_transmission.color_texture, + *m.diffuseTransmission->diffuseTransmissionColorTexture); + } + + return mc; +} + +// Translate a glTF Light (KHR_lights_punctual) to ossia::light_component. 
+static std::shared_ptr to_light(const fastgltf::Light& l) +{ + auto lc = std::make_shared(); + switch(l.type) + { + case fastgltf::LightType::Directional: + lc->type = ossia::light_type::directional; break; + case fastgltf::LightType::Point: + lc->type = ossia::light_type::point; break; + case fastgltf::LightType::Spot: + lc->type = ossia::light_type::spot; break; + } + lc->color[0] = float(l.color[0]); + lc->color[1] = float(l.color[1]); + lc->color[2] = float(l.color[2]); + lc->intensity = float(l.intensity); + lc->range = l.range.value_or(0.f); + lc->inner_cone_angle = float(l.innerConeAngle.value_or(0.f)); + lc->outer_cone_angle = float(l.outerConeAngle.value_or(float(M_PI) / 4.f)); + lc->decay = ossia::light_decay::quadratic; + return lc; +} + +// Translate a glTF Camera. +static std::shared_ptr to_camera(const fastgltf::Camera& c) +{ + auto cc = std::make_shared(); + if(const auto* p = std::get_if(&c.camera)) + { + cc->projection = ossia::camera_projection::perspective; + cc->yfov = float(p->yfov); + cc->aspect_ratio = p->aspectRatio.value_or(1.f); + cc->znear = float(p->znear); + cc->zfar = float(p->zfar.value_or(1000.f)); + } + else if(const auto* o = std::get_if(&c.camera)) + { + cc->projection = ossia::camera_projection::orthographic; + cc->xmag = float(o->xmag); + cc->ymag = float(o->ymag); + cc->znear = float(o->znear); + cc->zfar = float(o->zfar); + } + return cc; +} + +// Pull one accessor into a float vector. `components` is the number of +// floats per element (1/2/3/4). fastgltf's iterator handles all component +// types (byte/short/int/float) with automatic widening to float. 
+template +static std::shared_ptr> read_float_accessor( + const fastgltf::Asset& asset, const fastgltf::Accessor& acc) +{ + auto out = std::make_shared>(acc.count * Components); + float* dst = out->data(); + if constexpr(Components == 2) + { + fastgltf::iterateAccessor( + asset, acc, [&](fastgltf::math::fvec2 v) { + dst[0] = v.x(); dst[1] = v.y(); dst += 2; + }); + } + else if constexpr(Components == 3) + { + fastgltf::iterateAccessor( + asset, acc, [&](fastgltf::math::fvec3 v) { + dst[0] = v.x(); dst[1] = v.y(); dst[2] = v.z(); dst += 3; + }); + } + else if constexpr(Components == 4) + { + fastgltf::iterateAccessor( + asset, acc, [&](fastgltf::math::fvec4 v) { + dst[0] = v.x(); dst[1] = v.y(); dst[2] = v.z(); dst[3] = v.w(); dst += 4; + }); + } + return out; +} + +// Pull indices (whatever the glTF component type) into a flat uint32 buffer. +static std::shared_ptr> read_indices( + const fastgltf::Asset& asset, const fastgltf::Accessor& acc) +{ + auto out = std::make_shared>(acc.count); + uint32_t* dst = out->data(); + fastgltf::iterateAccessor( + asset, acc, [&](std::uint32_t v) { *dst++ = v; }); + return out; +} + +// Pull POSITION, NORMAL, TEXCOORD_0, COLOR_0, TANGENT for a primitive into a +// ScenePart. Missing attributes leave the matching shared_ptr empty. +static GltfParser::ScenePart extract_primitive( + const fastgltf::Asset& asset, const fastgltf::Primitive& prim, + const std::vector& material_index_remap) +{ + GltfParser::ScenePart sp; + + auto get_accessor + = [&](std::string_view name) -> const fastgltf::Accessor* { + for(const auto& a : prim.attributes) + if(a.name == name) + return &asset.accessors[a.accessorIndex]; + return nullptr; + }; + + if(auto* a = get_accessor("POSITION")) + { + sp.vertex_count = uint32_t(a->count); + sp.positions = read_float_accessor<3>(asset, *a); + // Local-space AABB. 
glTF requires min/max on the POSITION accessor, + // but rather than chase fastgltf's accessor-specific variant API we + // just walk the decoded float stream — same cost as one extra pass + // on load (negligible compared to asset I/O), and trivially uniform + // with the FBX / procedural code paths. + if(sp.positions && !sp.positions->empty()) + sp.bounds = ossia::compute_aabb_from_positions( + sp.positions->data(), sp.vertex_count); + } + if(auto* a = get_accessor("NORMAL")) + sp.normals = read_float_accessor<3>(asset, *a); + if(auto* a = get_accessor("TEXCOORD_0")) + sp.texcoords = read_float_accessor<2>(asset, *a); + if(auto* a = get_accessor("TEXCOORD_1")) + sp.texcoords1 = read_float_accessor<2>(asset, *a); + if(auto* a = get_accessor("COLOR_0")) + { + // COLOR_0 may be vec3 or vec4 — peek at component count. + if(a->type == fastgltf::AccessorType::Vec4) + sp.colors = read_float_accessor<4>(asset, *a); + else if(a->type == fastgltf::AccessorType::Vec3) + { + // Pad to RGBA. + auto rgb = read_float_accessor<3>(asset, *a); + auto rgba = std::make_shared>(a->count * 4); + for(std::size_t i = 0; i < a->count; ++i) + { + (*rgba)[i * 4 + 0] = (*rgb)[i * 3 + 0]; + (*rgba)[i * 4 + 1] = (*rgb)[i * 3 + 1]; + (*rgba)[i * 4 + 2] = (*rgb)[i * 3 + 2]; + (*rgba)[i * 4 + 3] = 1.f; + } + sp.colors = std::move(rgba); + } + } + if(auto* a = get_accessor("TANGENT")) + sp.tangents = read_float_accessor<4>(asset, *a); + + // Skinning attributes. glTF spec stores JOINTS_0 as UNSIGNED_BYTE or + // UNSIGNED_SHORT vec4 — widen to uint32 here so the vertex shader can + // bind a uniform uvec4 format regardless of source file. WEIGHTS_0 is + // always float vec4 per glTF normative spec. 
+ if(auto* a = get_accessor("JOINTS_0")) + { + auto joints = std::make_shared>(a->count * 4); + uint32_t* dst = joints->data(); + fastgltf::iterateAccessor( + asset, *a, [&](fastgltf::math::u16vec4 v) { + *dst++ = uint32_t(v[0]); + *dst++ = uint32_t(v[1]); + *dst++ = uint32_t(v[2]); + *dst++ = uint32_t(v[3]); + }); + sp.joints0 = std::move(joints); + } + if(auto* a = get_accessor("WEIGHTS_0")) + sp.weights0 = read_float_accessor<4>(asset, *a); + + if(prim.indicesAccessor.has_value()) + { + const auto& ia = asset.accessors[*prim.indicesAccessor]; + sp.indices = read_indices(asset, ia); + sp.index_count = uint32_t(ia.count); + } + + if(prim.materialIndex.has_value()) + { + const std::size_t gltf_idx = *prim.materialIndex; + if(gltf_idx < material_index_remap.size()) + sp.material_index = material_index_remap[gltf_idx]; + } + + // KHR_materials_variants mapping. fastgltf stores it pre-indexed by + // variant index → Optional. Translate to our + // remapped material indices with -1 for "no override". + if(!prim.mappings.empty()) + { + sp.variant_material_indices.resize(prim.mappings.size(), -1); + for(std::size_t v = 0; v < prim.mappings.size(); ++v) + { + if(prim.mappings[v].has_value()) + { + const std::size_t mi = *prim.mappings[v]; + if(mi < material_index_remap.size()) + sp.variant_material_indices[v] = material_index_remap[mi]; + } + } + } + + // Generate tangents via mikktspace when the glTF mesh didn't ship + // them. Required for normal-mapped PBR: the fragment shader rebuilds + // the TBN basis from (normal, tangent.xyz, cross(normal, tangent.xyz) * + // tangent.w) before unpacking the sampled normal. Skipped when any + // prerequisite stream is missing (no UVs → no normal mapping anyway). 
+ if(!sp.tangents && sp.positions && sp.normals && sp.texcoords) + { + sp.tangents = Threedim::generate_tangents_mikktspace( + sp.positions, sp.normals, sp.texcoords, sp.indices, + sp.vertex_count); + } + return sp; +} + +// Convert a ScenePart to mesh_primitive (mirrors FbxParser::part_to_primitive +// but with index-buffer support — glTF exposes indexed meshes). +static ossia::buffer_resource_ptr make_buffer_resource_f( + std::shared_ptr> floats) +{ + if(!floats || floats->empty()) + return {}; + auto br = std::make_shared(); + ossia::buffer_data bd; + bd.data = std::shared_ptr(floats, floats->data()); + bd.byte_size = int64_t(floats->size() * sizeof(float)); + bd.usage_hint = ossia::buffer_data::usage::vertex_buffer; + br->resource = std::move(bd); + br->dirty_index = 1; + return br; +} +static ossia::buffer_resource_ptr make_buffer_resource_u32( + std::shared_ptr> ints) +{ + if(!ints || ints->empty()) + return {}; + auto br = std::make_shared(); + ossia::buffer_data bd; + bd.data = std::shared_ptr(ints, ints->data()); + bd.byte_size = int64_t(ints->size() * sizeof(uint32_t)); + bd.usage_hint = ossia::buffer_data::usage::index_buffer; + br->resource = std::move(bd); + br->dirty_index = 1; + return br; +} + +static ossia::mesh_primitive part_to_primitive( + const GltfParser::ScenePart& p, + const std::vector>& mats) +{ + ossia::mesh_primitive mp; + // Per-primitive id — not deterministic across reloads (part_to_primitive + // is called from the scene walk where the source asset path isn't + // threaded in), so mint a fresh id. Sessions with the same model file + // reloaded will see different ids, which is acceptable: the preprocessor + // rebuilds on material/mesh fingerprint changes anyway, and stable-id + // stability is only critical for the material / transform fingerprints + // which ARE deterministic via the file-path hash. + mp.stable_id = ossia::mint_stable_id(); + mp.topology = ossia::primitive_topology::triangles; + mp.index_type = p.indices ? 
ossia::index_format::uint32 : ossia::index_format::none; + mp.vertex_count = p.vertex_count; + mp.index_count = p.index_count; + mp.first_vertex = 0; + mp.first_index = 0; + mp.vertex_offset = 0; + mp.bounds = p.bounds; + if(p.material_index >= 0 + && std::size_t(p.material_index) < mats.size()) + mp.material = mats[p.material_index]; + + // KHR_materials_variants: per-variant material override. Index V + // → null = "use default", else the variant's material_component_ptr. + if(!p.variant_material_indices.empty()) + { + mp.material_variants.resize(p.variant_material_indices.size()); + for(std::size_t v = 0; v < p.variant_material_indices.size(); ++v) + { + const int mi = p.variant_material_indices[v]; + if(mi >= 0 && std::size_t(mi) < mats.size()) + mp.material_variants[v] + = ossia::material_component_ptr(mats[mi]); + } + } + + uint32_t bi = 0; + auto add = [&](auto buf, int floats_per_vertex, + ossia::attribute_semantic sem, ossia::vertex_format fmt) { + if(!buf || buf->empty()) + return; + mp.vertex_buffers.push_back(make_buffer_resource_f(buf)); + ossia::vertex_attribute attr; + attr.semantic = sem; + attr.format = fmt; + attr.buffer_index = bi++; + attr.byte_offset = 0; + attr.byte_stride = uint32_t(floats_per_vertex) * sizeof(float); + attr.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(attr); + }; + + add(p.positions, 3, ossia::attribute_semantic::position, ossia::vertex_format::float3); + add(p.normals, 3, ossia::attribute_semantic::normal, ossia::vertex_format::float3); + add(p.texcoords, 2, ossia::attribute_semantic::texcoord0, ossia::vertex_format::float2); + add(p.texcoords1, 2, ossia::attribute_semantic::texcoord1, ossia::vertex_format::float2); + add(p.colors, 4, ossia::attribute_semantic::color0, ossia::vertex_format::float4); + add(p.tangents, 4, ossia::attribute_semantic::tangent, ossia::vertex_format::float4); + + // Skinning attributes — uvec4 joints + vec4 weights, one set per vertex. 
+ if(p.joints0) + { + auto br = std::make_shared(); + ossia::buffer_data bd; + bd.data = std::shared_ptr(p.joints0, p.joints0->data()); + bd.byte_size = int64_t(p.joints0->size() * sizeof(uint32_t)); + bd.usage_hint = ossia::buffer_data::usage::vertex_buffer; + br->resource = std::move(bd); + br->dirty_index = 1; + mp.vertex_buffers.push_back(std::move(br)); + ossia::vertex_attribute attr; + attr.semantic = ossia::attribute_semantic::joints0; + attr.format = ossia::vertex_format::uint32x4; + attr.buffer_index = bi++; + attr.byte_offset = 0; + attr.byte_stride = 4 * sizeof(uint32_t); + attr.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(attr); + } + add(p.weights0, 4, ossia::attribute_semantic::weights0, ossia::vertex_format::float4); + + if(p.indices) + mp.index_buffer = make_buffer_resource_u32(p.indices); + + return mp; +} + +// Walk glTF scene hierarchy into FbxParser::SceneNode-like flat array. +// Returns the index of the emitted root-most parent for the given glTF node +// index, or -1 if unused. +static int emit_node( + const fastgltf::Asset& asset, std::size_t nodeIdx, int parent_index, + std::vector& out, + const std::vector& material_index_remap) +{ + const auto& n = asset.nodes[nodeIdx]; + + GltfParser::SceneNode sn; + sn.name = std::string(n.name); + sn.parent_index = parent_index; + sn.local_transform = to_transform(n); + // Stable ID = glTF node index + 1 (0 is the "unset" sentinel). Lets + // AnimationPlayer and skeleton_component::joint_node_ids resolve + // scene_node_id back to the emitted node. + sn.stable_id = std::uint64_t(nodeIdx) + 1; + + // glTF skin association — when the node references a skin, stamp its + // index so the downstream mesh_component inherits it. 
+ if(n.skinIndex.has_value()) + sn.skin_index = int32_t(*n.skinIndex); + + if(n.meshIndex.has_value()) + { + const auto& mesh = asset.meshes[*n.meshIndex]; + sn.parts.reserve(mesh.primitives.size()); + for(const auto& prim : mesh.primitives) + { + auto sp = extract_primitive(asset, prim, material_index_remap); + if(sp.vertex_count > 0) + sn.parts.push_back(std::move(sp)); + } + } + if(n.lightIndex.has_value() && *n.lightIndex < asset.lights.size()) + sn.light = to_light(asset.lights[*n.lightIndex]); + if(n.cameraIndex.has_value() && *n.cameraIndex < asset.cameras.size()) + sn.camera = to_camera(asset.cameras[*n.cameraIndex]); + + const int self = (int)out.size(); + out.push_back(std::move(sn)); + for(std::size_t ci : asset.nodes[nodeIdx].children) + emit_node(asset, ci, self, out, material_index_remap); + return self; +} + +} // namespace + +// ============================================================================= +// rebuild_scene — same pattern as FbxParser::rebuild_scene. +// ============================================================================= +void GltfParser::rebuild_scene() +{ + if(m_scene_nodes.empty()) + return; + + const std::size_t N = m_scene_nodes.size(); + std::vector> nodes(N); + std::vector>> children(N); + for(std::size_t i = 0; i < N; ++i) + { + nodes[i] = std::make_shared(); + nodes[i]->name = m_scene_nodes[i].name; + nodes[i]->visible = true; + nodes[i]->id.value = m_scene_nodes[i].stable_id; + children[i] = std::make_shared>(); + } + for(std::size_t i = 0; i < N; ++i) + { + auto& src = m_scene_nodes[i]; + auto& lst = *children[i]; + lst.push_back(src.local_transform); + if(!src.parts.empty()) + { + auto mc = std::make_shared(); + mc->primitives.reserve(src.parts.size()); + for(const auto& p : src.parts) + mc->primitives.push_back(part_to_primitive(p, m_materials)); + // Direct skeleton pointer (glTF node.skin index → m_skeletons). 
+ if(src.skin_index >= 0 + && std::size_t(src.skin_index) < m_skeletons.size()) + mc->skin = ossia::skeleton_component_ptr(m_skeletons[src.skin_index]); + mc->dirty_index = 1; + lst.push_back(ossia::mesh_component_ptr(std::move(mc))); + } + if(src.light) + lst.push_back(ossia::light_component_ptr(src.light)); + if(src.camera) + lst.push_back(ossia::camera_component_ptr(src.camera)); + } + for(std::size_t i = 0; i < N; ++i) + { + int p = m_scene_nodes[i].parent_index; + if(p >= 0 && p < (int)N) + children[p]->push_back(ossia::scene_node_ptr(nodes[i])); + } + for(std::size_t i = 0; i < N; ++i) + nodes[i]->children = children[i]; + + auto roots = std::make_shared>(); + for(std::size_t i = 0; i < N; ++i) + if(m_scene_nodes[i].parent_index < 0) + roots->push_back(ossia::scene_node_ptr(nodes[i])); + + auto mat_list = std::make_shared>(); + mat_list->reserve(m_materials.size()); + for(auto& m : m_materials) + mat_list->push_back(ossia::material_component_ptr(m)); + + auto state = std::make_shared(); + state->roots = std::move(roots); + state->materials = std::move(mat_list); + if(!m_skeletons.empty()) + { + auto skel_list + = std::make_shared>(); + skel_list->reserve(m_skeletons.size()); + for(auto& s : m_skeletons) + skel_list->push_back(ossia::skeleton_component_ptr(s)); + state->skeletons = std::move(skel_list); + } + state->version = 1; + state->dirty_index = 1; + + // Expose asset-scope variant names for UI / controls. active_variant + // starts at -1 (use each primitive's default material). + if(!m_variant_names.empty()) + { + state->variant_names.assign( + m_variant_names.begin(), m_variant_names.end()); + state->active_variant_index = -1; + } + + // AssetLoader wraps m_raw_state in a TRS payload externally; we + // publish only the raw scene here. 
+ m_raw_state = std::move(state); +} + +std::function GltfParser::ins::gltf_t::process(file_type tv) +{ + if(tv.filename.empty()) + return {}; + + const std::filesystem::path path(tv.filename); + if(!std::filesystem::exists(path)) + return {}; + + // Enable every extension we can usefully translate. Unknown required + // extensions make fastgltf refuse the file; we intentionally enable more + // than we consume to avoid that (data we don't translate is ignored). + constexpr auto extensions = + fastgltf::Extensions::KHR_mesh_quantization + | fastgltf::Extensions::KHR_texture_transform + | fastgltf::Extensions::KHR_lights_punctual + | fastgltf::Extensions::KHR_materials_emissive_strength + | fastgltf::Extensions::KHR_materials_unlit + | fastgltf::Extensions::KHR_materials_ior + | fastgltf::Extensions::KHR_materials_specular + | fastgltf::Extensions::KHR_materials_transmission + | fastgltf::Extensions::KHR_materials_volume + | fastgltf::Extensions::KHR_materials_clearcoat + | fastgltf::Extensions::KHR_materials_sheen + | fastgltf::Extensions::KHR_materials_iridescence + | fastgltf::Extensions::KHR_materials_anisotropy + | fastgltf::Extensions::KHR_materials_diffuse_transmission + | fastgltf::Extensions::KHR_materials_variants; + + fastgltf::Parser parser(extensions); + + constexpr auto gltfOptions + = fastgltf::Options::DontRequireValidAssetMember + | fastgltf::Options::AllowDouble + | fastgltf::Options::LoadExternalBuffers + | fastgltf::Options::LoadExternalImages + | fastgltf::Options::GenerateMeshIndices + | fastgltf::Options::DecomposeNodeMatrices; + + auto gltfFile = fastgltf::GltfDataBuffer::FromPath(path); + if(!bool(gltfFile)) + return {}; + + auto assetE = parser.loadGltf( + gltfFile.get(), path.parent_path(), gltfOptions); + if(assetE.error() != fastgltf::Error::None) + return {}; + fastgltf::Asset asset = std::move(assetE.get()); + + // Materials first so primitives can remap their material indices. 
+ std::vector> materials; + std::vector material_index_remap(asset.materials.size(), -1); + for(std::size_t i = 0; i < asset.materials.size(); ++i) + { + material_index_remap[i] = (int)materials.size(); + auto mat = to_material(asset, asset.materials[i], path.parent_path()); + // Deterministic id keyed on (asset path, "mat", index) — re-reads of the + // same asset file give the same material their same stable_id, so + // downstream caches survive asset reloads. + mat->stable_id = ossia::scene_node_id::from_parent( + ossia::scene_node_id::from_path(path.string()), + std::string("mat/") + std::to_string(i)).value; + materials.push_back(std::move(mat)); + } + + // Scene — walk the default scene's roots. glTF allows multiple scenes but + // only one is "active"; pick asset.defaultScene or the first. + std::vector scene_nodes; + const std::size_t sceneIdx + = asset.defaultScene.value_or(asset.scenes.empty() ? 0 : 0); + if(sceneIdx < asset.scenes.size()) + { + for(std::size_t rootIdx : asset.scenes[sceneIdx].nodeIndices) + emit_node(asset, rootIdx, -1, scene_nodes, material_index_remap); + } + + if(scene_nodes.empty()) + return {}; + + // Skins — parse joint node list + inverse-bind matrices per skin. + // Joint transforms themselves live on the scene_node's local_transform + // (set during emit_node). AnimationPlayer consumes this skeleton data + // to produce per-frame world-space joint matrices. + std::vector> skeletons; + skeletons.reserve(asset.skins.size()); + for(const auto& sk : asset.skins) + { + auto skel = std::make_shared(); + // Inverse-bind matrices are optional in glTF; default is identity. 
+ std::vector ibms; + if(sk.inverseBindMatrices.has_value()) + { + const auto& ibmAcc = asset.accessors[*sk.inverseBindMatrices]; + ibms.resize(ibmAcc.count * 16); + std::size_t i = 0; + fastgltf::iterateAccessor( + asset, ibmAcc, [&](fastgltf::math::fmat4x4 m) { + for(int c = 0; c < 4; ++c) + for(int r = 0; r < 4; ++r) + ibms[i++] = m[c][r]; + }); + } + skel->joints.reserve(sk.joints.size()); + skel->joint_node_ids.reserve(sk.joints.size()); + for(std::size_t j = 0; j < sk.joints.size(); ++j) + { + ossia::skeleton_joint sj; + const auto nodeIdx = sk.joints[j]; + if(nodeIdx < asset.nodes.size()) + sj.name = std::string(asset.nodes[nodeIdx].name); + sj.parent_index = -1; // resolved from node hierarchy at use-time + if(j * 16 + 15 < ibms.size()) + std::memcpy( + sj.inverse_bind_matrix, ibms.data() + j * 16, + sizeof(float) * 16); + skel->joints.push_back(std::move(sj)); + // Stable node_id derived from the glTF node index (+1 because 0 + // means "unset" per scene_node_id convention). Matches the IDs + // assigned to emitted scene_nodes in rebuild_scene below. + ossia::scene_node_id nid; + nid.value = std::uint64_t(nodeIdx) + 1; + skel->joint_node_ids.push_back(nid); + } + skel->dirty_index = 1; + skeletons.push_back(std::move(skel)); + } + + // KHR_materials_variants: asset-scope variant name list. Carried + // alongside m_materials/skeletons into the parser so rebuild_scene + // can copy it into scene_state. Capture the asset's materialVariants + // by value so the lambda doesn't depend on the asset's lifetime. 
+ std::vector variant_names( + asset.materialVariants.begin(), asset.materialVariants.end()); + + return [scene_nodes = std::move(scene_nodes), + materials = std::move(materials), + skeletons = std::move(skeletons), + variant_names = std::move(variant_names)](GltfParser& o) mutable { + std::swap(o.m_scene_nodes, scene_nodes); + std::swap(o.m_materials, materials); + std::swap(o.m_skeletons, skeletons); + std::swap(o.m_variant_names, variant_names); + o.rebuild_scene(); + }; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/GltfParser.hpp b/src/plugins/score-plugin-threedim/Threedim/GltfParser.hpp new file mode 100644 index 0000000000..580e7e92fb --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/GltfParser.hpp @@ -0,0 +1,96 @@ +#pragma once +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Internal glTF 2.0 parsing class — uses fastgltf + simdjson to parse +// .gltf / .glb. Not a halp node itself; AssetLoader is the user-facing +// entry point. AssetLoader calls the static `ins::gltf_t::process` to +// obtain an apply-lambda, applies it against a throwaway GltfParser +// instance, then copies out `m_raw_state`. +class GltfParser +{ +public: + struct ins + { + struct gltf_t : halp::file_port<"glTF file"> + { + static std::function process(file_type data); + } gltf; + } inputs; + + void rebuild_scene(); + + // Rich scene staging. Same schema as FbxParser (kept in sync so a future + // shared helper can consume both). + struct ScenePart + { + std::shared_ptr> positions; + std::shared_ptr> normals; + std::shared_ptr> texcoords; + std::shared_ptr> texcoords1; // glTF TEXCOORD_1 + std::shared_ptr> colors; + std::shared_ptr> tangents; + // Skinning attributes (present when the primitive references a skin). + // joints: uvec4 bone indices packed as uint32 x 4 per vertex. + // weights: vec4 bone weights per vertex. 
+ std::shared_ptr> joints0; + std::shared_ptr> weights0; + std::shared_ptr> indices; // optional + uint32_t vertex_count{0}; + uint32_t index_count{0}; + int material_index{-1}; + // Local-space AABB over the POSITION stream. Populated by + // extract_primitive from the glTF POSITION accessor's min/max when + // present (spec-required but optionally trusted); otherwise derived + // by walking positions. Empty aabb = "not yet computed"; downstream + // GPU culling treats empty as infinite (never cull). + ossia::aabb bounds{}; + // KHR_materials_variants: per-variant material override index. + // Indexed by variant (parallel to scene_state::variant_names). + // -1 at a position = "no override for this variant, use default". + std::vector variant_material_indices; + }; + + struct SceneNode + { + std::string name; + ossia::scene_transform local_transform; + int parent_index{-1}; + std::vector parts; + std::shared_ptr light; + std::shared_ptr camera; + // glTF skin index. -1 = not skinned. When ≥ 0, the mesh_component + // emitted from this node's parts gets stamped with skin_index so + // ScenePreprocessor binds the matching skeleton's joint_matrices + // auxiliary buffer for the skinning vertex shader to read. + int32_t skin_index{-1}; + // Stable node_id, derived from the glTF node index + 1. Used by + // AnimationPlayer to find the node via channel.target_node_id, and + // by skeleton_component::joint_node_ids to resolve each joint to + // its node's world transform. + std::uint64_t stable_id{0}; + }; + + std::vector m_scene_nodes; + std::vector> m_materials; + std::vector> m_skeletons; + // KHR_materials_variants: names (UI-facing) declared at asset scope. + // Parallel to mesh_primitive::material_variants and + // scene_state::active_variant_index. + std::vector m_variant_names; + + // Rich scene state emitted by rebuild_scene — full hierarchy with + // materials, lights, cameras, skeletons. 
AssetLoader consumes this + // via the apply-lambda returned by ins::gltf_t::process. + std::shared_ptr m_raw_state; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidPose.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidPose.hpp new file mode 100644 index 0000000000..9c1570c822 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidPose.hpp @@ -0,0 +1,183 @@ +#pragma once + +// humanoid_pose — canonical intermediate for live mocap → rigged-character +// retargeting. One fixed set of 21 bones that every source adapter +// (PoseKeypointsToHumanoid, TrackedBonesToHumanoid, future Kinect / Xsens +// adapters) populates, and that HumanoidRetarget consumes. +// +// Design notes: +// - Flows through halp's structured-value port machinery, matching the +// existing DetectedPose pattern from score-addon-onnx. No new libossia +// port type. +// - Rotations are local parent-relative quaternions (x, y, z, w). Adapters +// are responsible for converting their source's native representation +// (bone-direction vectors, world-space trackers, etc.) into this shape. +// - `validity` is 0..1 — a per-bone confidence that downstream can use +// to skip updates on unreliable landmarks (BlazePose visibility, +// tracker occlusion, etc.). 1.0 = fully trusted; 0.0 = drop / freeze +// at previous rotation. +// - The hip position (`hip_x`, `hip_y`, `hip_z`) is the only world-space +// translation that flows through; every other bone's position is +// implied by target rig proportions. Used only when HumanoidRetarget's +// root-motion toggle is on. + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Canonical bone set. Indexed access via the enum; iterate with +// humanoid_bone_index::Count. Order is stable — adapters and retargeter +// presets both depend on it. 
+enum class humanoid_bone_index : uint8_t +{ + Hips = 0, + Spine, + Chest, + Neck, + Head, + + LeftShoulder, + LeftUpperArm, + LeftLowerArm, + LeftHand, + + RightShoulder, + RightUpperArm, + RightLowerArm, + RightHand, + + LeftUpperLeg, + LeftLowerLeg, + LeftFoot, + LeftToes, + + RightUpperLeg, + RightLowerLeg, + RightFoot, + RightToes, + + Count +}; + +// Per-bone pose. 20-byte halp-structured record (5 floats). +struct humanoid_bone +{ + // Parent-relative rotation quaternion, (x, y, z, w). Identity = {0,0,0,1}. + float qx{0.f}; + float qy{0.f}; + float qz{0.f}; + float qw{1.f}; + + // 0..1 confidence. 0 means "no reliable data for this bone, retargeter + // should ignore this frame for this bone". 1 = fully trusted. + float validity{1.f}; + + halp_field_names(qx, qy, qz, qw, validity); +}; + +// Fixed-size bone array — std::array plays nicely with halp serialization +// (same way DetectedPose uses std::vector, except the size is known and +// we can index by enum without a lookup). +struct humanoid_pose +{ + std::array bones{}; + + // World-space translation of the hip (Hips) root. Only consumed when + // root-motion is enabled on HumanoidRetarget; otherwise ignored. + float hip_x{0.f}; + float hip_y{0.f}; + float hip_z{0.f}; + + // Wall-clock frame counter. Increments on every adapter emit. Used by + // consumers for dirty tracking (skip work when version hasn't advanced). + int64_t version{0}; + + // Convenience: access a bone by enum. 
+ humanoid_bone& operator[](humanoid_bone_index b) noexcept + { + return bones[std::size_t(b)]; + } + const humanoid_bone& operator[](humanoid_bone_index b) const noexcept + { + return bones[std::size_t(b)]; + } + + halp_field_names(bones, hip_x, hip_y, hip_z, version); +}; + +// ============================================================================= +// Keypoint ingestion type — structurally compatible with the DetectedPose +// struct from score-addon-onnx (same field names, same layout) so halp's +// field-name-based port marshalling can carry a DetectedPose through a +// port typed as keypoint_stream without cross-addon header dependency. +// +// Kept in Threedim deliberately: HumanoidRetarget consumes it, but we +// don't want score-plugin-threedim to link against score-addon-onnx. +// ============================================================================= +struct keypoint_3d +{ + float x{0.f}; + float y{0.f}; + float z{0.f}; + float confidence{0.f}; + + halp_field_names(x, y, z, confidence); +}; + +struct keypoint_stream +{ + std::vector keypoints; + float mean_confidence{0.f}; + + halp_field_names(keypoints, mean_confidence); +}; + +// ============================================================================= +// Tracker bundle — 6 slots matching a common VR / optical-mocap full-body +// layout (head + hips + 2 hands + 2 feet). Each slot carries a world-space +// position, a world-space quaternion, and a per-tracker validity so lost +// tracking (tracker occluded / battery dead) can gracefully skip instead +// of slamming the character to the origin. +// +// Additional tracker layouts (10-point Vive Full-Body, Xsens 17-IMU, +// OptiTrack marker sets) can be added as additional bundle_N struct types +// in future passes. v1 covers the most common consumer setup; users with +// richer rigs can still drive the 6 slots from the subset they trust. 
+// ============================================================================= +struct tracker_pose +{ + // World-space translation. + float x{0.f}; + float y{0.f}; + float z{0.f}; + + // World-space quaternion (x, y, z, w). Identity = {0, 0, 0, 1}. + float qx{0.f}; + float qy{0.f}; + float qz{0.f}; + float qw{1.f}; + + // 0..1 tracking confidence. 0 = "tracker offline, ignore this frame". + float validity{0.f}; + + halp_field_names(x, y, z, qx, qy, qz, qw, validity); +}; + +struct tracker_bundle_6 +{ + tracker_pose head; + tracker_pose hips; + tracker_pose left_hand; + tracker_pose right_hand; + tracker_pose left_foot; + tracker_pose right_foot; + + halp_field_names(head, hips, left_hand, right_hand, left_foot, right_foot); +}; + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidPresets.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidPresets.hpp new file mode 100644 index 0000000000..f22eeaf09a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidPresets.hpp @@ -0,0 +1,157 @@ +#pragma once + +// Target rig presets for HumanoidRetarget: compile-time tables mapping +// canonical humanoid_bone_index → the target skeleton's joint name for +// three common conventions: +// +// - Mixamo (mixamorig:*) — ubiquitous for indie / live / education +// - VRM — VTubing standard; Ready Player Me derivatives all use this +// spec's bone names (the VRM humanoid bone list) +// - Unreal Mannequin — game-dev convention; also matches the +// output of many BVH-to-FBX converters and most "clean" glTF rigs +// +// Tables are std::array of std::string_view, compile-time, zero-overhead. +// If an entry is empty the target rig doesn't have a corresponding bone +// and HumanoidRetarget will silently skip it. None of the presets below +// leaves one empty (Mixamo's missing Toes bones map to *ToeBase). 
+// +// Not user-editable by design (see project_decisions.md): if a rig doesn't +// fit these three presets, add a fourth preset in code rather than +// exposing a JSON text-input escape hatch that tends to silently half-work. + +#include + +#include +#include + +namespace Threedim +{ + +using HumanoidBoneMap = std::array< + std::string_view, + std::size_t(humanoid_bone_index::Count)>; + +enum class HumanoidRigPreset : uint8_t +{ + Mixamo = 0, + VRM, + UnrealMannequin, + Count +}; + +// Mixamo — "mixamorig:" prefix, title-cased component names. +// Spine / Spine1 / Spine2 are three bones; we map the canonical +// Spine→Spine, Chest→Spine1, (no UpperChest) and Neck/Head directly. +// Mixamo has no explicit Toes bones; we map to *ToeBase which is the +// closest equivalent (foot → toe-base is enough for live retargeting). +inline constexpr HumanoidBoneMap kMixamoBoneMap = { + "mixamorig:Hips", // Hips + "mixamorig:Spine", // Spine + "mixamorig:Spine1", // Chest + "mixamorig:Neck", // Neck + "mixamorig:Head", // Head + + "mixamorig:LeftShoulder", // LeftShoulder + "mixamorig:LeftArm", // LeftUpperArm + "mixamorig:LeftForeArm", // LeftLowerArm + "mixamorig:LeftHand", // LeftHand + + "mixamorig:RightShoulder", // RightShoulder + "mixamorig:RightArm", // RightUpperArm + "mixamorig:RightForeArm", // RightLowerArm + "mixamorig:RightHand", // RightHand + + "mixamorig:LeftUpLeg", // LeftUpperLeg + "mixamorig:LeftLeg", // LeftLowerLeg + "mixamorig:LeftFoot", // LeftFoot + "mixamorig:LeftToeBase", // LeftToes + + "mixamorig:RightUpLeg", // RightUpperLeg + "mixamorig:RightLeg", // RightLowerLeg + "mixamorig:RightFoot", // RightFoot + "mixamorig:RightToeBase", // RightToes +}; + +// VRM — per the VRM humanoid spec bone names. Ready Player Me avatars +// also use this naming. Toes are not part of the mandatory VRM bone +// list but commonly present; we map to the optional "LeftToes"/"RightToes" +// which RPM and most VRM exports populate. 
+inline constexpr HumanoidBoneMap kVRMBoneMap = { + "Hips", // Hips + "Spine", // Spine + "Chest", // Chest + "Neck", // Neck + "Head", // Head + + "LeftShoulder", // LeftShoulder + "LeftUpperArm", // LeftUpperArm + "LeftLowerArm", // LeftLowerArm + "LeftHand", // LeftHand + + "RightShoulder", // RightShoulder + "RightUpperArm", // RightUpperArm + "RightLowerArm", // RightLowerArm + "RightHand", // RightHand + + "LeftUpperLeg", // LeftUpperLeg + "LeftLowerLeg", // LeftLowerLeg + "LeftFoot", // LeftFoot + "LeftToes", // LeftToes + + "RightUpperLeg", // RightUpperLeg + "RightLowerLeg", // RightLowerLeg + "RightFoot", // RightFoot + "RightToes", // RightToes +}; + +// Unreal Mannequin — snake_case with "_l"/"_r" suffix. Spine is +// spine_01/02/03; we map Spine→spine_01, Chest→spine_02 (the visible +// chest bone). UE mannequin has no UpperChest; Spine→spine_03 would +// be closer if the rig has one authored. ball_l/r is the UE name for +// toes-equivalent. +inline constexpr HumanoidBoneMap kUnrealMannequinBoneMap = { + "pelvis", // Hips + "spine_01", // Spine + "spine_02", // Chest + "neck_01", // Neck + "head", // Head + + "clavicle_l", // LeftShoulder + "upperarm_l", // LeftUpperArm + "lowerarm_l", // LeftLowerArm + "hand_l", // LeftHand + + "clavicle_r", // RightShoulder + "upperarm_r", // RightUpperArm + "lowerarm_r", // RightLowerArm + "hand_r", // RightHand + + "thigh_l", // LeftUpperLeg + "calf_l", // LeftLowerLeg + "foot_l", // LeftFoot + "ball_l", // LeftToes + + "thigh_r", // RightUpperLeg + "calf_r", // RightLowerLeg + "foot_r", // RightFoot + "ball_r", // RightToes +}; + +inline constexpr const HumanoidBoneMap& +humanoidBoneMap(HumanoidRigPreset preset) noexcept +{ + switch(preset) + { + case HumanoidRigPreset::Mixamo: + return kMixamoBoneMap; + case HumanoidRigPreset::VRM: + return kVRMBoneMap; + case HumanoidRigPreset::UnrealMannequin: + return kUnrealMannequinBoneMap; + case HumanoidRigPreset::Count: + break; + } + return kMixamoBoneMap; +} + +} // 
namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidRetarget.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidRetarget.hpp new file mode 100644 index 0000000000..e5aa9ea680 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidRetarget.hpp @@ -0,0 +1,440 @@ +#pragma once + +// Threedim::HumanoidRetarget — live scene filter that drives a rigged +// model's skeleton from a humanoid_pose stream. +// +// Inputs: +// - scene_in: an ossia::scene_spec carrying a rigged asset (at least one +// skeleton_component). Typically comes from Threedim::AssetLoader. +// - keypoints_in / trackers_in: optional keypoint-stream and tracker-bundle +// inputs; the Source combobox picks which of the two is translated into +// a humanoid_pose each frame (Off = passthrough). +// +// Controls: +// - Target rig preset: Mixamo / VRM / Unreal Mannequin bone-name +// convention. Selects which joint names we look up against the +// scene's skeleton_component. +// - Capture rest pose (impulse): snapshot both sides' current state as +// the retarget reference. Required before any motion transfers. +// - Root motion (toggle) + Root scale: optional Hips translation +// driven by the source's hip_position delta from rest. +// +// Output: +// - scene_out: the incoming scene_spec with ONLY the mapped joints' +// rotations (and optionally Hips translation) replaced. Every other +// joint, every mesh, every material, the scene hierarchy, version +// counters on other state — all passed through unchanged. +// +// Math (Offset / delta-from-rest mode, the default and correct choice +// when source and target rigs have different axis conventions): +// +// q_tgt_cur = q_tgt_rest * ( inverse(q_src_rest) * q_src_cur ) +// +// Calibration (both sides at once) captures q_src_rest per canonical +// bone and q_tgt_rest per resolved target joint. 
The delta is then a +// parent-relative quaternion that transfers cleanly even if the source +// is, say, a BlazePose landmark graph and the target is a Mixamo FBX — +// as long as the adapter produces parent-relative rotations, the math +// works. Per-bone axis correction matrices are a follow-up (needed for +// some exotic rigs; not a v1 concern). +// +// No smoothing here — smoothing belongs in the adapter, pre-pose_spec. +// No IK here — chain `InverseKinematics` after this process for +// hand/foot-prop reach; the two compose cleanly on scene_spec. + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace Threedim +{ + +// Picks which input shape the retargeter consumes this frame. The +// matching input ports are always present on the process (halp doesn't +// hide ports conditionally); the combobox just tells the dispatch which +// one to translate into humanoid_pose. +enum class HumanoidSourceType : uint8_t +{ + Off = 0, // Passthrough (no motion applied) + BlazePose, // keypoints_in, BlazePose 33-landmark ordering + Coco17, // keypoints_in, COCO-17 (YOLO-pose / ViTPose / RTMPose_COCO) + RTMPoseWhole, // keypoints_in, RTMPose_Whole (body subset of 133) + Trackers6, // trackers_in, 6 DOF (head / hips / 2 hands / 2 feet) + Count +}; + +class HumanoidRetarget +{ +public: + halp_meta(name, "Humanoid Retarget") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "humanoid_retarget") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/humanoid-retarget.html") + halp_meta(uuid, "7e1f4d8a-2c6b-4e7f-9a35-6c4b8d2e0f1a") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Keypoint input — populated when Source is a PoseDetector workflow. 
+ // Structurally compatible with score-addon-onnx's DetectedPose + // (matching halp_field_names), so a DetectedPose port wires directly. + struct + { + halp_meta(name, "Keypoints"); + std::optional value; + } keypoints_in; + + // Tracker input — populated when Source is Trackers6. The user wires + // OSC-emitted xyz+quat streams from a PSN/RTTrP/VRPN device into the + // matching tracker_pose slots of the bundle. + struct + { + halp_meta(name, "Trackers"); + std::optional value; + } trackers_in; + + struct : halp::combobox_t<"Source", HumanoidSourceType> + { + struct range + { + std::string_view values[5]{ + "Off", "BlazePose", "COCO-17", "RTMPose Whole", "6DOF Trackers"}; + int init{0}; + }; + void update(HumanoidRetarget& self) + { + // Source-shape change invalidates the captured source rest pose; + // the map of landmark→bone (and bone→tracker) differs, so previous + // "rest" values aren't meaningful under the new source. + self.m_calibrated = false; + } + } source; + + struct : halp::hslider_f32<"Confidence", halp::range{0.f, 1.f, 0.5f}> + { + halp_meta(description, "Per-keypoint confidence threshold"); + } confidence_threshold; + + struct : halp::combobox_t<"Target rig", HumanoidRigPreset> + { + struct range + { + std::string_view values[3]{"Mixamo", "VRM", "Unreal Mannequin"}; + int init{0}; + }; + void update(HumanoidRetarget& self) + { + // Bone-name table change invalidates the cached joint index + // lookups and the captured target rest pose; force a fresh + // calibration on the next frame that has both inputs. 
+ self.m_calibrated = false; + } + } preset; + + halp::toggle<"Root motion"> root_motion; + + struct : halp::hslider_f32<"Root scale", halp::range{0.01f, 10.f, 1.f}> + { + } root_scale; + + struct : halp::impulse_button<"Capture rest pose"> + { + void update(HumanoidRetarget& self) { self.m_need_calibrate = true; } + } calibrate; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void operator()() + { + const auto& in = inputs.scene_in.scene; + if(!in.state || !in.state->roots) + { + outputs.scene_out.scene.state.reset(); + outputs.scene_out.dirty = 0; + return; + } + + // Translate the selected source into a humanoid_pose. Off mode and + // "source has no fresh data" both fall through to a clean passthrough + // so downstream nodes see the input unchanged until motion starts. + std::optional maybe_pose + = composeSourcePose(inputs.confidence_threshold.value); + if(!maybe_pose) + { + outputs.scene_out.scene = in; + outputs.scene_out.dirty = 0; + return; + } + + const auto& pose = *maybe_pose; + + // Resolve the skeleton — first entry in the scene's skeletons list. + // Multi-skeleton assets (per-skin glTF) are a follow-up: v1 retargets + // the first one, which covers 100% of Mixamo / VRM / single-rig + // scenes. + if(!in.state->skeletons || in.state->skeletons->empty()) + { + outputs.scene_out.scene = in; + outputs.scene_out.dirty = 0; + return; + } + const auto& srcSkel = *(*in.state->skeletons)[0]; + if(srcSkel.joints.empty()) + { + outputs.scene_out.scene = in; + outputs.scene_out.dirty = 0; + return; + } + + // Calibrate on demand. Two triggers: + // - user pressed "Capture rest pose" + // - preset combobox changed (invalidates previous joint lookups) + if(m_need_calibrate || !m_calibrated) + { + calibrate(srcSkel, pose); + m_need_calibrate = false; + } + + // Clone the skeleton so other consumers of the input scene don't see + // our mutations. 
This is the same pattern InverseKinematics uses. + auto newSkel = std::make_shared(srcSkel); + + // Per-bone offset-mode retarget: + // q_tgt_new = q_tgt_rest * ( inverse(q_src_rest) * q_src_cur ) + for(std::size_t b = 0; b < std::size_t(humanoid_bone_index::Count); ++b) + { + const int32_t tgt = m_target_joint_indices[b]; + if(tgt < 0 || tgt >= int32_t(newSkel->joints.size())) + continue; + + const auto& src_cur = pose.bones[b]; + if(src_cur.validity < kValidityThreshold) + continue; // trust the target's current rotation (kept from clone) + + const float src_cur_q[4] = { + src_cur.qx, src_cur.qy, src_cur.qz, src_cur.qw}; + float inv_src_rest[4]; + quat_inv(m_source_rest[b], inv_src_rest); + + float delta[4]; + quat_mul(inv_src_rest, src_cur_q, delta); + + float out[4]; + quat_mul(m_target_rest[b], delta, out); + + auto& tgtJoint = newSkel->joints[tgt]; + tgtJoint.rotation[0] = out[0]; + tgtJoint.rotation[1] = out[1]; + tgtJoint.rotation[2] = out[2]; + tgtJoint.rotation[3] = out[3]; + } + + // Root motion — apply source hip delta to target hip translation, + // scaled by the user control. Off by default (most live scenes want + // animate-in-place; locomotion is a deliberate choice). + if(inputs.root_motion.value) + { + const int32_t hipsIdx + = m_target_joint_indices[std::size_t(humanoid_bone_index::Hips)]; + if(hipsIdx >= 0 && hipsIdx < int32_t(newSkel->joints.size())) + { + const float s = inputs.root_scale.value; + auto& hip = newSkel->joints[hipsIdx]; + hip.translation[0] + = m_target_rest_hip_tr[0] + (pose.hip_x - m_source_rest_hip[0]) * s; + hip.translation[1] + = m_target_rest_hip_tr[1] + (pose.hip_y - m_source_rest_hip[1]) * s; + hip.translation[2] + = m_target_rest_hip_tr[2] + (pose.hip_z - m_source_rest_hip[2]) * s; + } + } + + newSkel->dirty_index++; + + // Emit a fresh scene_state that shares everything with the input + // except the skeletons vector. 
+ auto state = std::make_shared(*in.state); + auto skels + = std::make_shared>(); + skels->reserve(in.state->skeletons->size()); + for(std::size_t i = 0; i < in.state->skeletons->size(); ++i) + skels->push_back( + i == 0 ? ossia::skeleton_component_ptr(newSkel) + : (*in.state->skeletons)[i]); + state->skeletons = std::move(skels); + state->version = ++m_version_counter; + state->dirty_index = in.state->dirty_index + 1; + + m_state = std::move(state); + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = ossia::scene_port::dirty_transform; + } + +private: + // Rotation confidence below which we don't override the target bone. + // Adapters default bone validity to 1.0; BlazePose maps landmark + // visibility into [0, 1]. 0.5 is a reasonable "believe this" line. + static constexpr float kValidityThreshold = 0.5f; + + // Hamilton quaternion multiply. (x, y, z, w) ordering. + static void quat_mul(const float a[4], const float b[4], float out[4]) noexcept + { + const float x = a[3] * b[0] + a[0] * b[3] + a[1] * b[2] - a[2] * b[1]; + const float y = a[3] * b[1] - a[0] * b[2] + a[1] * b[3] + a[2] * b[0]; + const float z = a[3] * b[2] + a[0] * b[1] - a[1] * b[0] + a[2] * b[3]; + const float w = a[3] * b[3] - a[0] * b[0] - a[1] * b[1] - a[2] * b[2]; + out[0] = x; + out[1] = y; + out[2] = z; + out[3] = w; + } + + // Inverse of a unit quaternion = conjugate. Adapters should be + // emitting normalized rotations; if they drift, the math still + // produces a stable result but scale factors creep in. 
+ static void quat_inv(const float q[4], float out[4]) noexcept + { + out[0] = -q[0]; + out[1] = -q[1]; + out[2] = -q[2]; + out[3] = q[3]; + } + + void calibrate( + const ossia::skeleton_component& skel, + const humanoid_pose& pose) noexcept + { + const auto& map = humanoidBoneMap(inputs.preset.value); + + for(std::size_t b = 0; b < std::size_t(humanoid_bone_index::Count); ++b) + { + // Snapshot source rest pose quaternion (identity-ish if adapter + // hasn't moved yet; whatever is there is what "neutral" means + // for this capture). + m_source_rest[b][0] = pose.bones[b].qx; + m_source_rest[b][1] = pose.bones[b].qy; + m_source_rest[b][2] = pose.bones[b].qz; + m_source_rest[b][3] = pose.bones[b].qw; + + m_target_joint_indices[b] = -1; + if(map[b].empty()) + continue; // preset intentionally skips this bone (e.g. UpperChest) + + const int32_t idx = skel.find_joint(map[b]); + if(idx < 0) + continue; + m_target_joint_indices[b] = idx; + + // Snapshot target rest rotation. + const auto& j = skel.joints[std::size_t(idx)]; + m_target_rest[b][0] = j.rotation[0]; + m_target_rest[b][1] = j.rotation[1]; + m_target_rest[b][2] = j.rotation[2]; + m_target_rest[b][3] = j.rotation[3]; + + if(b == std::size_t(humanoid_bone_index::Hips)) + { + m_target_rest_hip_tr[0] = j.translation[0]; + m_target_rest_hip_tr[1] = j.translation[1]; + m_target_rest_hip_tr[2] = j.translation[2]; + } + } + + m_source_rest_hip[0] = pose.hip_x; + m_source_rest_hip[1] = pose.hip_y; + m_source_rest_hip[2] = pose.hip_z; + + m_calibrated = true; + } + + // Dispatch the selected source toggle into a humanoid_pose. Returns + // nullopt when the source is Off or no fresh data is present — in that + // case operator() passes the input scene through unchanged. 
+ std::optional + composeSourcePose(float confidence_threshold) noexcept + { + const auto src = inputs.source.value; + switch(src) + { + case HumanoidSourceType::Off: + case HumanoidSourceType::Count: + return std::nullopt; + + case HumanoidSourceType::BlazePose: + if(!inputs.keypoints_in.value + || inputs.keypoints_in.value->keypoints.empty()) + return std::nullopt; + return keypoints_to_humanoid_pose( + *inputs.keypoints_in.value, kBlazePoseMap, confidence_threshold); + + case HumanoidSourceType::Coco17: + if(!inputs.keypoints_in.value + || inputs.keypoints_in.value->keypoints.empty()) + return std::nullopt; + return keypoints_to_humanoid_pose( + *inputs.keypoints_in.value, kCoco17Map, confidence_threshold); + + case HumanoidSourceType::RTMPoseWhole: + if(!inputs.keypoints_in.value + || inputs.keypoints_in.value->keypoints.empty()) + return std::nullopt; + return keypoints_to_humanoid_pose( + *inputs.keypoints_in.value, kRTMPoseWholeMap, + confidence_threshold); + + case HumanoidSourceType::Trackers6: + if(!inputs.trackers_in.value) + return std::nullopt; + return trackers_to_humanoid_pose(*inputs.trackers_in.value); + } + return std::nullopt; + } + +public: + // Persisted across score-document saves (serialized with process state). + bool m_calibrated{false}; + std::array m_source_rest{}; + std::array m_target_rest{}; + std::array + m_target_joint_indices{}; + float m_target_rest_hip_tr[3]{0.f, 0.f, 0.f}; + float m_source_rest_hip[3]{0.f, 0.f, 0.f}; + + // Ephemeral. 
+ bool m_need_calibrate{false}; + std::shared_ptr m_state; + int64_t m_version_counter{0}; +}; + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceAdapters.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceAdapters.hpp new file mode 100644 index 0000000000..0301a422a5 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceAdapters.hpp @@ -0,0 +1,326 @@ +#pragma once + +// Free functions that convert from the two source-data shapes +// HumanoidRetarget accepts (keypoint_stream from an ONNX PoseDetector, +// tracker_bundle_6 from a mocap / tracking-protocol device) into the +// canonical humanoid_pose. Separate header to keep HumanoidRetarget.hpp +// focused on orchestration + Offset-mode retargeting math. +// +// Both paths produce PARENT-LOCAL quaternions — that's the invariant +// HumanoidRetarget depends on. See the file comment at the top of +// HumanoidRetarget.hpp for why. + +#include +#include + +#include +#include + +namespace Threedim +{ + +// --------------------------------------------------------------------------- +// Small quaternion helpers. Inline and header-only for zero TU overhead. +// (x, y, z, w) layout, matching ossia::skeleton_joint::rotation and +// humanoid_bone::q*. +// --------------------------------------------------------------------------- +inline void quat_mul_xyzw( + const float a[4], const float b[4], float out[4]) noexcept +{ + const float x = a[3] * b[0] + a[0] * b[3] + a[1] * b[2] - a[2] * b[1]; + const float y = a[3] * b[1] - a[0] * b[2] + a[1] * b[3] + a[2] * b[0]; + const float z = a[3] * b[2] + a[0] * b[1] - a[1] * b[0] + a[2] * b[3]; + const float w = a[3] * b[3] - a[0] * b[0] - a[1] * b[1] - a[2] * b[2]; + out[0] = x; out[1] = y; out[2] = z; out[3] = w; +} + +inline void quat_inv_xyzw(const float q[4], float out[4]) noexcept +{ + // Inverse of a unit quaternion = conjugate. 
+ out[0] = -q[0]; out[1] = -q[1]; out[2] = -q[2]; out[3] = q[3]; +} + +// Shortest-arc rotation from unit vector `from` to unit vector `to`. +// Produces the quaternion q such that q·from = to. Used to turn a +// canonical T-pose bone axis into the observed bone direction; this is +// inherently a 2-DoF answer (the twist around the bone's own length is +// undefined by just two direction endpoints). That's a hard limit of +// single-camera keypoint mocap; professional suits add IMU twist. +inline void shortest_arc( + const float from[3], const float to[3], float out[4]) noexcept +{ + const float d = from[0] * to[0] + from[1] * to[1] + from[2] * to[2]; + const float eps = 1e-6f; + + if(d >= 1.f - eps) + { + // Aligned — identity. + out[0] = 0.f; out[1] = 0.f; out[2] = 0.f; out[3] = 1.f; + return; + } + if(d <= -1.f + eps) + { + // Antiparallel — 180° around ANY perpendicular axis. Pick one that + // isn't (near-)parallel to `from` for numerical stability. + float axis[3]; + if(std::fabs(from[0]) < 0.9f) + { + axis[0] = 1.f - from[0] * from[0]; + axis[1] = -from[0] * from[1]; + axis[2] = -from[0] * from[2]; + } + else + { + axis[0] = -from[1] * from[0]; + axis[1] = 1.f - from[1] * from[1]; + axis[2] = -from[1] * from[2]; + } + const float len + = std::sqrt(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); + if(len > eps) + { + const float inv = 1.f / len; + out[0] = axis[0] * inv; + out[1] = axis[1] * inv; + out[2] = axis[2] * inv; + } + else + { + out[0] = 1.f; out[1] = 0.f; out[2] = 0.f; + } + out[3] = 0.f; + return; + } + + // General case — half-vector formulation for numerical stability. 
+ const float cross[3] = { + from[1] * to[2] - from[2] * to[1], + from[2] * to[0] - from[0] * to[2], + from[0] * to[1] - from[1] * to[0]}; + const float s = std::sqrt((1.f + d) * 2.f); + const float invs = 1.f / s; + out[0] = cross[0] * invs; + out[1] = cross[1] * invs; + out[2] = cross[2] * invs; + out[3] = s * 0.5f; +} + +// --------------------------------------------------------------------------- +// keypoints → humanoid_pose. +// +// Algorithm: +// 1. For each bone with a valid (parent_idx, child_idx) edge in the map +// AND both keypoints' confidence ≥ threshold: +// d_world[b] = normalize(kp[child] - kp[parent]) +// q_world[b] = shortestArc(kRestAxis[b], d_world[b]) +// 2. Walk bones in topological order (enum order is already topological +// because each bone's parent has a lower index). For each bone b: +// - if no world rotation was computed, validity = 0 +// - if parent has no world rotation, emit q_world[b] as local +// (root-relative behaviour — good fallback when upper chain data +// is missing) +// - else q_local[b] = inv(q_world[parent(b)]) * q_world[b] +// 3. Copy Hips world position from whatever landmark best represents it +// (for BlazePose / COCO the hip midpoint; approximated as left_hip). +// Used only by the root-motion toggle downstream. +// --------------------------------------------------------------------------- +inline humanoid_pose keypoints_to_humanoid_pose( + const keypoint_stream& stream, + const HumanoidKeypointMap& map, + float confidence_threshold = 0.5f) noexcept +{ + humanoid_pose out{}; + + // Step 1: per-bone world rotations. 
+ constexpr std::size_t N = std::size_t(humanoid_bone_index::Count); + std::array, N> q_world{}; + std::array has_world{}; + + const auto& kps = stream.keypoints; + const int K = int(kps.size()); + + for(std::size_t b = 0; b < N; ++b) + { + has_world[b] = false; + q_world[b] = {0.f, 0.f, 0.f, 1.f}; + + const auto& edge = map[b]; + if(!edge.valid() || edge.parent_idx == edge.child_idx) + continue; + if(edge.parent_idx >= K || edge.child_idx >= K) + continue; + + const auto& p = kps[std::size_t(edge.parent_idx)]; + const auto& c = kps[std::size_t(edge.child_idx)]; + if(p.confidence < confidence_threshold + || c.confidence < confidence_threshold) + continue; + + float d[3] = {c.x - p.x, c.y - p.y, c.z - p.z}; + const float len = std::sqrt(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]); + if(len < 1e-6f) + continue; + const float inv = 1.f / len; + d[0] *= inv; d[1] *= inv; d[2] *= inv; + + const auto& rest = kHumanoidRestAxis[b]; + shortest_arc(rest.data(), d, q_world[b].data()); + has_world[b] = true; + } + + // Step 2: world → parent-local. Enum order is topological: each bone's + // parent has a strictly lower index, so a single forward pass is safe. + for(std::size_t b = 0; b < N; ++b) + { + auto& bone = out.bones[b]; + if(!has_world[b]) + { + bone.validity = 0.f; + bone.qx = 0.f; bone.qy = 0.f; bone.qz = 0.f; bone.qw = 1.f; + continue; + } + + const auto parent_idx = kHumanoidParent[b]; + if(parent_idx == humanoid_bone_index::Count + || !has_world[std::size_t(parent_idx)]) + { + // Root bone OR parent's world rotation is unknown — emit our world + // rotation as local. For root this is correct; for a bone whose + // parent failed to resolve this is a reasonable degradation (the + // bone will orient absolutely rather than relative to a missing + // parent, which at least keeps it visible). 
+ bone.qx = q_world[b][0]; + bone.qy = q_world[b][1]; + bone.qz = q_world[b][2]; + bone.qw = q_world[b][3]; + } + else + { + float inv_parent[4]; + quat_inv_xyzw(q_world[std::size_t(parent_idx)].data(), inv_parent); + float local[4]; + quat_mul_xyzw(inv_parent, q_world[b].data(), local); + bone.qx = local[0]; bone.qy = local[1]; + bone.qz = local[2]; bone.qw = local[3]; + } + bone.validity = 1.f; + } + + // Hip translation — grab the parent keypoint of the Spine edge as the + // best "pelvis" proxy (BlazePose landmark 23 = left_hip, COCO 11 = + // left_hip). Not the true midpoint, but close enough for single-camera + // root motion; users who need precision should use a tracker workflow. + const auto& spine_edge = map[std::size_t(humanoid_bone_index::Spine)]; + if(spine_edge.parent_idx >= 0 && spine_edge.parent_idx < K) + { + const auto& hip_kp = kps[std::size_t(spine_edge.parent_idx)]; + if(hip_kp.confidence >= confidence_threshold) + { + out.hip_x = hip_kp.x; + out.hip_y = hip_kp.y; + out.hip_z = hip_kp.z; + } + } + + return out; +} + +// --------------------------------------------------------------------------- +// trackers → humanoid_pose. +// +// With only 6 trackers (head, hips, 2 hands, 2 feet) we directly drive +// those 6 bones and leave the intermediate bones (spine, shoulders, +// elbows, knees) at their retarget rest. Getting those bones to follow +// realistically needs either more trackers (10-point Vive Full-Body) or +// a downstream 2-bone IK chain (InverseKinematics process) keyed on +// shoulder + wrist tracker positions as (root, target). v1 keeps the +// retargeter unopinionated — we fill what we're given. +// +// Tracker quaternions are world-space by convention (PSN, OSC, VRPN all +// report world transforms). Parent-local is produced by inverting the +// parent bone's tracker rotation if that parent also has a tracker; +// otherwise the bone inherits the world rotation directly. 
+// --------------------------------------------------------------------------- +inline humanoid_pose trackers_to_humanoid_pose( + const tracker_bundle_6& t) noexcept +{ + humanoid_pose out{}; + + // Slot 1:1 mapping — which canonical bone gets which tracker. + struct Slot + { + humanoid_bone_index bone; + const tracker_pose* tr; + }; + const Slot slots[] = { + {humanoid_bone_index::Hips, &t.hips}, + {humanoid_bone_index::Head, &t.head}, + {humanoid_bone_index::LeftHand, &t.left_hand}, + {humanoid_bone_index::RightHand, &t.right_hand}, + {humanoid_bone_index::LeftFoot, &t.left_foot}, + {humanoid_bone_index::RightFoot, &t.right_foot}, + }; + + // Gather world rotations. + constexpr std::size_t N = std::size_t(humanoid_bone_index::Count); + std::array, N> q_world{}; + std::array has_world{}; + for(std::size_t b = 0; b < N; ++b) + { + q_world[b] = {0.f, 0.f, 0.f, 1.f}; + has_world[b] = false; + } + + for(const auto& slot : slots) + { + if(slot.tr->validity < 0.5f) + continue; + const std::size_t idx = std::size_t(slot.bone); + q_world[idx] = {slot.tr->qx, slot.tr->qy, slot.tr->qz, slot.tr->qw}; + has_world[idx] = true; + } + + // World → parent-local, same pattern as the keypoint path. Bones whose + // parent has no tracker fall through to "emit world as local", which + // makes them pose relative to the world origin — correct for Head / + // Hands when their parent chain (Neck, LowerArm) isn't tracker-driven. 
+ for(std::size_t b = 0; b < N; ++b) + { + auto& bone = out.bones[b]; + if(!has_world[b]) + { + bone.validity = 0.f; + continue; + } + + const auto parent_idx = kHumanoidParent[b]; + if(parent_idx == humanoid_bone_index::Count + || !has_world[std::size_t(parent_idx)]) + { + bone.qx = q_world[b][0]; bone.qy = q_world[b][1]; + bone.qz = q_world[b][2]; bone.qw = q_world[b][3]; + } + else + { + float inv_parent[4]; + quat_inv_xyzw(q_world[std::size_t(parent_idx)].data(), inv_parent); + float local[4]; + quat_mul_xyzw(inv_parent, q_world[b].data(), local); + bone.qx = local[0]; bone.qy = local[1]; + bone.qz = local[2]; bone.qw = local[3]; + } + bone.validity = 1.f; + } + + // Hip position = hips tracker position (if tracking). + if(t.hips.validity >= 0.5f) + { + out.hip_x = t.hips.x; + out.hip_y = t.hips.y; + out.hip_z = t.hips.z; + } + + return out; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceMaps.hpp b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceMaps.hpp new file mode 100644 index 0000000000..1ca66a67d8 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/HumanoidSourceMaps.hpp @@ -0,0 +1,233 @@ +#pragma once + +// Source-side tables used by HumanoidRetarget's conversion step: +// +// - per-workflow keypoint→bone mapping (BlazePose 33, COCO-17, RTMPose +// Whole 133) — each entry says "bone B's direction is landmark parent_idx +// to child_idx in this workflow" +// - canonical T-pose bone axes — the world-space direction each bone +// points in the canonical T-pose (e.g. LeftUpperArm is -X). Used as +// the "rest direction" of each bone for the shortest-arc computation. +// - bone hierarchy (parent-of-bone) — needed to convert world rotations +// to parent-relative quaternions after the shortest-arc pass. +// +// Kept separate from HumanoidRetarget.hpp to keep the retargeter file +// focused on orchestration + math. 
+ +#include + +#include +#include + +namespace Threedim +{ + +// --------------------------------------------------------------------------- +// Bone tree: for each canonical bone, its parent bone (or Count if root). +// Matches the humanoid_bone_index enum order. +// --------------------------------------------------------------------------- +inline constexpr std::array< + humanoid_bone_index, + std::size_t(humanoid_bone_index::Count)> + kHumanoidParent = {{ + humanoid_bone_index::Count, // Hips (root) + humanoid_bone_index::Hips, // Spine + humanoid_bone_index::Spine, // Chest + humanoid_bone_index::Chest, // Neck + humanoid_bone_index::Neck, // Head + + humanoid_bone_index::Chest, // LeftShoulder + humanoid_bone_index::LeftShoulder, // LeftUpperArm + humanoid_bone_index::LeftUpperArm, // LeftLowerArm + humanoid_bone_index::LeftLowerArm, // LeftHand + + humanoid_bone_index::Chest, // RightShoulder + humanoid_bone_index::RightShoulder, // RightUpperArm + humanoid_bone_index::RightUpperArm, // RightLowerArm + humanoid_bone_index::RightLowerArm, // RightHand + + humanoid_bone_index::Hips, // LeftUpperLeg + humanoid_bone_index::LeftUpperLeg, // LeftLowerLeg + humanoid_bone_index::LeftLowerLeg, // LeftFoot + humanoid_bone_index::LeftFoot, // LeftToes + + humanoid_bone_index::Hips, // RightUpperLeg + humanoid_bone_index::RightUpperLeg, // RightLowerLeg + humanoid_bone_index::RightLowerLeg, // RightFoot + humanoid_bone_index::RightFoot, // RightToes + }}; + +// --------------------------------------------------------------------------- +// Canonical T-pose bone axes. Y-up, right-handed, model facing +Z. +// +// Each entry is the world-space unit direction the bone's parent→child +// segment points in the canonical T-pose. The retargeter uses these as +// the "from" vector in the shortest-arc rotation that aligns the bone +// with the current landmark-derived direction. 
+// +// Conventions: +// - Spine / Neck / Head chain points up (+Y) +// - Arms point outward (-X for left, +X for right) along the horizontal +// - Legs point down (-Y) +// - Toes point forward (+Z) +// - Shoulders are small bones from spine to upper-arm root; treat as +// pointing toward the upper-arm (horizontal left/right) +// - Hips bone itself is the root; no direction (identity). +// --------------------------------------------------------------------------- +inline constexpr std::array< + std::array, + std::size_t(humanoid_bone_index::Count)> + kHumanoidRestAxis = {{ + {0.f, 0.f, 0.f}, // Hips — root, no direction + {0.f, 1.f, 0.f}, // Spine +Y + {0.f, 1.f, 0.f}, // Chest +Y + {0.f, 1.f, 0.f}, // Neck +Y + {0.f, 1.f, 0.f}, // Head +Y + + {-1.f, 0.f, 0.f}, // LeftShoulder -X + {-1.f, 0.f, 0.f}, // LeftUpperArm -X + {-1.f, 0.f, 0.f}, // LeftLowerArm -X + {-1.f, 0.f, 0.f}, // LeftHand -X + + {1.f, 0.f, 0.f}, // RightShoulder +X + {1.f, 0.f, 0.f}, // RightUpperArm +X + {1.f, 0.f, 0.f}, // RightLowerArm +X + {1.f, 0.f, 0.f}, // RightHand +X + + {0.f, -1.f, 0.f}, // LeftUpperLeg -Y + {0.f, -1.f, 0.f}, // LeftLowerLeg -Y + {0.f, -1.f, 0.f}, // LeftFoot -Y + {0.f, 0.f, 1.f}, // LeftToes +Z + + {0.f, -1.f, 0.f}, // RightUpperLeg -Y + {0.f, -1.f, 0.f}, // RightLowerLeg -Y + {0.f, -1.f, 0.f}, // RightFoot -Y + {0.f, 0.f, 1.f}, // RightToes +Z + }}; + +// --------------------------------------------------------------------------- +// Keypoint mapping: for each canonical bone, (parent_keypoint_idx, +// child_keypoint_idx) into the workflow's keypoint array. -1 means this +// bone isn't derivable from this workflow (the adapter will skip it, +// keeping the target bone at its rest rotation). 
+// --------------------------------------------------------------------------- +struct HumanoidKeypointEdge +{ + int16_t parent_idx{-1}; + int16_t child_idx{-1}; + bool valid() const noexcept { return parent_idx >= 0 && child_idx >= 0; } +}; + +using HumanoidKeypointMap = std::array< + HumanoidKeypointEdge, + std::size_t(humanoid_bone_index::Count)>; + +// --------------------------------------------------------------------------- +// BlazePose (33 landmarks). +// Index reference: +// 0: nose, 1: left_eye_inner, 2: left_eye, 3: left_eye_outer, +// 4: right_eye_inner, 5: right_eye, 6: right_eye_outer, +// 7: left_ear, 8: right_ear, +// 9: mouth_left, 10: mouth_right, +// 11: left_shoulder, 12: right_shoulder, +// 13: left_elbow, 14: right_elbow, +// 15: left_wrist, 16: right_wrist, +// 17..22: left/right pinky/index/thumb (hand subdetail) +// 23: left_hip, 24: right_hip, +// 25: left_knee, 26: right_knee, +// 27: left_ankle, 28: right_ankle, +// 29: left_heel, 30: right_heel, +// 31: left_foot_index, 32: right_foot_index +// +// Bone directions are parent_kp → child_kp: +// - Spine: midpoint(hips) → midpoint(shoulders). Approximated as +// left_hip → left_shoulder (an acceptable approximation for a +// single-segment spine; precise midpoint handling would need +// a helper with synthesized virtual landmarks). +// - Chest / Neck approximated similarly. +// - Shoulders (the bone from spine to upper-arm root) are treated as +// midpoint(shoulders) → shoulder. Again approximated directly. 
+// - Toes: ankle → foot_index +// --------------------------------------------------------------------------- +inline constexpr HumanoidKeypointMap kBlazePoseMap = {{ + {-1, -1}, // Hips (root) + {23, 11}, // Spine: left_hip → left_shoulder + {11, 12}, // Chest: shoulders pair (approximation) + {11, 0}, // Neck: left_shoulder → nose (approx) + {0, 2}, // Head: nose → left_eye (approx) + + {11, 11}, // LeftShoulder (collar): degenerate — map skipped by validity + {11, 13}, // LeftUpperArm: left_shoulder → left_elbow + {13, 15}, // LeftLowerArm: left_elbow → left_wrist + {15, 19}, // LeftHand: left_wrist → left_index + + {12, 12}, // RightShoulder (collar): skipped + {12, 14}, // RightUpperArm + {14, 16}, // RightLowerArm + {16, 20}, // RightHand + + {23, 25}, // LeftUpperLeg + {25, 27}, // LeftLowerLeg + {27, 29}, // LeftFoot + {27, 31}, // LeftToes: ankle → foot_index + + {24, 26}, // RightUpperLeg + {26, 28}, // RightLowerLeg + {28, 30}, // RightFoot + {28, 32}, // RightToes +}}; + +// --------------------------------------------------------------------------- +// COCO-17 layout (YOLO-pose, ViTPose, RTMPose_COCO). +// Index reference: +// 0: nose, 1: left_eye, 2: right_eye, 3: left_ear, 4: right_ear, +// 5: left_shoulder, 6: right_shoulder, +// 7: left_elbow, 8: right_elbow, +// 9: left_wrist, 10: right_wrist, +// 11: left_hip, 12: right_hip, +// 13: left_knee, 14: right_knee, +// 15: left_ankle, 16: right_ankle +// +// No toes / feet detail, no fingers — those bones are flagged as +// unmappable and will keep their target rest rotation. 
+// --------------------------------------------------------------------------- +inline constexpr HumanoidKeypointMap kCoco17Map = {{ + {-1, -1}, // Hips + {11, 5}, // Spine: left_hip → left_shoulder (approx) + {5, 6}, // Chest: shoulders (approx) + {5, 0}, // Neck: shoulder → nose (approx) + {0, 1}, // Head: nose → left_eye + + {-1, -1}, // LeftShoulder — no dedicated landmark + {5, 7}, // LeftUpperArm + {7, 9}, // LeftLowerArm + {-1, -1}, // LeftHand — no wrist-to-hand direction in COCO + + {-1, -1}, // RightShoulder + {6, 8}, // RightUpperArm + {8, 10}, // RightLowerArm + {-1, -1}, // RightHand + + {11, 13}, // LeftUpperLeg + {13, 15}, // LeftLowerLeg + {-1, -1}, // LeftFoot — ankle only + {-1, -1}, // LeftToes + + {12, 14}, // RightUpperLeg + {14, 16}, // RightLowerLeg + {-1, -1}, // RightFoot + {-1, -1}, // RightToes +}}; + +// --------------------------------------------------------------------------- +// RTMPose Whole-body 133 keypoints — first 17 match COCO, 17..22 face, +// 23..90 face mesh, 91..132 hands. For body retargeting we reuse the +// first 17 (same as COCO), and optionally pull finger landmarks for a +// richer hand (Hand bone direction = wrist → middle_finger_mcp). +// +// v1: use only the COCO subset. Hands would require a 21-landmark map +// (follow-up). +// --------------------------------------------------------------------------- +inline constexpr HumanoidKeypointMap kRTMPoseWholeMap = kCoco17Map; + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/ImageLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.cpp new file mode 100644 index 0000000000..bd9c99b0d5 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.cpp @@ -0,0 +1,85 @@ +#include "ImageLoader.hpp" + +namespace Threedim +{ + +void ImageLoader::init( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res) +{ + // RenderList rebuild (e.g. 
viewport resize) calls release() which + // drops m_tex, then init() against the new RenderList. Without this + // re-stage the user would have to re-trigger the file-port to get + // their texture back. Stage the kept CPU image into m_pendingImage + // so the next update() pass uploads it to the freshly-allocated + // QRhiTexture against the new rhi. + if(!m_keptImage.isNull()) + { + m_pendingImage = m_keptImage; + m_changed = true; + } +} + +void ImageLoader::update( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e) +{ + if(!m_changed || m_pendingImage.isNull()) + return; + + auto& rhi = *renderer.state.rhi; + const QSize sz = m_pendingImage.size(); + + // (Re)create texture when the stored one's size doesn't match. + // RGBA8 — LDR loader. The HDR variant lives in a sibling plug-in + // that links against OpenImageIO and produces RGBA16F/RGBA32F. + if(!m_tex || m_tex->pixelSize() != sz) + { + if(m_tex) + m_tex->deleteLater(); + m_tex = rhi.newTexture(QRhiTexture::RGBA8, sz, 1, QRhiTexture::Flag{}); + if(!m_tex || !m_tex->create()) + { + if(m_tex) + { + m_tex->deleteLater(); + m_tex = nullptr; + } + return; + } + } + + res.uploadTexture(m_tex, m_pendingImage); + + outputs.texture.texture.handle = m_tex; + outputs.texture.texture.width = sz.width(); + outputs.texture.texture.height = sz.height(); + // Format defaults to RGBA8 on construction; explicit for clarity. + outputs.texture.texture.format = halp::gpu_texture::RGBA8; + + // Persist the CPU copy across RenderList rebuilds so init() can + // re-stage on the next resize. Move-from m_pendingImage to keep + // the upload's already-detached QImage data without copying. 
+ m_keptImage = std::move(m_pendingImage); + m_pendingImage = QImage{}; + m_changed = false; +} + +void ImageLoader::release(score::gfx::RenderList& r) +{ + if(m_tex) + { + m_tex->deleteLater(); + m_tex = nullptr; + } + outputs.texture.texture.handle = nullptr; + outputs.texture.texture.width = 0; + outputs.texture.texture.height = 0; +} + +void ImageLoader::runInitialPasses( + score::gfx::RenderList&, QRhiCommandBuffer&, + QRhiResourceUpdateBatch*&, score::gfx::Edge&) +{ +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/ImageLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.hpp new file mode 100644 index 0000000000..049fe00b6d --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ImageLoader.hpp @@ -0,0 +1,101 @@ +#pragma once +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace Threedim +{ + +// Lightweight LDR image-to-GPU-texture loader. Sibling to BufferLoader +// but for 2D textures. Sits alongside the main OpenImageIO-backed +// ImageLoader in a sibling plug-in, usable when OIIO isn't linked in +// and the image is a plain QImage-supported format (PNG / JPG / BMP / +// …). HDR formats (.hdr / .exr) require the OIIO path. +// +// Primary use: feeds the pure-shader cubemap pipeline +// ImageLoader(path) → cubemap_from_source → SceneResourceRoute(Skybox) +// superseding the bespoke equirect/cross/strip code in CubemapLoader. +class ImageLoader +{ +public: + halp_meta(name, "Image loader (LDR)") + halp_meta(category, "Visuals") + halp_meta(c_name, "image_loader_ldr") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/image-loader.html") + halp_meta(description, + "Loads a 2D image file (PNG / JPG / BMP / …) to a GPU RGBA8 texture") + halp_meta(uuid, "e6b2c1d8-3f45-4a92-8b17-9c4e0d5a6f3b") + + struct ins + { + // File-port boilerplate — same pattern as SplatLoader's obj_t. 
+ // process() runs on the file-load thread, decodes the image, + // returns a lambda that stages the result onto the node from the + // execution thread. + struct image_t : halp::file_port<"Image", halp::mmap_file_view> + { + halp_meta(extensions, + "Images (*.png *.jpg *.jpeg *.bmp *.tga *.webp *.tif *.tiff)"); + static std::function process(file_type data) + { + QImage img; + if(!data.bytes.empty()) + { + img.loadFromData( + reinterpret_cast(data.bytes.data()), + (int)data.bytes.size()); + } + if(img.isNull() && !data.filename.empty()) + { + img = QImage(data.filename.data()); + } + if(!img.isNull() && img.format() != QImage::Format_RGBA8888) + img = img.convertToFormat(QImage::Format_RGBA8888); + return [img = std::move(img)](ImageLoader& self) mutable { + self.m_pendingImage = std::move(img); + self.m_changed = true; + }; + } + } image; + } inputs; + + struct + { + halp::gpu_texture_output<"Texture"> texture; + } outputs; + + void init(score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& renderer, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + void runInitialPasses( + score::gfx::RenderList& renderer, QRhiCommandBuffer& commands, + QRhiResourceUpdateBatch*& res, score::gfx::Edge& edge); + + void operator()() { } + + QImage m_pendingImage; + // Persistent CPU copy of the last successfully uploaded image. Kept + // alive across RenderList rebuilds (resize) so that init() can + // re-upload to the freshly allocated QRhiTexture without needing the + // user to re-trigger the file-port. Without this, release() drops + // m_tex AND clears m_pendingImage in update() — the next init() has + // nothing to upload, the texture port stays bound to the empty + // placeholder for the rest of the session. 
+ QImage m_keptImage; + QRhiTexture* m_tex{}; + bool m_changed{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.cpp b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.cpp new file mode 100644 index 0000000000..2768eb3d9a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.cpp @@ -0,0 +1,74 @@ +#include "InjectBuffer.hpp" + +#include + +namespace Threedim +{ + +void InjectBuffer::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + void* cur_handle = inputs.buffer.buffer.handle; + const int64_t cur_bytes = inputs.buffer.buffer.byte_size; + const auto& cur_name = inputs.aux_name.value; + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_cached_handle = cur_handle; + m_cached_byte_size = cur_bytes; + m_cached_name = cur_name; + + // Unwired / incomplete controls → pass-through. Safe to drop in a + // pipeline before the Buffer is connected. + if(!cur_handle || cur_name.empty() || !in_state) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + // Clone the scene_state (cheap — it's shallow pointers to shared + // sub-vectors) and append the injection. Existing entries with the + // same name are removed first so a later InjectBuffer in the chain + // always wins. 
+ auto state = std::make_shared(*in_state); + state->inject_buffers.erase( + std::remove_if( + state->inject_buffers.begin(), state->inject_buffers.end(), + [&](const ossia::aux_inject_buffer& ab) { return ab.name == cur_name; }), + state->inject_buffers.end()); + state->inject_buffers.push_back( + {.name = cur_name, + .native_handle = cur_handle, + .byte_size = cur_bytes}); + state->version = ++m_version_counter; + state->dirty_index = m_version_counter; + + m_cached_out = state; + m_pending_dirty = 0xFF; +} + +void InjectBuffer::operator()() +{ + // Upstream scene_state + live buffer handle can change without a + // port-update event; detect and trigger rebuild. aux_name changes + // come via the control update() callback. + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + void* cur_handle = inputs.buffer.buffer.handle; + const int64_t cur_bytes = inputs.buffer.buffer.byte_size; + const bool upstream_changed + = m_cached_in_state != in_state + || m_cached_in_version != in_version + || m_cached_handle != cur_handle + || m_cached_byte_size != cur_bytes; + if(!m_cached_out || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.hpp b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.hpp new file mode 100644 index 0000000000..971de57b32 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/InjectBuffer.hpp @@ -0,0 +1,90 @@ +#pragma once +#include +#include +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Mid-pipeline aux-buffer injection. Takes a scene_spec passthrough cable +// plus a live GPU buffer from an upstream producer (CSF output, another +// aux node, etc.) and attaches it to the scene as a pending injection +// under a caller-supplied name. 
ScenePreprocessor consumes +// `scene_state::inject_buffers` at flatten-time and writes matching +// `auxiliary_buffer` entries onto every output geometry — so the live +// handle ends up bound to any downstream consumer shader that declares +// an AUXILIARY entry with the same name (SSBO or UBO kind). +// +// Wiring: +// CSFProducer → InjectBuffer(name="scene_params") +// → ScenePreprocessor → classic_pbr_full +// +// Name collisions with existing auxes published by the scene producers +// (e.g., ScenePreprocessor's own scene_lights / scene_materials) follow +// last-wins — the injection appended after flatten overrides the +// flatten-time entry. Use this to selectively replace standard auxes +// with custom data without forking the preprocessor. +class InjectBuffer +{ +public: + halp_meta(name, "Inject Buffer") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "inject_buffer") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/inject-buffer.html") + halp_meta(uuid, "4f9a6e2d-7c83-4b5d-9e1f-8a3c5d6b2f4e") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: aux_name triggers rebuild(). scene_in + + // buffer handle changes are detected in operator()() because they + // can change without a port-update event. + // Live GPU buffer from an upstream producer. Null handle → the + // injection is skipped (passthrough), so unwiring is safe. + halp::gpu_buffer_input<"Buffer"> buffer; + + struct : halp::lineedit<"Aux name", ""> + { void update(InjectBuffer& n) { n.rebuild(); } } aux_name; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // Stable shared_ptr cached while inputs are unchanged — keeps + // ScenePreprocessor's fingerprint fast-path warm. 
+ std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + std::string m_cached_name; + void* m_cached_handle{}; + int64_t m_cached_byte_size{}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectTexture.cpp b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.cpp new file mode 100644 index 0000000000..9459fb4b7a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.cpp @@ -0,0 +1,61 @@ +#include "InjectTexture.hpp" + +#include + +namespace Threedim +{ + +void InjectTexture::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + void* cur_handle = inputs.texture.texture.handle; + const auto& cur_name = inputs.aux_name.value; + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_cached_handle = cur_handle; + m_cached_name = cur_name; + + if(!cur_handle || cur_name.empty() || !in_state) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + auto state = std::make_shared(*in_state); + state->inject_textures.erase( + std::remove_if( + state->inject_textures.begin(), state->inject_textures.end(), + [&](const ossia::aux_inject_texture& at) { return at.name == cur_name; }), + state->inject_textures.end()); + state->inject_textures.push_back( + {.name = cur_name, .native_handle = cur_handle}); + state->version = ++m_version_counter; + state->dirty_index = m_version_counter; + + m_cached_out = state; + m_pending_dirty = 0xFF; +} + +void InjectTexture::operator()() +{ + // Upstream scene_state + live texture handle can change mid-stream; + // detect and rebuild. + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? 
in_state->version : -1; + void* cur_handle = inputs.texture.texture.handle; + const bool upstream_changed + = m_cached_in_state != in_state + || m_cached_in_version != in_version + || m_cached_handle != cur_handle; + if(!m_cached_out || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/InjectTexture.hpp b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.hpp new file mode 100644 index 0000000000..038591611b --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/InjectTexture.hpp @@ -0,0 +1,86 @@ +#pragma once +#include +#include +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Mid-pipeline aux-texture injection. Takes a scene_spec passthrough +// cable plus a live GPU texture from an upstream producer (video node, +// ISF output, CSF image, etc.) and attaches it under a caller-supplied +// name. ScenePreprocessor consumes `scene_state::inject_textures` and +// writes matching `auxiliary_texture` entries onto its output +// geometry — so the live handle flows to any downstream consumer +// shader that declares an AUXILIARY texture entry with the same name. +// +// Texture handles are routed via halp::gpu_texture_input, which goes +// through the Graph's TextureInlet / updateInputTexture() path — a +// fundamentally different mechanism from InjectBuffer's +// halp::gpu_buffer_input (which goes through bufferForInput / Output). +// Hence the split into two distinct node types. 
+// +// Wiring: +// VideoProducer → InjectTexture(name="base_color_dyn0") +// → ScenePreprocessor → classic_pbr_full +class InjectTexture +{ +public: + halp_meta(name, "Inject Texture") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "inject_texture") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/inject-texture.html") + halp_meta(uuid, "3b8d2f7c-9a5e-4f1d-a4c6-6e2d9c4f8a1b") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: aux_name triggers rebuild(). scene_in + + // texture handle changes detected in operator()() (no port-update + // event fires when a native handle is swapped). + // Live GPU texture from an upstream producer. Null handle → the + // injection is skipped (passthrough). + halp::gpu_texture_input<"Texture"> texture; + + struct : halp::lineedit<"Aux name", ""> + { void update(InjectTexture& n) { n.rebuild(); } } aux_name; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + std::string m_cached_name; + void* m_cached_handle{}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Instancer.cpp b/src/plugins/score-plugin-threedim/Threedim/Instancer.cpp new file mode 100644 index 0000000000..7d7385b5bf --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Instancer.cpp @@ -0,0 +1,518 @@ +#include "Instancer.hpp" + +#include +#include + +#include +#include +#include +#include + +#include + +namespace Threedim +{ + +namespace +{ + +// Extract the first mesh_component found in a scene tree (depth-first), +// alongside the accumulated 
`scene_transform` composition encountered +// along the path from `node` to that mesh. The composition is what +// upstream producers use to position their meshes (a glTF root node's +// scale, a Primitive's TRS, etc.); without it, instancing a Duck.gltf +// would draw at the model's intrinsic origin / scale even when the +// upstream node was visibly scaled by the user. +// +// Two behaviours intentionally preserved: +// - First-mesh-only: subtree may contain many meshes; only the first +// in depth-first order is instanced. (The "instance all meshes" +// combobox mode is a future feature.) +// - Sibling scene_transforms BEFORE the mesh ARE composed (matches +// the FlattenVisitor's "transform applies to subsequent siblings" +// contract). Sibling transforms AFTER the mesh would only affect +// later siblings and are correctly ignored here. +struct PrototypeWithTransform +{ + ossia::mesh_component_ptr mesh; + QMatrix4x4 world; // accumulated TRS from `node` down to `mesh` +}; + +namespace +{ +QMatrix4x4 transformToMatrix(const ossia::scene_transform& t) noexcept +{ + QMatrix4x4 m; + m.setToIdentity(); + m.translate(t.translation[0], t.translation[1], t.translation[2]); + m.rotate(QQuaternion(t.rotation[3], t.rotation[0], t.rotation[1], t.rotation[2])); + m.scale(t.scale[0], t.scale[1], t.scale[2]); + return m; +} +} + +PrototypeWithTransform +findFirstMesh(const ossia::scene_node& node, QMatrix4x4 parent = QMatrix4x4{}) noexcept +{ + PrototypeWithTransform out{nullptr, parent}; + if(!node.has_children()) + return out; + + QMatrix4x4 acc = parent; + for(const auto& payload : *node.children) + { + // scene_transform among siblings updates the running composition + // for any subsequent sibling — matching the FlattenVisitor's + // semantics. (See SceneGPUState.cpp:visitPayload scene_transform + // branch.) 
+ if(auto* xform = ossia::get_if(&payload)) + { + acc = acc * transformToMatrix(*xform); + continue; + } + + if(auto* m = ossia::get_if(&payload)) + { + if(*m) + { + out.mesh = *m; + out.world = acc; + return out; + } + } + if(auto* sub = ossia::get_if(&payload)) + { + if(*sub) + { + auto found = findFirstMesh(**sub, acc); + if(found.mesh) + return found; + } + } + } + return out; +} + +// Wrap a halp::gpu_buffer (a thin {handle, byte_size, byte_offset} +// struct) into an ossia::buffer_resource_ptr carrying a +// gpu_buffer_handle variant. Returns null when the input handle is +// null (e.g., no edge wired into that port), letting callers skip +// that slot. +ossia::buffer_resource_ptr +wrapGpuBuffer(const halp::gpu_buffer& buf) noexcept +{ + if(!buf.handle) + return nullptr; + ossia::gpu_buffer_handle gh; + gh.native_handle = buf.handle; + gh.byte_size = buf.byte_size; + gh.byte_offset = buf.byte_offset; + auto res = std::make_shared(); + res->resource = gh; + res->dirty_index = 1; + return res; +} + +// Result of walking a halp::dynamic_gpu_geometry for the attributes +// Instancer knows how to consume. Any slot without a matching +// attribute stays null and falls back to the raw buffer inputs. +struct PointCloudRouting +{ + ossia::buffer_resource_ptr transforms; // translation or transform_matrix + ossia::buffer_resource_ptr colors; // color0 + bool has_matrix{false}; // true if transform_matrix found + int instance_count{-1}; // geometry.vertices, or -1 +}; + +// Resolve a geometry attribute to its source {handle, byte_offset} +// by chasing attribute → input[binding] → buffers[input.buffer]. The +// byte offsets in the attribute and the input add; the final byte +// offset lives on the wrapped buffer_resource. 
+ossia::buffer_resource_ptr +wrapAttributeAsBuffer(const halp::dynamic_gpu_geometry& mesh, + const halp::geometry_attribute& attr) noexcept +{ + if(attr.binding < 0 || attr.binding >= (int)mesh.input.size()) + return nullptr; + const auto& in = mesh.input[attr.binding]; + if(in.buffer < 0 || in.buffer >= (int)mesh.buffers.size()) + return nullptr; + const auto& b = mesh.buffers[in.buffer]; + if(!b.handle) + return nullptr; + ossia::gpu_buffer_handle gh; + gh.native_handle = b.handle; + gh.byte_size = b.byte_size; + gh.byte_offset = in.byte_offset + attr.byte_offset; + auto res = std::make_shared(); + res->resource = gh; + res->dirty_index = 1; + return res; +} + +PointCloudRouting extractPointCloud( + const halp::dynamic_gpu_geometry& mesh) noexcept +{ + PointCloudRouting out; + if(mesh.buffers.empty() || mesh.attributes.empty()) + return out; + for(const auto& attr : mesh.attributes) + { + using S = halp::attribute_semantic; + switch(attr.semantic) + { + // transform_matrix wins over translation/position (it carries the + // full TRS), but only once it resolves to a real buffer. + case S::transform_matrix: + if(auto mat = wrapAttributeAsBuffer(mesh, attr)) + { out.transforms = std::move(mat); out.has_matrix = true; } + break; + case S::translation: + case S::position: + if(!out.has_matrix && !out.transforms) + out.transforms = wrapAttributeAsBuffer(mesh, attr); + break; + case S::color0: + if(!out.colors) + out.colors = wrapAttributeAsBuffer(mesh, attr); + break; + default: + break; + } + } + out.instance_count = mesh.vertices; + return out; +} + +} // namespace + +void Instancer::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + + // Find the prototype mesh in the incoming scene, alongside the + // composed scene_transform from each ancestor walked along the way. + // The composed transform feeds into the wrapped scene_node below + // so the instance cloud honours the upstream's authored TRS (e.g. 
+ // a Primitive node's scale, a glTF root's positioning) rather than + // dropping it on extraction. + ossia::mesh_component_ptr proto; + QMatrix4x4 protoWorld; + protoWorld.setToIdentity(); + if(in.state && in.state->roots) + { + for(const auto& r : *in.state->roots) + { + if(!r) + continue; + auto found = findFirstMesh(*r); + if(found.mesh) + { + proto = found.mesh; + protoWorld = found.world; + break; + } + } + } + + // Point-cloud input takes precedence over the raw buffer inlets + // when it's wired. We detect "wired" as "at least one buffer with + // a non-null handle in the points mesh". The routing struct + // populates transforms / colors from the matching attribute + // semantics; empty routing falls back to the raw buffer ports. + const bool has_points_input + = !inputs.points.mesh.buffers.empty() + && std::any_of( + inputs.points.mesh.buffers.begin(), + inputs.points.mesh.buffers.end(), + [](const halp::geometry_gpu_buffer& b) { return b.handle; }); + PointCloudRouting routing; + if(has_points_input) + routing = extractPointCloud(inputs.points.mesh); + void* points_primary + = has_points_input && !inputs.points.mesh.buffers.empty() + ? inputs.points.mesh.buffers[0].handle + : nullptr; + const int effective_count + = routing.instance_count > 0 ? routing.instance_count + : inputs.count.value; + + // TRS recomputed; we reuse computeTRSMatrix from TransformHelper + // even though we're not targeting a halp::mesh — the cache keeps the + // update hooks simple. 
+ float scratch[16]; + CachedTRS xformCache = m_cachedTRS; + computeTRSMatrix(inputs, scratch, xformCache); + m_cachedTRS = xformCache; + m_cached_in_state = in_state; + m_cached_transforms = inputs.transforms.buffer.handle; + m_cached_colors = inputs.colors.buffer.handle; + m_cached_custom = inputs.custom.buffer.handle; + m_cached_count = effective_count; + m_cached_format = inputs.format.value; + m_cached_points_buf = points_primary; + m_cached_points_vertices = inputs.points.mesh.vertices; + + if(!proto) + { + // No prototype mesh → empty output (but leave the inputs wired, + // so when a mesh appears later we pick it up on the next call). + if(!m_wrapped_state) + m_wrapped_state = std::make_shared(); + m_wrapped_state->roots.reset(); + m_wrapped_state->materials.reset(); + m_wrapped_state->version = ++m_version_counter; + m_wrapped_state->dirty_index = m_version_counter; + m_pending_dirty = 0xFF; + return; + } + + // Build the instance_component. + // Transforms + colors: if a Points input is wired, prefer its + // attributes (transform_matrix / translation / color0). Otherwise + // fall back to the raw buffer inlets. + auto inst = std::make_shared(); + inst->prototype = proto; + inst->instance_count + = effective_count > 0 ? uint32_t(effective_count) : 0u; + inst->instance_transforms + = routing.transforms + ? routing.transforms + : wrapGpuBuffer(inputs.transforms.buffer); + inst->instance_colors + = routing.colors + ? routing.colors + : wrapGpuBuffer(inputs.colors.buffer); + inst->instance_custom = wrapGpuBuffer(inputs.custom.buffer); + + // Transform format: if the Points input provided a transform_matrix + // attribute, force Mat4. Else if it provided translation/position, + // force Translation. Else obey the user's combobox. 
+ if(routing.has_matrix) + { + inst->transform_type + = ossia::instance_component::transform_format::mat4; + } + else if(routing.transforms) + { + inst->transform_type + = ossia::instance_component::transform_format::translation; + } + else + { + switch(inputs.format.value) + { + case TRS: + inst->transform_type = ossia::instance_component::transform_format::trs; + break; + case Translation: + inst->transform_type + = ossia::instance_component::transform_format::translation; + break; + default: + inst->transform_type + = ossia::instance_component::transform_format::mat4; + break; + } + } + inst->dirty_index = ++m_version_counter; + + // Wrap into a scene_node: + // child 0: local-controls scene_transform (Instancer's position / + // rotation / scale knobs). Updates parentWorld for every + // sibling that follows. + // child 1: prototype-ancestor scene_transform (the composed TRS + // that findFirstMesh accumulated walking down to the + // mesh upstream — e.g. the glTF root's scale, or a + // Primitive's TRS if it stamped one). Decomposed back + // into translation/quaternion/scale so the FlattenVisitor + // sees a normal scene_transform; the matrix is split + // by hand from its columns, so any shear in the upstream + // TRS (rare) is not preserved. When the matrix is identity + // (no upstream transform), this is effectively a no-op + // but is always emitted to keep the child layout stable + // across rebuilds. + // child 2: the instance_component payload. 
+ ossia::scene_transform xform; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + auto q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + xform.raw_slot = m_xform_ref; + + // Decompose the prototype-ancestor TRS. QMatrix4x4 doesn't expose a + // single TRS-decomposition call so we pull the columns: column 3 is + // the translation; the upper-left 3×3's column lengths give scale; + // the rotation matrix is the upper-left 3×3 with each column + // normalised. Skips reconstruction (leaves identity defaults) when + // protoWorld is the identity. + ossia::scene_transform protoXform; + protoXform.translation[0] = 0.f; + protoXform.translation[1] = 0.f; + protoXform.translation[2] = 0.f; + protoXform.rotation[0] = 0.f; + protoXform.rotation[1] = 0.f; + protoXform.rotation[2] = 0.f; + protoXform.rotation[3] = 1.f; + protoXform.scale[0] = 1.f; + protoXform.scale[1] = 1.f; + protoXform.scale[2] = 1.f; + if(!protoWorld.isIdentity()) + { + const float* d = protoWorld.constData(); + protoXform.translation[0] = d[12]; + protoXform.translation[1] = d[13]; + protoXform.translation[2] = d[14]; + QVector3D c0(d[0], d[1], d[2]); + QVector3D c1(d[4], d[5], d[6]); + QVector3D c2(d[8], d[9], d[10]); + protoXform.scale[0] = c0.length(); + protoXform.scale[1] = c1.length(); + protoXform.scale[2] = c2.length(); + if(protoXform.scale[0] > 1e-6f) c0 /= protoXform.scale[0]; + if(protoXform.scale[1] > 1e-6f) c1 /= protoXform.scale[1]; + if(protoXform.scale[2] > 1e-6f) c2 /= protoXform.scale[2]; + QMatrix3x3 rotmat; + rotmat(0,0)=c0.x(); rotmat(1,0)=c0.y(); rotmat(2,0)=c0.z(); + 
rotmat(0,1)=c1.x(); rotmat(1,1)=c1.y(); rotmat(2,1)=c1.z(); + rotmat(0,2)=c2.x(); rotmat(1,2)=c2.y(); rotmat(2,2)=c2.z(); + QQuaternion pq = QQuaternion::fromRotationMatrix(rotmat); + protoXform.rotation[0] = pq.x(); + protoXform.rotation[1] = pq.y(); + protoXform.rotation[2] = pq.z(); + protoXform.rotation[3] = pq.scalar(); + } + // raw_slot stays default (invalid) — this is a synthesized child and + // doesn't need a registry slot. The FlattenVisitor's scene_transform + // branch composes regardless of slot validity. + + auto children = std::make_shared>(); + children->push_back(xform); + children->push_back(protoXform); + children->push_back(ossia::instance_component_ptr(std::move(inst))); + + auto node = std::make_shared(); + node->children = std::move(children); + node->dirty_index = m_version_counter; + + auto roots = std::make_shared>(); + roots->push_back(std::move(node)); + + if(!m_wrapped_state) + m_wrapped_state = std::make_shared(); + m_wrapped_state->roots = std::move(roots); + // Pass through materials / animations / cameras / env from the + // input so PBR shaders still have their material table. + if(in.state) + { + m_wrapped_state->materials = in.state->materials; + m_wrapped_state->animations = in.state->animations; + m_wrapped_state->cameras = in.state->cameras; + m_wrapped_state->skeletons = in.state->skeletons; + m_wrapped_state->environment = in.state->environment; + m_wrapped_state->active_camera_id = in.state->active_camera_id; + } + m_wrapped_state->version = m_version_counter; + m_wrapped_state->dirty_index = m_version_counter; + m_pending_dirty = 0xFF; +} + +void Instancer::operator()() +{ + // Upstream scene_state / buffer-handle / point-cloud dirty flags can + // change without a port-update event — detect here and call + // rebuild(). Controls themselves trigger rebuild via update(). 
+ // + // The Points-input cache also has to compare the current vertex count + // and the primary buffer handle against the cached values written in + // rebuild() (m_cached_points_vertices / m_cached_points_buf). When an + // upstream CSF compute regenerates its point cloud with a different + // count (3500 → 4000) but reuses the same persistent QRhiBuffer, the + // dirty_mesh flag is NOT set (the buffer handle didn't change), and + // without these comparisons Instancer kept publishing the stale + // instance_count. Downstream ScenePreprocessor's update() then took + // its meshesUnchanged early-return; the persistent m_pendingGpuCopies + // queue kept firing the OLD count for the GPU translation/color copy, + // appearing as "instances frozen at the previous count, then snapping + // back at random intervals" whenever some unrelated rebuild kicked in. + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + void* points_primary + = !inputs.points.mesh.buffers.empty() + ? 
inputs.points.mesh.buffers[0].handle + : nullptr; + const bool upstream_changed + = m_cached_in_state != in_state + || m_cached_transforms != inputs.transforms.buffer.handle + || m_cached_colors != inputs.colors.buffer.handle + || m_cached_custom != inputs.custom.buffer.handle + || m_cached_points_buf != points_primary + || m_cached_points_vertices != inputs.points.mesh.vertices + || inputs.points.dirty_mesh; + if(!m_wrapped_state || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_wrapped_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +void Instancer::init( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void Instancer::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!raw_transform_slot.valid()) + return; + + score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); +} + +void Instancer::release(score::gfx::RenderList& r) +{ + if(raw_transform_slot.valid()) + 
r.registry().free(raw_transform_slot); + m_xform_ref = {}; + // Producer-state-drift Option A — see Light::release. + m_wrapped_state.reset(); +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/Instancer.hpp b/src/plugins/score-plugin-threedim/Threedim/Instancer.hpp new file mode 100644 index 0000000000..ce5f147366 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Instancer.hpp @@ -0,0 +1,169 @@ +#pragma once +#include "TransformHelper.hpp" + +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// GPU-instancing authoring node. Takes a scene containing a mesh and a +// GPU buffer of per-instance transforms (+ optional colors / custom), +// emits a scene_spec wrapping an `instance_component` that +// ScenePreprocessor forwards to downstream shaders as the standard +// `instance_transforms` / `instance_colors` / `instance_custom` +// auxiliary buffers. +// +// Consumer shaders (classic_pbr_mdi and friends) read the per-instance +// attributes via the existing VERTEX_INPUTS location 3..5 convention +// already in `GeometryToBufferStrategies.hpp`: +// location 3 = per-instance translation / rotation / transform_matrix +// location 4 = per-instance color0 +// location 5 = per-instance (scale / custom) +// No shader edits needed — the aux-buffer naming convention is the +// same one MeshInstancer uses. 
+//
+// Transform formats (packed floats per instance):
+//   mat4        : 16 floats (full 4×4 matrix, column-major)
+//   trs         : 10 floats (3 translation + 4 quaternion + 3 scale)
+//   translation :  3 floats (position-only, rotation / scale = identity)
+class Instancer
+{
+public:
+  halp_meta(name, "Instancer")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "instancer")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/instancer.html")
+  halp_meta(uuid, "5e8a2c7f-9b4d-4e3a-a1c6-2d7f0b3e8c4a")
+
+  // Listed in the same order as the labels of the Format combobox below
+  // ("mat4", "trs", "translation").
+  enum TransformFormat
+  {
+    Mat4,
+    TRS,
+    Translation
+  };
+
+  struct ins
+  {
+    struct
+    {
+      halp_meta(name, "Scene In");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_in;
+
+    halp::gpu_buffer_input<"Transforms"> transforms;
+    halp::gpu_buffer_input<"Colors"> colors;
+    halp::gpu_buffer_input<"Custom"> custom;
+
+    // Optional point-cloud geometry input. When wired, its semantic
+    // attributes override the raw buffer inputs above:
+    //   translation / position → Transforms buffer (Translation mode)
+    //   transform_matrix       → Transforms buffer (Mat4 mode)
+    //   color0                 → Colors buffer
+    // The `count` inlet is overridden by the geometry's vertex_count
+    // when this is wired (so downstream doesn't need to track the
+    // point-cloud size manually). Lets shaderlib presets
+    // (RandomScatter, EmitFromMesh, CurlNoiseForce, NoiseField etc.)
+    // feed Instancer directly without a glue repack.
+    struct
+    {
+      halp_meta(name, "Points");
+      halp::dynamic_gpu_geometry mesh;
+      float transform[16]{};
+      bool dirty_mesh = false;
+      bool dirty_transform = false;
+    } points;
+
+    // Port-driven rebuild: scalar controls trigger Instancer::rebuild().
+    // Upstream scene_in / buffer handles are detected in operator()()
+    // because they can change without a port-update event.
+    struct : halp::combobox_t<"Format", TransformFormat>
+    {
+      struct range
+      {
+        std::string_view values[3]{"mat4", "trs", "translation"};
+        int init{0};
+      };
+      void update(Instancer& n) { n.rebuild(); }
+    } format;
+
+    // Instance count control, declared with the range {1, 1000000} and
+    // an initial value of 1.
+    struct : halp::spinbox_i32<"Count", halp::irange{1, 1000000, 1}>
+    { void update(Instancer& n) { n.rebuild(); } } count;
+
+    // Optional TRS applied to the prototype before instancing — lets
+    // the node place the instanced cloud without a separate
+    // Transform3D upstream.
+    struct : PositionControl
+    { void update(Instancer& n) { n.rebuild(); } } position;
+    struct : RotationControl
+    { void update(Instancer& n) { n.rebuild(); } } rotation;
+    struct : ScaleControl
+    { void update(Instancer& n) { n.rebuild(); } } scale;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene Out");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // rebuild() is what every control's update() callback above invokes;
+  // both functions are defined out of line (not in this header).
+  void rebuild();
+  void operator()();
+
+  // Renderer-facing hooks taking the RenderList; defined out of line.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  // Cache so we republish a stable shared_ptr when inputs haven't
+  // changed — ScenePreprocessor's identity caches stay warm.
+  std::shared_ptr m_wrapped_state;
+  // Starts with every bit set — presumably so the first publish is
+  // flagged fully dirty; confirm against Instancer.cpp.
+  uint8_t m_pending_dirty{0xFF};
+  CachedTRS m_cachedTRS{};
+  // Track input identity to detect when a rebuild is needed without
+  // relying on buffer-contents equality.
+  // The -1 initial values below look like "nothing cached yet" markers
+  // (TODO confirm in Instancer.cpp).
+  const ossia::scene_state* m_cached_in_state{};
+  void* m_cached_transforms{};
+  void* m_cached_colors{};
+  void* m_cached_custom{};
+  int32_t m_cached_count{-1};
+  int m_cached_format{-1};
+  // For the point-cloud input: cache the primary-buffer identity so we
+  // detect upstream handle replacements without poking every buffer
+  // every frame.
+  void* m_cached_points_buf{};
+  int64_t m_cached_points_vertices{-1};
+  int64_t m_version_counter{0};
+
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
+  ossia::gpu_slot_ref m_xform_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/InverseKinematics.hpp b/src/plugins/score-plugin-threedim/Threedim/InverseKinematics.hpp
new file mode 100644
index 0000000000..c76823ff4b
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/InverseKinematics.hpp
@@ -0,0 +1,313 @@
+#pragma once
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Threedim
+{
+
+// Two-bone analytical IK solver operating on a scene_spec's skeleton.
+//
+// Given a 3-joint chain (root → mid → end), a target world-space position,
+// and a pole vector (to disambiguate the elbow plane), produces the joint
+// rotations that make the end effector reach — or as close as possible to —
+// the target. Law-of-cosines closed form, runs in ~50 floating-point ops,
+// no iteration.
+//
+// The solver reads the input skeleton's TRS, finds the named end joint,
+// walks two parents up to identify the chain, and emits a scene_spec with
+// ONLY the root and mid joints' local rotations modified (the end joint
+// itself is left untouched). The rest of the skeleton and the mesh /
+// material data pass through unchanged.
+//
+// This is the "reach for that door handle" IK — for full articulated
+// rigs with >2 bones, spine chains, or pole-axis constraints, chain a
+// sequence of these per limb, or write a FABRIK/CCD successor that
+// operates on N-joint chains. The interface is intentionally narrow so
+// swapping in more sophisticated solvers later doesn't break patches.
+// +// Limitations: +// - no joint-limit / rotation-constraint support yet +// - no twist decomposition +// - chain must be a direct parent line in the skeleton; siblings / branches +// aren't supported +// - target-unreachable case: extends the chain fully toward the target +// (the natural "straight-arm stretch" behaviour). +class InverseKinematics +{ +public: + halp_meta(name, "Inverse Kinematics (2-bone)") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "inverse_kinematics") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/inverse-kinematics.html") + halp_meta(uuid, "6e9f2a4c-1b85-4d3e-a7f6-8c2b4d5e9a0f") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + halp::lineedit<"End joint name", "hand_r"> end_joint; + + halp::xyz_spinboxes_f32< + "Target", + halp::range{-10000., 10000., 0.}> + target; + halp::xyz_spinboxes_f32< + "Pole vector", + halp::range{-10000., 10000., 0.}> + pole; + + halp::hslider_f32<"Weight", halp::range{0., 1., 1.}> weight; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + std::shared_ptr m_state; + int64_t m_version{0}; + + static QVector3D toVec(const float v[3]) { return QVector3D(v[0], v[1], v[2]); } + static QQuaternion toQuat(const float v[4]) + { + return QQuaternion(v[3], v[0], v[1], v[2]); + } + static void fromQuat(float v[4], const QQuaternion& q) + { + v[0] = q.x(); v[1] = q.y(); v[2] = q.z(); v[3] = q.scalar(); + } + + // Compute world-space position of joint `idx` by walking up the parent + // chain and composing TRS transforms. + static QVector3D worldJointPos( + const ossia::skeleton_component& skel, int32_t idx) + { + if(idx < 0 || idx >= (int32_t)skel.joints.size()) + return QVector3D(); + + // Build a chain from root to idx, then compose forward. 
+ ossia::small_vector chain; + for(int32_t i = idx; i >= 0; i = skel.joints[i].parent_index) + chain.push_back(i); + std::reverse(chain.begin(), chain.end()); + + QMatrix4x4 M; + for(int32_t i : chain) + { + const auto& j = skel.joints[i]; + QMatrix4x4 T; + T.translate(j.translation[0], j.translation[1], j.translation[2]); + T.rotate(QQuaternion( + j.rotation[3], j.rotation[0], j.rotation[1], j.rotation[2])); + T.scale(j.scale[0], j.scale[1], j.scale[2]); + M = M * T; + } + return M.map(QVector3D()); + } + + // 2-bone IK core: given three world positions + target + pole, compute + // the rotations (world-space) to apply at the root and mid joints so that + // end reaches the target. Returns the delta rotations as quaternions. + struct Solution + { + QQuaternion rootDelta; + QQuaternion midDelta; + }; + static Solution solve2Bone( + QVector3D root, QVector3D mid, QVector3D end, + QVector3D target, QVector3D pole) + { + const float eps = 1e-6f; + QVector3D r2m = mid - root; + QVector3D m2e = end - mid; + QVector3D r2e = end - root; + QVector3D r2t = target - root; + + const float lA = r2m.length(); + const float lB = m2e.length(); + const float lTgt = std::min(r2t.length(), lA + lB - eps); + if(lA < eps || lB < eps || lTgt < eps) + return {QQuaternion(), QQuaternion()}; + + // New elbow interior angle via law of cosines: + // cos(theta) = (lA² + lB² - lTgt²) / (2 lA lB) + const float cosNew = std::clamp( + (lA * lA + lB * lB - lTgt * lTgt) / (2.0f * lA * lB), -1.0f, 1.0f); + const float thetaNew = std::acos(cosNew); + + // Current elbow interior angle. + const float cosCur = std::clamp( + QVector3D::dotProduct(-r2m.normalized(), m2e.normalized()), + -1.0f, 1.0f); + const float thetaCur = std::acos(cosCur); + + // Rotation axis for the elbow: perpendicular to the current arm plane, + // oriented by the pole vector so we pick the "elbow side". 
+ QVector3D planeNormal = QVector3D::crossProduct(r2m, m2e); + if(planeNormal.lengthSquared() < eps) + { + // Arm is straight → use pole vector's projected perpendicular. + QVector3D poleDir = (pole - root).normalized(); + planeNormal = QVector3D::crossProduct(r2e.normalized(), poleDir); + if(planeNormal.lengthSquared() < eps) + planeNormal = QVector3D(0, 1, 0); + } + planeNormal.normalize(); + + QQuaternion elbowDelta = QQuaternion::fromAxisAndAngle( + planeNormal, (thetaCur - thetaNew) * 180.0f / float(M_PI)); + + // Rotate the shoulder so the new r2m points toward target minus the + // elbow contribution. + QVector3D r2t_n = r2t.normalized(); + QVector3D r2e_n = r2e.normalized(); + QQuaternion rootDelta = QQuaternion::rotationTo(r2e_n, r2t_n); + + return {rootDelta, elbowDelta}; + } + + void operator()() + { + const auto& in = inputs.scene_in.scene; + if(!in.state || !in.state->roots) + { + outputs.scene_out.scene.state.reset(); + outputs.scene_out.dirty = 0; + return; + } + + // Find the skeleton: first skeleton_component referenced by any mesh. + const ossia::skeleton_component* srcSkel = nullptr; + if(in.state->skeletons && !in.state->skeletons->empty()) + srcSkel = (*in.state->skeletons)[0].get(); + if(!srcSkel || srcSkel->joints.empty()) + { + outputs.scene_out.scene = in; // passthrough + outputs.scene_out.dirty = 0; + return; + } + + const std::string endName = inputs.end_joint.value; + int32_t endIdx = srcSkel->find_joint(endName); + if(endIdx < 0 || srcSkel->joints[endIdx].parent_index < 0) + { + outputs.scene_out.scene = in; + outputs.scene_out.dirty = 0; + return; + } + const int32_t midIdx = srcSkel->joints[endIdx].parent_index; + if(srcSkel->joints[midIdx].parent_index < 0) + { + outputs.scene_out.scene = in; + outputs.scene_out.dirty = 0; + return; + } + const int32_t rootIdx = srcSkel->joints[midIdx].parent_index; + + // Current world-space joint positions. 
+ QVector3D wRoot = worldJointPos(*srcSkel, rootIdx); + QVector3D wMid = worldJointPos(*srcSkel, midIdx); + QVector3D wEnd = worldJointPos(*srcSkel, endIdx); + + QVector3D target( + inputs.target.value.x, inputs.target.value.y, inputs.target.value.z); + QVector3D pole( + inputs.pole.value.x, inputs.pole.value.y, inputs.pole.value.z); + + Solution sol = solve2Bone(wRoot, wMid, wEnd, target, pole); + + // Blend by weight. At weight=0 the output scene is the input unchanged. + const float w = std::clamp(inputs.weight.value, 0.0f, 1.0f); + if(w <= 0.0f) + { + outputs.scene_out.scene = in; + outputs.scene_out.dirty = 0; + return; + } + QQuaternion rootDelta = QQuaternion::slerp(QQuaternion(), sol.rootDelta, w); + QQuaternion midDelta = QQuaternion::slerp(QQuaternion(), sol.midDelta, w); + + // Copy the skeleton and mutate the two rotations. Keep other joints + // untouched so downstream animation / rendering sees a minimal diff. + auto newSkel = std::make_shared(*srcSkel); + + // These deltas are in world space. Translate to local (parent-relative) + // rotation by undoing the parent's accumulated rotation. + auto worldRotOf = [&](int32_t idx) { + QQuaternion q; + for(int32_t i = idx; i >= 0; i = srcSkel->joints[i].parent_index) + { + QQuaternion local( + srcSkel->joints[i].rotation[3], + srcSkel->joints[i].rotation[0], + srcSkel->joints[i].rotation[1], + srcSkel->joints[i].rotation[2]); + q = local * q; + } + return q; + }; + QQuaternion parentRoot = srcSkel->joints[rootIdx].parent_index >= 0 + ? 
worldRotOf(srcSkel->joints[rootIdx].parent_index) + : QQuaternion(); + QQuaternion parentMid = worldRotOf(rootIdx); + + QQuaternion rootLocalNew + = parentRoot.inverted() * rootDelta * parentRoot + * toQuat(srcSkel->joints[rootIdx].rotation); + QQuaternion midLocalNew + = parentMid.inverted() * midDelta * parentMid + * toQuat(srcSkel->joints[midIdx].rotation); + + fromQuat(newSkel->joints[rootIdx].rotation, rootLocalNew); + fromQuat(newSkel->joints[midIdx].rotation, midLocalNew); + newSkel->dirty_index++; + + // Build the output scene_state — shallow copy of input, swap the + // skeletons vector to contain our mutated skeleton. + if(!m_state || m_state->version != in.state->version - 1) + m_state = std::make_shared(*in.state); + else + *m_state = *in.state; + + auto skels = std::make_shared>(); + if(in.state->skeletons) + *skels = *in.state->skeletons; + if(skels->empty()) + skels->push_back(newSkel); + else + (*skels)[0] = newSkel; + m_state->skeletons = std::move(skels); + m_version++; + m_state->version = m_version; + + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = ossia::scene_port::dirty_transform; + } +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Light.cpp b/src/plugins/score-plugin-threedim/Threedim/Light.cpp new file mode 100644 index 0000000000..a7982fc79c --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Light.cpp @@ -0,0 +1,278 @@ +#include "Light.hpp" + +#include +#include + +#include +#include + +namespace Threedim +{ + +namespace +{ +inline ossia::light_type toLightType(Light::Mode m) noexcept +{ + switch(m) + { + case Light::Directional: return ossia::light_type::directional; + case Light::Point: return ossia::light_type::point; + case Light::Spot: return ossia::light_type::spot; + case Light::Rect: return ossia::light_type::rect_area; + case Light::Disk: return ossia::light_type::disk_area; + case Light::Sphere: return ossia::light_type::sphere_area; + case Light::Dome: return 
ossia::light_type::dome; + } + return ossia::light_type::point; +} + +inline ossia::light_decay toLightDecay(Light::Decay d) noexcept +{ + switch(d) + { + case Light::DecayNone: return ossia::light_decay::none; + case Light::DecayLinear: return ossia::light_decay::linear; + case Light::DecayQuadratic: return ossia::light_decay::quadratic; + case Light::DecayCubic: return ossia::light_decay::cubic; + } + return ossia::light_decay::quadratic; +} +} + +void Light::rebuild() +{ + if(!m_state) + m_state = std::make_shared(); + if(m_light_stable_id == 0) + m_light_stable_id = ossia::mint_stable_id(); + if(m_xform_stable_id == 0) + m_xform_stable_id = ossia::mint_stable_id(); + + auto lc = std::make_shared(); + lc->stable_id = m_light_stable_id; + lc->type = toLightType(Mode(inputs.mode.value)); + lc->decay = toLightDecay(Decay(inputs.decay.value)); + + lc->color[0] = inputs.color.value.r; + lc->color[1] = inputs.color.value.g; + lc->color[2] = inputs.color.value.b; + lc->intensity = inputs.intensity.value; + lc->range = inputs.range.value; + + // Degrees → radians for cone angles. + constexpr float deg2rad = float(M_PI) / 180.f; + lc->inner_cone_angle = inputs.inner_cone.value * deg2rad; + lc->outer_cone_angle = inputs.outer_cone.value * deg2rad; + + // Area-shape dimensions: Rect uses width+height, Disk/Sphere use + // radius. The fields are unused for Directional/Point/Spot but + // setting them anyway is harmless. + lc->width = inputs.width.value; + lc->height = inputs.height.value; + lc->radius = inputs.radius.value; + + lc->shadow.enabled = inputs.cast_shadow.value; + lc->shadow.bias = inputs.shadow_bias.value; + lc->shadow.normal_bias = inputs.shadow_normal_bias.value; + + // Propagate the RawLight arena slot ref (populated in init()). + lc->raw_slot = m_light_ref; + + lc->dirty_index = ++m_version; + + // Standard wrapping: a scene_node holding [scene_transform, + // light_component]. 
The transform encodes the light's world position + // + orientation; FlattenVisitor pushes that through parentWorld when + // visiting this node, so the light's direction column ends up + // correctly oriented in world space even when the node is placed + // under a parent transform chain. + ossia::scene_transform xform; + xform.stable_id = m_xform_stable_id; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, + inputs.rotation.value.y, + inputs.rotation.value.z); + + // Directional / spot / area-light direction is determined by the + // node's rotation applied to -Z (Vulkan / glTF convention). When + // the rotation is identity, the light points along -Z. + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = xform.scale[1] = xform.scale[2] = 1.f; + // Propagate the RawTransform slot ref (populated in init()). + xform.raw_slot = m_xform_ref; + + auto children = std::make_shared>(); + children->push_back(xform); + children->push_back(ossia::light_component_ptr(std::move(lc))); + + auto node = std::make_shared(); + node->children = std::move(children); + + auto roots = std::make_shared>(); + roots->push_back(std::move(node)); + + m_state->roots = std::move(roots); + m_state->version = m_version; + m_pending_dirty = ossia::scene_port::dirty_lights; +} + +void Light::operator()() +{ + if(!m_state) + rebuild(); + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +namespace +{ +// Mode → raw type encoding used by RawLightData::local_direction.w and +// LightGPU::position_type.w. 
Area / dome modes collapse onto punctual +// analogues for the raw arena (directional for dome, point for rect / +// disk / sphere) — area-light shading is a shader-side extension +// scheduled after the preprocessor consumes the raw slots. +inline float toRawLightType(Light::Mode m) noexcept +{ + switch(m) + { + case Light::Directional: return 0.f; + case Light::Point: return 1.f; + case Light::Spot: return 2.f; + case Light::Rect: + case Light::Disk: + case Light::Sphere: return 1.f; + case Light::Dome: return 0.f; + } + return 1.f; +} + +inline uint32_t toRawLightDecay(Light::Decay d) noexcept +{ + return (uint32_t)d; +} +} + +// Order invariant: called by GfxRenderer::initState BEFORE the first +// operator()() and BEFORE processControlIn fires any rebuild() callback. +// m_light_ref / m_xform_ref populated here are therefore safe to read +// in rebuild() without a guard. Adding prepare() to this node breaks the +// invariant — see CpuFilterNode.hpp for details. +void Light::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_light_slot.valid()) + { + raw_light_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawLight, + sizeof(score::gfx::RawLightData)); + m_light_ref = r.registry().toOssiaRef(raw_light_slot); + } + if(raw_light_slot.valid()) + { + score::gfx::RawLightData seed{}; + r.registry().updateSlot(res, raw_light_slot, &seed, sizeof(seed)); + } + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void Light::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!raw_light_slot.valid()) + return; + + 
score::gfx::RawLightData raw{}; + raw.color[0] = inputs.color.value.r; + raw.color[1] = inputs.color.value.g; + raw.color[2] = inputs.color.value.b; + raw.color[3] = inputs.intensity.value; + + // Light convention: local -Z is the configured direction. The + // preprocessor's world-matrix pass maps that through the node's + // parent chain + rotation to get the world-space direction used + // by the consumer shader. Keep the canonical local vector here. + raw.local_direction[0] = 0.f; + raw.local_direction[1] = 0.f; + raw.local_direction[2] = -1.f; + raw.local_direction[3] = toRawLightType(Mode(inputs.mode.value)); + + constexpr float deg2rad = float(M_PI) / 180.f; + const float inner_rad = inputs.inner_cone.value * deg2rad; + const float outer_rad = inputs.outer_cone.value * deg2rad; + + raw.range_cone[0] = inputs.range.value; + raw.range_cone[1] = std::cos(inner_rad); + raw.range_cone[2] = std::cos(outer_rad); + raw.range_cone[3] = inputs.shadow_bias.value; + + raw.shadow_enabled = inputs.cast_shadow.value ? 1u : 0u; + raw.decay_mode = toRawLightDecay(Decay(inputs.decay.value)); + raw.normal_bias = inputs.shadow_normal_bias.value; + // Stamp our scene_transform's arena slot index so consumer shaders + // can read world_transforms.data[transform_slot] to compose a world- + // space direction/position from the local-frame fields above. + raw.transform_slot = raw_transform_slot.valid() + ? 
raw_transform_slot.slot_index + : 0u; + + r.registry().updateSlot(res, raw_light_slot, &raw, sizeof(raw)); + + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = 1.f; + xform.scale[1] = 1.f; + xform.scale[2] = 1.f; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); + } +} + +void Light::release(score::gfx::RenderList& r) +{ + if(raw_light_slot.valid()) + r.registry().free(raw_light_slot); + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_light_ref = {}; + m_xform_ref = {}; + // Clear the cached scene_state shared_ptr so the next operator()() + // re-runs rebuild() against the post-release registry. Without this, + // an in-place release+init path (relinkGraph / maybeRebuild) would + // republish a state whose lc->raw_slot still embeds the OLD + // (now-freed) slot index. ScenePreprocessor then harvests that + // stale index into scene_light_indices, the rasterizer reads from + // a different slot than the one Light::update() is now writing + // to → wildly wrong lighting that drifts each cycle as the LIFO + // free-list reshuffles. Producer-state-drift Option A. 
+ m_state.reset(); +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/Light.hpp b/src/plugins/score-plugin-threedim/Threedim/Light.hpp new file mode 100644 index 0000000000..c874affb67 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Light.hpp @@ -0,0 +1,188 @@ +#pragma once +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Unified light producer. One node with a mode combobox covers every +// punctual / area light type ossia::light_component defines — +// directional, point, spot, rect, disk, sphere, cylinder, dome — +// mirroring UsdLux's RectLight/DiskLight/SphereLight and glTF +// KHR_lights_punctual. +// +// Emits an ossia::scene_spec containing one scene_node with: +// - child[0] = scene_transform (position + rotation, no scale) +// - child[1] = light_component_ptr +// ScenePreprocessor packs it into the scene-wide `scene_lights` SSBO via +// packLight(). Current consumer shaders (`classic_pbr_*.frag`) only +// sample the common fields (position/direction/color/intensity/range + +// spot cone angles) — area-light shapes pass through correctly but +// are rendered as point-light approximations until shaders add the +// Rect/Disk/Sphere sampling math. That's a shader-side follow-up. 
+class Light
+{
+public:
+  halp_meta(name, "Light")
+  halp_meta(category, "Visuals/3D/Scene")
+  halp_meta(c_name, "light")
+  halp_meta(authors, "ossia team")
+  halp_meta(
+      manual_url,
+      "https://ossia.io/score-docs/processes/light.html")
+  halp_meta(uuid, "9f3c1a5e-4b7d-4e2a-8c5f-1d6e0b9a3c7f")
+
+  // Listed in the same order as the labels of the Mode combobox below.
+  enum Mode
+  {
+    Directional,
+    Point,
+    Spot,
+    Rect,
+    Disk,
+    Sphere,
+    Dome
+  };
+
+  // Listed in the same order as the labels of the Falloff combobox below.
+  // Light.cpp uploads the enumerator's numeric value as the raw decay
+  // mode, so reordering these changes what is written to the GPU slot.
+  enum Decay
+  {
+    DecayNone,
+    DecayLinear,
+    DecayQuadratic, // physically correct
+    DecayCubic
+  };
+
+  struct ins
+  {
+    // Port-driven rebuild: each control's update() callback triggers
+    // Light::rebuild() on user change. operator()() just republishes.
+    struct : halp::combobox_t<"Mode", Mode>
+    {
+      struct range
+      {
+        std::string_view values[7]{
+            "Directional", "Point", "Spot",
+            "Rect", "Disk", "Sphere", "Dome"};
+        int init{0};
+      };
+      void update(Light& n) { n.rebuild(); }
+    } mode;
+
+    // Common — always applies
+    struct : halp::color_chooser<"Color">
+    { void update(Light& n) { n.rebuild(); } } color;
+    struct : halp::hslider_f32<"Intensity", halp::range{0., 100., 1.}>
+    { void update(Light& n) { n.rebuild(); } } intensity;
+    // range=0 → infinite falloff (directional / dome ignore this field)
+    struct : halp::hslider_f32<"Range", halp::range{0., 1000., 0.}>
+    { void update(Light& n) { n.rebuild(); } } range;
+
+    struct : halp::combobox_t<"Falloff", Decay>
+    {
+      struct range
+      {
+        std::string_view values[4]{
+            "None", "Linear", "Quadratic (physical)", "Cubic"};
+        int init{2};
+      };
+      void update(Light& n) { n.rebuild(); }
+    } decay;
+
+    // Spot cone angles, edited in degrees; Light.cpp converts them to
+    // radians.
+    struct : halp::hslider_f32<"Inner cone °", halp::range{0., 90., 0.}>
+    { void update(Light& n) { n.rebuild(); } } inner_cone;
+    struct : halp::hslider_f32<"Outer cone °", halp::range{0., 90., 45.}>
+    { void update(Light& n) { n.rebuild(); } } outer_cone;
+
+    // Area shapes
+    struct : halp::hslider_f32<"Width", halp::range{0.01, 100., 1.}>
+    { void update(Light& n) { n.rebuild(); } } width;
+    struct : halp::hslider_f32<"Height", halp::range{0.01, 100., 1.}>
+    { void update(Light& n) { n.rebuild(); } } height;
+    struct : halp::hslider_f32<"Radius", halp::range{0.01, 100., 0.5}>
+    { void update(Light& n) { n.rebuild(); } } radius;
+
+    // Shadow settings
+    struct : halp::toggle<"Cast shadow">
+    { void update(Light& n) { n.rebuild(); } } cast_shadow;
+    struct : halp::hslider_f32<"Shadow bias", halp::range{0., 0.1, 0.001}>
+    { void update(Light& n) { n.rebuild(); } } shadow_bias;
+    struct : halp::hslider_f32<"Shadow normal bias", halp::range{0., 0.1, 0.01}>
+    { void update(Light& n) { n.rebuild(); } } shadow_normal_bias;
+
+    // Transform: position for positional lights, rotation encodes the
+    // direction used by Directional / Spot (local -Z mapped to the
+    // light direction, glTF / Vulkan convention).
+    struct : PositionControl
+    { void update(Light& n) { n.rebuild(); } } position;
+    struct : RotationControl
+    { void update(Light& n) { n.rebuild(); } } rotation;
+  } inputs;
+
+  struct outs
+  {
+    struct
+    {
+      halp_meta(name, "Scene");
+      ossia::scene_spec scene;
+      uint8_t dirty{0};
+    } scene_out;
+  } outputs;
+
+  // Built from control values whenever a port's update() fires (and by
+  // operator()() when no state exists yet).
+  // operator()() just republishes m_state.
+  void rebuild();
+  void operator()();
+
+  // Render-thread hooks. init claims one RawLight slot and one
+  // RawTransform slot; update packs
+  // color / intensity / type / local-direction / range / cone angles /
+  // decay / shadow into a RawLightData and position / rotation into a
+  // RawLocalTransform, and uploads both; release returns
+  // the slots. Final world-direction composition happens inside the
+  // preprocessor (parent-chain world matrix), so the light slot carries
+  // only the node-local fields.
+  void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res);
+  void update(
+      score::gfx::RenderList& r, QRhiResourceUpdateBatch& res,
+      score::gfx::Edge* e);
+  void release(score::gfx::RenderList& r);
+
+  std::shared_ptr m_state;
+  int64_t m_version{0};
+  uint8_t m_pending_dirty{ossia::scene_port::dirty_lights};
+  // Stable id for the single light_component this node emits. Minted
+  // lazily on first rebuild() and reused across all subsequent rebuilds
+  // so downstream caches (preprocessor fingerprint, SER coherence key)
+  // stay keyed on identity, not pointer.
+  uint64_t m_light_stable_id{};
+  uint64_t m_xform_stable_id{};
+
+  score::gfx::GpuResourceRegistry::Slot raw_light_slot;
+  score::gfx::GpuResourceRegistry::Slot raw_transform_slot;
+
+  // Ossia-facing snapshots. Written in init() on the render
+  // thread (and cleared in release()); copied onto each emitted
+  // light_component / scene_transform raw_slot by rebuild().
+  ossia::gpu_slot_ref m_light_ref{};
+  ossia::gpu_slot_ref m_xform_ref{};
+};
+
+}
diff --git a/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.cpp b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.cpp
new file mode 100644
index 0000000000..3c88f07955
--- /dev/null
+++ b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.cpp
@@ -0,0 +1,219 @@
+#include "MaterialOverride.hpp"
+
+#include
+
+namespace Threedim
+{
+
+namespace
+{
+
+// Copy a gpu texture handle from halp into an ossia texture_ref.
+// We only populate the `texture` field — `source` stays null so the
+// ScenePreprocessor's channelDynamicHandle() treats this ref as DYNAMIC.
+// Sampler state is left at its default (linear/linear/repeat); can be
+// exposed as controls later if needed.
+void applyTextureOverride( + ossia::texture_ref& dst, const halp::gpu_texture& src) noexcept +{ + dst.source.reset(); + dst.texture.native_handle = src.handle; + dst.texture.bindless_index = 0; + // sampler stays default +} + +// Decide whether a given material-index should receive overrides, given +// the mode and index inputs. +bool shouldOverride(int idx, int mode, int override_index) noexcept +{ + switch(mode) + { + case MaterialOverride::All: return true; + case MaterialOverride::ByIndex: return idx == override_index; + default: return false; + } +} + +} // namespace + +void MaterialOverride::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + + void* cur_tex[4]{ + inputs.base_color_tex.texture.handle, + inputs.metal_rough_tex.texture.handle, + inputs.normal_tex.texture.handle, + inputs.emissive_tex.texture.handle}; + + // No texture overrides and no factor overrides → passthrough. Keeps + // downstream identity caches warm for the common "unconfigured" case. 
+ const bool any_tex = cur_tex[0] || cur_tex[1] || cur_tex[2] || cur_tex[3]; + const bool any_factor = inputs.use_base_color.value || inputs.use_metallic.value + || inputs.use_roughness.value + || inputs.use_emissive.value; + if(!any_tex && !any_factor) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + const float cur_base[4]{ + inputs.base_r.value, inputs.base_g.value, + inputs.base_b.value, inputs.base_a.value}; + const float cur_em[4]{ + inputs.em_r.value, inputs.em_g.value, inputs.em_b.value, + inputs.em_strength.value}; + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_cached_mode = inputs.mode.value; + m_cached_index = inputs.index.value; + std::copy(cur_tex, cur_tex + 4, m_cached_tex); + m_cached_use_base = inputs.use_base_color.value; + m_cached_use_metallic = inputs.use_metallic.value; + m_cached_use_roughness = inputs.use_roughness.value; + m_cached_use_emissive = inputs.use_emissive.value; + std::copy(cur_base, cur_base + 4, m_cached_base); + m_cached_metallic = inputs.metallic.value; + m_cached_roughness = inputs.roughness.value; + std::copy(cur_em, cur_em + 4, m_cached_em); + + if(!in_state || !in_state->materials || in_state->materials->empty()) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + const auto& src_mats = *in_state->materials; + auto new_mats = std::make_shared>(); + new_mats->reserve(src_mats.size()); + + // Track which source materials we clone this cycle so we can GC stale + // entries from m_clone_cache (freed when upstream shrinks or swaps). 
+ ossia::hash_set seen_src; + seen_src.reserve(src_mats.size()); + + for(std::size_t i = 0; i < src_mats.size(); ++i) + { + const auto& src_mat = src_mats[i]; + if(!src_mat || !shouldOverride((int)i, inputs.mode.value, inputs.index.value)) + { + new_mats->push_back(src_mat); + continue; + } + seen_src.insert(src_mat.get()); + + // Reuse the cached clone shared_ptr if we've cloned this source + // before — MUTATING its fields in place. The shared_ptr address + // stays stable across rebuilds, so the preprocessor's + // m_loaderMaterialSlots keeps the material arena slot allocated + // across frames: no per-frame GC + reallocate churn, Material arena + // content stays hot for SSBO-direct shader reads (task 28a). + // stable_id is inherited from the source via the copy — the + // fingerprint sees the override as the same logical material. + auto it = m_clone_cache.find(src_mat.get()); + std::shared_ptr cloned; + if(it != m_clone_cache.end()) + { + // Reuse: start from the original upstream fields every rebuild to + // avoid accumulating stale override state (e.g. when the user + // toggles 'use_metallic' off, the factor must revert to + // upstream's). 
+ cloned = it->second; + *cloned = *src_mat; + } + else + { + cloned = std::make_shared(*src_mat); + m_clone_cache.emplace(src_mat.get(), cloned); + } + + if(cur_tex[0]) + applyTextureOverride(cloned->base_color_texture, inputs.base_color_tex.texture); + if(cur_tex[1]) + applyTextureOverride( + cloned->metallic_roughness_texture, inputs.metal_rough_tex.texture); + if(cur_tex[2]) + applyTextureOverride(cloned->normal_texture, inputs.normal_tex.texture); + if(cur_tex[3]) + applyTextureOverride(cloned->emissive_texture, inputs.emissive_tex.texture); + + if(inputs.use_base_color.value) + { + cloned->base_color_factor[0] = cur_base[0]; + cloned->base_color_factor[1] = cur_base[1]; + cloned->base_color_factor[2] = cur_base[2]; + cloned->base_color_factor[3] = cur_base[3]; + } + if(inputs.use_metallic.value) + cloned->metallic_factor = inputs.metallic.value; + if(inputs.use_roughness.value) + cloned->roughness_factor = inputs.roughness.value; + if(inputs.use_emissive.value) + { + cloned->emissive_factor[0] = cur_em[0]; + cloned->emissive_factor[1] = cur_em[1]; + cloned->emissive_factor[2] = cur_em[2]; + cloned->emissive_strength = cur_em[3]; + } + + new_mats->push_back(cloned); + } + + // GC cache entries whose source material vanished from upstream. + for(auto it = m_clone_cache.begin(); it != m_clone_cache.end();) + { + if(seen_src.find(it->first) == seen_src.end()) + it = m_clone_cache.erase(it); + else + ++it; + } + + auto state = std::make_shared(); + // Passthrough: roots / cameras / animations / skeletons / environment + // all reference the upstream shared_ptrs (no deep copy). Only materials + // is swapped out. 
+ state->roots = in_state->roots; + state->animations = in_state->animations; + state->cameras = in_state->cameras; + state->skeletons = in_state->skeletons; + state->environment = in_state->environment; + state->active_camera_id = in_state->active_camera_id; + state->materials = std::move(new_mats); + state->version = ++m_version_counter; + state->dirty_index = m_version_counter; + + m_cached_out = state; + m_pending_dirty = 0xFF; +} + +void MaterialOverride::operator()() +{ + // Upstream scene_state and live texture handles can change without a + // port-update event (upstream runs per-tick; video/CSF textures swap + // native handles mid-stream). Detect those here and trigger rebuild. + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + void* cur_tex[4]{ + inputs.base_color_tex.texture.handle, + inputs.metal_rough_tex.texture.handle, + inputs.normal_tex.texture.handle, + inputs.emissive_tex.texture.handle}; + const bool upstream_changed + = m_cached_in_state != in_state || m_cached_in_version != in_version + || m_cached_tex[0] != cur_tex[0] || m_cached_tex[1] != cur_tex[1] + || m_cached_tex[2] != cur_tex[2] || m_cached_tex[3] != cur_tex[3]; + if(!m_cached_out || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.hpp b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.hpp new file mode 100644 index 0000000000..a5d554cf24 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/MaterialOverride.hpp @@ -0,0 +1,177 @@ +#pragma once +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace Threedim +{ + +// Injects runtime GPU textures and/or factor overrides into a scene's +// material table. 
The primary live-VJ use case: drop a video texture +// (or HDR shader output) onto an existing material without reloading +// the scene. Authored on top of the Dynamic Texture pathway in +// ScenePreprocessor — the texture handle is forwarded verbatim and +// ScenePreprocessor emits it as a `*Dyn` auxiliary-texture binding +// that classic_pbr_full (and any shader opting into the DYNAMIC source +// branch) samples directly. +// +// Scope: the four PBR slots (base color / metal-rough / normal / +// emissive). Occlusion and extension textures (transmission, clearcoat, +// sheen…) are not in the ScenePreprocessor's array pool yet, so +// overriding them here would have no effect downstream. +// +// Mode: +// All — every material in the scene gets the override applied. +// ByIndex — only `scene.state->materials[Index]` is overridden. Other +// materials pass through unchanged. Use Scene Inspector + +// the ByIndex variant to target a single object. +// +// Factor toggles gate whether the scalar/vector controls take effect; +// textures auto-gate on "handle is non-null" so an unwired inlet is a +// no-op regardless of state. +class MaterialOverride +{ +public: + halp_meta(name, "Material Override") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "material_override") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/material-override.html") + halp_meta(uuid, "c3d8e5f2-9a4b-4e7d-b8c1-2f6a9e3d5b7c") + + enum Mode + { + All, + ByIndex + }; + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: scalar controls trigger rebuild() via their + // update() callbacks. Texture handles are checked in operator()() + // because their native handles can change without a port-update + // event (live video / CSF outputs swap native handles mid-stream). 
+ struct : halp::combobox_t<"Mode", Mode> + { + struct range + { + std::string_view values[2]{"All", "By Index"}; + int init{0}; + }; + void update(MaterialOverride& n) { n.rebuild(); } + } mode; + struct : halp::spinbox_i32<"Index", halp::irange{0, 4096, 0}> + { void update(MaterialOverride& n) { n.rebuild(); } } index; + + // Texture overrides. Unwired (handle==nullptr) → pass through. + // Handle changes detected in operator()(), not via control update(). + halp::gpu_texture_input<"Base Color Tex"> base_color_tex; + halp::gpu_texture_input<"Metal Rough Tex"> metal_rough_tex; + halp::gpu_texture_input<"Normal Tex"> normal_tex; + halp::gpu_texture_input<"Emissive Tex"> emissive_tex; + + // Factor overrides. Gated on the companion toggles; otherwise the + // original factor from the loader passes through. + struct : halp::toggle<"Use base color"> + { void update(MaterialOverride& n) { n.rebuild(); } } use_base_color; + struct : halp::hslider_f32<"R", halp::range{0., 1., 1.}> + { void update(MaterialOverride& n) { n.rebuild(); } } base_r; + struct : halp::hslider_f32<"G", halp::range{0., 1., 1.}> + { void update(MaterialOverride& n) { n.rebuild(); } } base_g; + struct : halp::hslider_f32<"B", halp::range{0., 1., 1.}> + { void update(MaterialOverride& n) { n.rebuild(); } } base_b; + struct : halp::hslider_f32<"A", halp::range{0., 1., 1.}> + { void update(MaterialOverride& n) { n.rebuild(); } } base_a; + + struct : halp::toggle<"Use metallic"> + { void update(MaterialOverride& n) { n.rebuild(); } } use_metallic; + struct : halp::hslider_f32<"Metallic", halp::range{0., 1., 0.}> + { void update(MaterialOverride& n) { n.rebuild(); } } metallic; + + struct : halp::toggle<"Use roughness"> + { void update(MaterialOverride& n) { n.rebuild(); } } use_roughness; + struct : halp::hslider_f32<"Roughness", halp::range{0., 1., 0.5}> + { void update(MaterialOverride& n) { n.rebuild(); } } roughness; + + struct : halp::toggle<"Use emissive"> + { void update(MaterialOverride& n) { 
n.rebuild(); } } use_emissive; + struct : halp::hslider_f32<"Emissive R", halp::range{0., 10., 0.}> + { void update(MaterialOverride& n) { n.rebuild(); } } em_r; + struct : halp::hslider_f32<"Emissive G", halp::range{0., 10., 0.}> + { void update(MaterialOverride& n) { n.rebuild(); } } em_g; + struct : halp::hslider_f32<"Emissive B", halp::range{0., 10., 0.}> + { void update(MaterialOverride& n) { n.rebuild(); } } em_b; + struct : halp::hslider_f32<"Emissive strength", halp::range{0., 10., 1.}> + { void update(MaterialOverride& n) { n.rebuild(); } } em_strength; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // Cached output; stable shared_ptr identity when inputs haven't + // changed so ScenePreprocessor's per-frame fingerprint fast-path stays + // warm. Dynamic-texture swaps still propagate because ScenePreprocessor + // refreshes its dynamic-slot map every frame (keyed on native_handle). + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + + // Cache of override clones keyed by source material_component*. We + // reuse the same std::shared_ptr clone + // across rebuilds when the source is unchanged, MUTATING its fields + // in place. That keeps the shared_ptr address stable → the + // preprocessor's m_loaderMaterialSlots keeps the arena slot allocated + // across frames → no per-frame GC + re-allocate cycle → the Material + // arena SSBO content is stable without churn. When the upstream + // material list changes structurally, stale cache entries are + // garbage-collected in rebuild(). + ossia::hash_map< + const ossia::material_component*, + std::shared_ptr> + m_clone_cache; + + // Identity cache: (input-scene pointer, input version, control values, + // texture handles). If all match, we reuse m_cached_out without + // rebuilding the materials list. 
+ const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int m_cached_mode{-1}; + int m_cached_index{-1}; + void* m_cached_tex[4]{}; + bool m_cached_use_base{false}; + bool m_cached_use_metallic{false}; + bool m_cached_use_roughness{false}; + bool m_cached_use_emissive{false}; + float m_cached_base[4]{}; + float m_cached_metallic{-1.f}; + float m_cached_roughness{-1.f}; + float m_cached_em[4]{}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.cpp new file mode 100644 index 0000000000..f9ee79a031 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.cpp @@ -0,0 +1,58 @@ +#include "Executor.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace Gfx::MergeGeometries +{ +class merge_geometries_exec_node final : public gfx_exec_node +{ +public: + merge_geometries_exec_node(GfxExecutionAction& ctx) + : gfx_exec_node{ctx} + { + } + + void init() + { + auto node = std::make_unique(); + id = exec_context->ui->register_node(std::move(node)); + } + + ~merge_geometries_exec_node() { exec_context->ui->unregister_node(id); } + + std::string label() const noexcept override { return "Gfx::MergeGeometries_node"; } +}; + +ProcessExecutorComponent::ProcessExecutorComponent( + Gfx::MergeGeometries::Model& element, + const Execution::Context& ctx, + QObject* parent) + : ProcessComponent_T{element, ctx, "mergeGeometriesComponent", parent} +{ + auto n = ossia::make_node( + *ctx.execState, ctx.doc.plugin().exec); + + for(int i = 0; i < 8; ++i) + n->add_geometry(); + n->add_geometry_out(); + n->init(); + + this->node = n; + m_ossia_process = std::make_shared(n); +} + +void ProcessExecutorComponent::cleanup() +{ + ProcessComponent_T::cleanup(); +} +} diff --git 
a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.hpp new file mode 100644 index 0000000000..ea2a7dff4c --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Executor.hpp @@ -0,0 +1,22 @@ +#pragma once +#include + +#include + +namespace Gfx::MergeGeometries +{ +class Model; +class ProcessExecutorComponent final + : public Execution:: + ProcessComponent_T +{ + COMPONENT_METADATA("b7c8d9e0-f1a2-4b3c-8d4e-5f6a7b8c9d0e") +public: + ProcessExecutorComponent( + Model& element, const Execution::Context& ctx, QObject* parent); + void cleanup() override; +}; + +using ProcessExecutorComponentFactory + = Execution::ProcessComponentFactory_T; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Metadata.hpp new file mode 100644 index 0000000000..f3c8f0df39 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Metadata.hpp @@ -0,0 +1,22 @@ +#pragma once +#include + +namespace Gfx::MergeGeometries +{ +class Model; +} + +PROCESS_METADATA( + , Gfx::MergeGeometries::Model, "e8f7a6b5-c4d3-4e2f-1a0b-9c8d7e6f5a4b", + "mergegeometries", + "Merge Geometries", + Process::ProcessCategory::Visual, + "Visuals/3D/Scene", + "Concatenate N upstream geometry_specs into one for a single downstream renderer", + "ossia team", + (QStringList{"gfx", "geometry", "merge", "3d", "scene"}), + {}, + {}, + QUrl{}, + Process::ProcessFlags::SupportsAll +) diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.cpp new file mode 100644 index 0000000000..eb5d146c32 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.cpp @@ -0,0 +1,74 @@ +#include "Process.hpp" + +#include + +#include + +#include +#include + +#include + 
+W_OBJECT_IMPL(Gfx::MergeGeometries::Model) +namespace Gfx::MergeGeometries +{ + +Model::Model( + const TimeVal& duration, const Id& id, QObject* parent) + : Process::ProcessModel{duration, id, "gfxProcess", parent} +{ + metadata().setInstanceName(*this); + init(); +} + +Model::~Model() = default; + +void Model::init() +{ + if(m_inlets.empty() && m_outlets.empty()) + { + for(int i = 0; i < 8; ++i) + { + QString name = QStringLiteral("Geometry %1").arg(i + 1); + m_inlets.push_back(new GeometryInlet{name, Id(i), this}); + } + m_outlets.push_back(new GeometryOutlet{"Merged", Id(0), this}); + } +} + +QString Model::prettyName() const noexcept +{ + return tr("Merge Geometries"); +} + +} + +template <> +void DataStreamReader::read(const Gfx::MergeGeometries::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); + insertDelimiter(); +} + +template <> +void DataStreamWriter::write(Gfx::MergeGeometries::Model& proc) +{ + writePorts( + *this, components.interfaces(), proc.m_inlets, + proc.m_outlets, &proc); + checkDelimiter(); +} + +template <> +void JSONReader::read(const Gfx::MergeGeometries::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); +} + +template <> +void JSONWriter::write(Gfx::MergeGeometries::Model& proc) +{ + writePorts( + *this, components.interfaces(), proc.m_inlets, + proc.m_outlets, &proc); +} diff --git a/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.hpp new file mode 100644 index 0000000000..0ed0735e6a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/MergeGeometries/Process.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include +#include + +namespace Gfx::MergeGeometries +{ +class Model final : public Process::ProcessModel +{ + SCORE_SERIALIZE_FRIENDS + PROCESS_METADATA_IMPL(Gfx::MergeGeometries::Model) + W_OBJECT(Model) + +public: + Model( + const TimeVal& duration, + const Id& id, + QObject* parent); + + 
template + Model(Impl& vis, QObject* parent) + : Process::ProcessModel{vis, parent} + { + vis.writeTo(*this); + init(); + } + + ~Model() override; + +private: + void init(); + QString prettyName() const noexcept override; +}; + +using ProcessFactory = Process::ProcessFactory_T; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp b/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp index 32a1b21ac6..c9974a09ef 100644 --- a/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/ModelDisplay/ModelDisplayNode.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,8 @@ layout(std140, binding = 2) uniform camera_t { \n\ mat4 matrixProjection; \n\ mat3 matrixNormal; \n\ float fov; \n\ + float near; \n\ + float far; \n\ } camera; \n\ \n\ " @@ -81,29 +84,160 @@ float gl_PointSize; const constexpr auto vtx_projection_perspective = R"_( vec4 v_projected = camera.matrixModelViewProjection * vec4(in_position.xyz, 1.0); )_"; -const constexpr auto vtx_projection_fulldome = R"_( -vec4 v_projected = vec4(1.0); +// ---------------------------------------------------------------------------- +// Fulldome fisheye projections +// +// All four variants share the same θ/φ derivation and the same reverse-Z +// depth; they differ only in the `r_ndc = f(θ)` mapping. Kept as separate +// vertex-shader snippets (rather than a runtime branch on a uniform) so +// the GPU dispatches branch-free code for the selected projection. 
+// +// equidistant — r = θ / (FOV/2) (domemaster, uniform angular resolution; default) +// equisolid — r = sin(θ/2) / sin(FOV/4) (equal-area; typical of photographic fisheye lenses) +// stereographic — r = tan(θ/2) / tan(FOV/4) (conformal; "little planet" look) +// orthographic — r = sin(θ) / sin(FOV/2) (parallel-projection sphere; FOV ≤ 180° only) +// +// Points whose NDC |x| or |y| exceeds 1 are hardware-clipped (r_ndc up to +// √2 can still land in a corner of the square), so FOV > 180° works for +// equidistant / equisolid / stereographic. Orthographic cannot exceed 180° geometrically. +// ---------------------------------------------------------------------------- +const constexpr auto vtx_projection_fulldome_equidistant = R"_( +// +// Fulldome / domemaster (equidistant angular fisheye). +// +// r_2D = theta / (fov/2) — radial image distance (NDC units) +// theta = angle from dome forward axis (view-space +Z in this convention) +// phi = azimuth around forward axis +// +// Convention kept from the original implementation: the .xzy swizzle re- +// orients world +Z as dome-up, world +Y as dome-forward; the view matrix +// then places the zenith along view-space +Z. +// +// Works for FOV > 180° (e.g. 240°): points with theta > FOV/2 land outside +// the NDC unit square and get hardware-clipped. For point clouds each +// vertex is a single point, so no per-primitive clipping subtleties. +// +// Depth: linear reverse-Z in radial distance. z_gl in [-1,+1] such that +// renderer.clipSpaceCorrMatrix (GL→Vulkan Z remap) yields z_vulkan=1 at +// near, z_vulkan=0 at far. Matches the project-wide reverse-Z convention +// (depth cleared to 0.0, compare op Greater). +// +vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0); { vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0); - // Code from Emmanuel Durand: - // https://emmanueldurand.net/spherical_projection/ - // - inlined as another function injected could be called toSphere or do #define pi. yay GLSL... 
- float r = length(viewspace.xyz); - float val = clamp(viewspace.z / r, -1.0, 1.0); - float theta = atan(length(viewspace.xy), viewspace.z); - - val = viewspace.x / (r * sin(theta)); - float first = acos(clamp(val, -1.0, 1.0)); - val = viewspace.y / (r * sin(theta)); - float second = asin(clamp(val, -1.0, 1.0)); - - float phi = mix(2.0 * 3.14159265358979323846264338327 - first, first, second >= 0.0); - const float proj_ratio = 3.14159265358979323846264338327 / (360.0 / camera.fov); - v_projected.x = theta * cos(phi); - v_projected.y = theta * sin(phi); - v_projected.y /= proj_ratio; - v_projected.x /= proj_ratio; - v_projected.z = r / 1000.; + vec3 d = viewspace.xyz; + float r = length(d); + + const float PI = 3.14159265358979323846264338327; + + if(r > 1e-6) + { + float theta = acos(clamp(d.z / r, -1.0, 1.0)); + float phi = (length(d.xy) > 1e-6) ? atan(d.y, d.x) : 0.0; + float half_fov_rad = max(radians(camera.fov * 0.5), 1e-6); + float r_ndc = theta / half_fov_rad; + + v_projected.x = r_ndc * cos(phi); + v_projected.y = r_ndc * sin(phi); + } + + // Reverse-Z linear depth: z_gl = 1 at r = near (gets remapped to + // z_vulkan = 1 by clipSpaceCorrMatrix), z_gl = -1 at r = far. + float t = clamp( + (r - camera.near) / max(camera.far - camera.near, 1e-6), + 0.0, 1.0); + v_projected.z = 1.0 - 2.0 * t; + v_projected.w = 1.0; +} +)_"; + +// Equisolid-angle (equal-area fisheye). Matches the response of most +// physical fisheye camera lenses (Nikon, Canon). Areas-on-the-sphere map +// to equal areas-on-the-image, so the edge gets less angular resolution +// than the centre. +const constexpr auto vtx_projection_fulldome_equisolid = R"_( +vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0); +{ + vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0); + vec3 d = viewspace.xyz; + float r = length(d); + + if(r > 1e-6) + { + float theta = acos(clamp(d.z / r, -1.0, 1.0)); + float phi = (length(d.xy) > 1e-6) ? 
atan(d.y, d.x) : 0.0; + float quarter_fov_rad = max(radians(camera.fov * 0.25), 1e-6); + float r_ndc = sin(theta * 0.5) / sin(quarter_fov_rad); + + v_projected.x = r_ndc * cos(phi); + v_projected.y = r_ndc * sin(phi); + } + + float t = clamp( + (r - camera.near) / max(camera.far - camera.near, 1e-6), + 0.0, 1.0); + v_projected.z = 1.0 - 2.0 * t; + v_projected.w = 1.0; +} +)_"; + +// Stereographic fisheye. Conformal — local angles / shapes preserved, +// circles on the sphere stay circles in the image. No edge compression of +// shape. Good for VR / architectural preview, less good for uniform +// pixel-per-degree on a dome. +const constexpr auto vtx_projection_fulldome_stereographic = R"_( +vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0); +{ + vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0); + vec3 d = viewspace.xyz; + float r = length(d); + + if(r > 1e-6) + { + float theta = acos(clamp(d.z / r, -1.0, 1.0)); + float phi = (length(d.xy) > 1e-6) ? atan(d.y, d.x) : 0.0; + float quarter_fov_rad = max(radians(camera.fov * 0.25), 1e-6); + // tan diverges at θ=π; rely on hardware clipping for θ ≥ FOV/2. + float r_ndc = tan(theta * 0.5) / tan(quarter_fov_rad); + + v_projected.x = r_ndc * cos(phi); + v_projected.y = r_ndc * sin(phi); + } + + float t = clamp( + (r - camera.near) / max(camera.far - camera.near, 1e-6), + 0.0, 1.0); + v_projected.z = 1.0 - 2.0 * t; + v_projected.w = 1.0; +} +)_"; + +// Orthographic sphere projection. Parallel projection — the image looks +// like a billiard-ball photographed from infinity. FOV must be ≤ 180°; +// beyond that the mapping collapses (sin(θ) decreases past π/2). +const constexpr auto vtx_projection_fulldome_orthographic = R"_( +vec4 v_projected = vec4(0.0, 0.0, 0.0, 1.0); +{ + vec4 viewspace = camera.matrixModelView * vec4(in_position.xzy, 1.0); + vec3 d = viewspace.xyz; + float r = length(d); + + if(r > 1e-6) + { + float theta = acos(clamp(d.z / r, -1.0, 1.0)); + float phi = (length(d.xy) > 1e-6) ? 
atan(d.y, d.x) : 0.0; + float half_fov_rad = max(radians(camera.fov * 0.5), 1e-6); + float r_ndc = sin(theta) / sin(half_fov_rad); + + v_projected.x = r_ndc * cos(phi); + v_projected.y = r_ndc * sin(phi); + } + + float t = clamp( + (r - camera.near) / max(camera.far - camera.near, 1e-6), + 0.0, 1.0); + v_projected.z = 1.0 - 2.0 * t; + v_projected.w = 1.0; } )_"; const constexpr auto vtx_output_process_triangle = R"_()_"; @@ -149,6 +283,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. + gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -237,6 +382,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. 
+ gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -292,6 +448,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. + gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -358,6 +525,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. 
+ gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -413,6 +591,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. + gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -461,6 +650,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. 
+ gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -510,6 +710,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. + gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -557,6 +768,17 @@ void main() %vtx_do_projection% gl_Position = renderer.clipSpaceCorrMatrix * v_projected; +#if defined(QSHADER_HLSL) || defined(QSHADER_MSL) + // Match the codebase Y-handling convention used by ImageNode et al.: + // GL is Y-up framebuffer (no flip), Vulkan's Y flip is baked into + // QRhi's clipSpaceCorrMatrix, but D3D/Metal share Vulkan's framebuffer + // origin without sharing its NDC sign convention — so we flip here so + // the offscreen texture lands top-row-first like the other backends, + // and the screen compositor (ScaledRenderer) keeps its SPIRV-only UV + // flip. Without this the model rendered fine on GL/Vulkan but ended + // up upside-down on D3D11/12. + gl_Position.y = -gl_Position.y; +#endif %vtx_output_process% } @@ -608,7 +830,19 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer QShader viewspaceVS, viewspaceFS; QShader barycentricVS, barycentricFS; QShader colorVS, colorFS; - } triangle_perspective, point_perspective, triangle_fulldome, point_fulldome; + }; + + // Camera mode enum — matches the UI ordering. Index into + // triangle_shaders / point_shaders arrays. 
+ // + // 0 = Perspective + // 1 = Fulldome (Equidistant, domemaster) + // 2 = Fulldome (Equisolid-angle, photographic fisheye) + // 3 = Fulldome (Stereographic, conformal) + // 4 = Fulldome (Orthographic, ≤ 180° only) + static constexpr int CAMERA_MODE_COUNT = 5; + RenderShaders triangle_shaders[CAMERA_MODE_COUNT]; + RenderShaders point_shaders[CAMERA_MODE_COUNT]; int64_t meshChangedIndex{-1}; int m_curShader{0}; @@ -870,32 +1104,14 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer m_blend_alpha_op = n.blend_alpha_op; m_blend_enabled = n.blend_enabled; - switch(m_draw_mode) - { - case 0: - case 2: - switch(m_camera_mode) - { - case 0: - initPasses_impl(renderer, mesh, triangle_perspective); - break; - case 1: - initPasses_impl(renderer, mesh, triangle_fulldome); - break; - } - break; - case 1: - switch(m_camera_mode) - { - case 0: - initPasses_impl(renderer, mesh, point_perspective); - break; - case 1: - initPasses_impl(renderer, mesh, point_fulldome); - break; - } - break; - } + // Pick triangle- vs point-topology shader set, then index by + // camera_mode. Values outside [0, CAMERA_MODE_COUNT) clamp to + // perspective so a stale UI value never indexes out-of-bounds. + const int mode = (m_camera_mode >= 0 && m_camera_mode < CAMERA_MODE_COUNT) + ? m_camera_mode + : 0; + auto& set = (m_draw_mode == 1) ? 
point_shaders[mode] : triangle_shaders[mode]; + initPasses_impl(renderer, mesh, set); QRhiGraphicsPipeline::TargetBlend blend; blend.enable = m_blend_enabled; @@ -908,24 +1124,33 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer for(auto& [e, pass] : this->m_p) { - pass.pipeline->destroy(); + pass.p.pipeline->destroy(); - pass.pipeline->setTargetBlends({blend}); + pass.p.pipeline->setTargetBlends({blend}); switch(m_draw_mode) { case 0: - pass.pipeline->setTopology(QRhiGraphicsPipeline::Triangles); + pass.p.pipeline->setTopology(QRhiGraphicsPipeline::Triangles); break; case 1: - pass.pipeline->setTopology(QRhiGraphicsPipeline::Points); + pass.p.pipeline->setTopology(QRhiGraphicsPipeline::Points); break; case 2: - pass.pipeline->setTopology(QRhiGraphicsPipeline::Lines); + pass.p.pipeline->setTopology(QRhiGraphicsPipeline::Lines); break; } - pass.pipeline->create(); + // Reverse-Z project rule (matches PipelineStateHelpers::applyPipelineState + // default). buildPipeline leaves DepthOp at QRhi's default `Less` which + // rejects every fragment against the 0.0-cleared reverse-Z buffer. + // ModelDisplay's projection matrix now produces reverse-Z NDC, so we + // must also flip the compare op. 
+ pass.p.pipeline->setDepthTest(true); + pass.p.pipeline->setDepthWrite(true); + pass.p.pipeline->setDepthOp(QRhiGraphicsPipeline::Greater); + + pass.p.pipeline->create(); } } @@ -1019,19 +1244,25 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer void createShaders(RenderList& renderer, const score::gfx::Mesh& mesh) { - createShaders( - this->triangle_perspective, renderer, vtx_output_triangle, - vtx_output_process_triangle, vtx_projection_perspective, mesh); - createShaders( - this->point_perspective, renderer, vtx_output_point, vtx_output_process_point, - vtx_projection_perspective, mesh); - - createShaders( - this->triangle_fulldome, renderer, vtx_output_triangle, - vtx_output_process_triangle, vtx_projection_fulldome, mesh); - createShaders( - this->point_fulldome, renderer, vtx_output_point, vtx_output_process_point, - vtx_projection_fulldome, mesh); + // One projection snippet per camera_mode — order MUST match the UI + // enum ordering described on RenderShaders. + const char* projections[CAMERA_MODE_COUNT] = { + vtx_projection_perspective, + vtx_projection_fulldome_equidistant, + vtx_projection_fulldome_equisolid, + vtx_projection_fulldome_stereographic, + vtx_projection_fulldome_orthographic, + }; + + for(int i = 0; i < CAMERA_MODE_COUNT; ++i) + { + createShaders( + triangle_shaders[i], renderer, vtx_output_triangle, + vtx_output_process_triangle, projections[i], mesh); + createShaders( + point_shaders[i], renderer, vtx_output_point, vtx_output_process_point, + projections[i], mesh); + } } void recreateRenderTarget(RenderList& renderer) @@ -1057,7 +1288,7 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer m_samplers.push_back({sampler, texture}); } - void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override + void initState(RenderList& renderer, QRhiResourceUpdateBatch& res) override { recreateRenderTarget(renderer); const auto& mesh = m_mesh ? 
*m_mesh : renderer.defaultQuad(); @@ -1066,6 +1297,62 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer processUBOInit(renderer); m_material.init(renderer, node.input, m_samplers); + m_initialized = true; + } + + void addOutputPass( + RenderList& renderer, Edge& edge, QRhiResourceUpdateBatch& res) override + { + // The shader selection depends on mesh properties and node settings. + // initPasses() creates passes for ALL edges at once, so we only call it + // the first time (when m_p is empty). Subsequent edges are already covered. + if(m_p.empty()) + { + const auto& mesh = m_mesh ? *m_mesh : renderer.defaultQuad(); + initPasses(renderer, mesh); + } + } + + bool hasOutputPassForEdge(Edge& edge) const override + { + return ossia::find_if(m_p, [&](const auto& p) { return p.first == &edge; }) + != m_p.end(); + } + + void releaseState(RenderList& r) override + { + if(!m_initialized) + return; + + m_renderer = nullptr; + + // Release any remaining passes + for(auto& pass : m_p) + pass.second.release(); + m_p.clear(); + + for(auto sampler : m_samplers) + { + delete sampler.sampler; + } + m_samplers.clear(); + + delete m_processUBO; + m_processUBO = nullptr; + + delete m_material.buffer; + m_material.buffer = nullptr; + + m_meshbufs = {}; + + m_initialized = false; + } + + void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override + { + initState(renderer, res); + + const auto& mesh = m_mesh ? 
*m_mesh : renderer.defaultQuad(); initPasses(renderer, mesh); } @@ -1085,10 +1372,20 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer memcpy(to, from.data(), sizeof(float[N])); } + int mdupdate_log = 0; void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override { auto& n = static_cast(this->node); + if(mdupdate_log < 3) + { + qDebug() << "ModelDisplay::update materialChanged=" << this->materialChanged + << "geometryChanged=" << this->geometryChanged + << "fov=" << n.fov + << "passes=" << m_p.size(); + mdupdate_log++; + } + bool mustRecreatePasses = false; if(this->materialChanged) { @@ -1103,6 +1400,21 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer qreal(renderer.state.renderSize.width()) / renderer.state.renderSize.height(), n.near, n.far); + + // Project-wide reverse-Z convention: near=1, far=0, depth cleared to + // 0.0, depth op Greater. QMatrix4x4::perspective() produces standard + // GL Z (near=-1, far=+1) which clipSpaceCorrMatrix then maps to + // Vulkan [0, 1] — the wrong direction for reverse-Z. + // + // Pre-multiplying by a Z-flip matrix flips the NDC z output of the + // perspective: z_ndc → -z_ndc. After clipSpaceCorrMatrix's [-1,1] → + // [0,1] remap, that gives near→1, far→0, exactly what the rest of + // the pipeline expects. 
+ { + QMatrix4x4 zFlip; + zFlip(2, 2) = -1.0f; + projection = zFlip * projection; + } QMatrix4x4 view; view.lookAt( @@ -1122,6 +1434,8 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer toGL(mvp, mc.mvp); toGL(norm, mc.modelNormal); mc.fov = n.fov; + mc.znear = n.near; + mc.zfar = n.far; res.updateDynamicBuffer(m_material.buffer, 0, sizeof(ModelCameraUBO), &mc); @@ -1146,6 +1460,7 @@ class ModelDisplayNode::Renderer : public GenericNodeRenderer if(m_blend_enabled != n.blend_enabled) mustRecreatePasses = true; } + this->materialChanged = false; res.updateDynamicBuffer(m_processUBO, 0, sizeof(ProcessUBO), &n.standardUBO); diff --git a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/ObjLoader.hpp deleted file mode 100644 index 516fe12bfd..0000000000 --- a/src/plugins/score-plugin-threedim/Threedim/ObjLoader.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include - -namespace Threedim -{ - -class ObjLoader -{ -public: - halp_meta(name, "Object loader") - halp_meta(category, "Visuals/Meshes") - halp_meta(c_name, "obj_loader") - halp_meta( - authors, - "Jean-Michaël Celerier, TinyOBJ authors, miniPLY authors, Eigen authors") - halp_meta(manual_url, "https://ossia.io/score-docs/processes/meshes.html#obj-loader") - halp_meta(uuid, "5df71765-505f-4ab7-98c1-f305d10a01ef") - - struct ins - { - struct obj_t : halp::file_port<"3D file"> - { - halp_meta(extensions, "3D files (*.obj *.ply)"); - static std::function process(file_type data); - } obj; - PositionControl position; - RotationControl rotation; - ScaleControl scale; - } inputs; - - struct - { - struct : halp::mesh - { - halp_meta(name, "Geometry"); - std::vector mesh; - } geometry; - } outputs; - - void rebuild_geometry(); - - std::vector meshinfo{}; - float_vec complete; -}; - -} diff --git a/src/plugins/score-plugin-threedim/Threedim/PBRMesh.cpp 
b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.cpp new file mode 100644 index 0000000000..98b206ec52 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.cpp @@ -0,0 +1,436 @@ +#include "PBRMesh.hpp" + +#include +#include + +#include +#include + +#include +#include + +namespace Threedim +{ + +namespace +{ + +// halp::attribute_format → ossia::vertex_format. The enum orderings differ, +// so this has to be a switch rather than a static_cast. Unknown formats +// fall back to float3 (the most common vertex-attribute case). +ossia::vertex_format mapFormat(halp::attribute_format f) noexcept +{ + using H = halp::attribute_format; + using O = ossia::vertex_format; + switch(f) + { + case H::float1: return O::float1; + case H::float2: return O::float2; + case H::float3: return O::float3; + case H::float4: return O::float4; + case H::half1: return O::half1; + case H::half2: return O::half2; + case H::half3: return O::half3; + case H::half4: return O::half4; + case H::unormbyte1: return O::unorm8x1; + case H::unormbyte2: return O::unorm8x2; + case H::unormbyte4: return O::unorm8x4; + case H::uint1: return O::uint32x1; + case H::uint2: return O::uint32x2; + case H::uint3: return O::uint32x3; + case H::uint4: return O::uint32x4; + case H::sint1: return O::sint32x1; + case H::sint2: return O::sint32x2; + case H::sint3: return O::sint32x3; + case H::sint4: return O::sint32x4; + case H::ushort1: return O::uint16x1; + case H::ushort2: return O::uint16x2; + case H::ushort4: return O::uint16x4; + case H::sshort1: return O::sint16x1; + case H::sshort2: return O::sint16x2; + case H::sshort4: return O::sint16x4; + default: return O::float3; + } +} + +ossia::primitive_topology mapTopology(halp::primitive_topology t) noexcept +{ + using H = halp::primitive_topology; + using O = ossia::primitive_topology; + switch(t) + { + case H::triangles: return O::triangles; + case H::triangle_strip: return O::triangle_strip; + case H::triangle_fan: return O::triangle_fan; + 
case H::lines: return O::lines; + case H::line_strip: return O::line_strip; + case H::points: return O::points; + } + return O::triangles; +} + +ossia::index_format mapIndexFormat(halp::index_format f) noexcept +{ + return (f == halp::index_format::uint16) ? ossia::index_format::uint16 + : ossia::index_format::uint32; +} + +// Wrap a halp GPU buffer handle into an ossia::buffer_resource carrying a +// gpu_buffer_handle (no CPU-side data, no upload). Returns null on a null +// handle so caller can skip that slot. +ossia::buffer_resource_ptr +wrapGpuBuffer(void* handle, int64_t byte_size) noexcept +{ + if(!handle) + return nullptr; + ossia::gpu_buffer_handle gh; + gh.native_handle = handle; + gh.byte_size = byte_size; + gh.byte_offset = 0; + auto res = std::make_shared(); + res->resource = gh; + res->dirty_index = 1; + return res; +} + +} // namespace + +void PBRMesh::operator()() +{ + if(m_material_stable_id == 0) m_material_stable_id = ossia::mint_stable_id(); + if(m_primitive_stable_id == 0) m_primitive_stable_id = ossia::mint_stable_id(); + if(m_xform_stable_id == 0) m_xform_stable_id = ossia::mint_stable_id(); + + const auto& m = inputs.geometry_in.mesh; + void* buf0_handle + = m.buffers.empty() ? nullptr : m.buffers[0].handle; + + // Identity-caching fast path: skip the rebuild when the input + // geometry buffers / counts / textures / factors are all unchanged. 
+ const float cur_factors[10]{ + inputs.base_r.value, inputs.base_g.value, inputs.base_b.value, + inputs.base_a.value, inputs.metallic.value, inputs.roughness.value, + inputs.em_r.value, inputs.em_g.value, inputs.em_b.value, + inputs.em_strength.value}; + void* cur_tex[4]{ + inputs.base_color_tex.texture.handle, + inputs.metal_rough_tex.texture.handle, + inputs.normal_tex.texture.handle, + inputs.emissive_tex.texture.handle}; + + float scratch[16]; + CachedTRS xformCache = m_cachedTRS; + const bool trs_changed = computeTRSMatrix(inputs, scratch, xformCache); + + // Intentionally NOT gating on `inputs.geometry_in.dirty_mesh`: upstream + // CSF compute nodes raise that flag every frame to signal content + // changed, but our downstream ScenePreprocessor handles content changes + // via its GPU-copy path (which re-fires every runInitialPasses). Only + // STRUCTURAL changes — buffer-handle swap, vertex/index count change, + // texture-override swap, factor change — need a new scene_state + // version; content-only changes keep the cached shared_ptr so + // ScenePreprocessor's fingerprint fast-path stays warm and doesn't + // rebuild the MDI merge + invalidate downstream pipeline state. 
+ const bool inputs_changed + = m_cached_buf0 != buf0_handle + || m_cached_vertices != m.vertices + || m_cached_indices != m.indices + || !std::equal(m_cached_tex, m_cached_tex + 4, cur_tex) + || !std::equal(m_cached_factors, m_cached_factors + 10, cur_factors); + + if(!inputs_changed && !trs_changed && m_wrapped_state && buf0_handle) + { + outputs.scene_out.scene.state = m_wrapped_state; + outputs.scene_out.dirty = 0; + return; + } + m_cachedTRS = xformCache; + m_cached_buf0 = buf0_handle; + m_cached_vertices = m.vertices; + m_cached_indices = m.indices; + std::copy(cur_tex, cur_tex + 4, m_cached_tex); + std::copy(cur_factors, cur_factors + 10, m_cached_factors); + + if(!buf0_handle || m.vertices <= 0) + { + outputs.scene_out.scene = {}; + m_wrapped_state.reset(); + return; + } + + // Wrap halp buffers → ossia buffer_resources (parallel indexing so + // attribute buffer_index resolution is a direct lookup). + ossia::small_vector wrapped_buffers; + wrapped_buffers.reserve(m.buffers.size()); + for(const auto& b : m.buffers) + wrapped_buffers.push_back(wrapGpuBuffer(b.handle, b.byte_size)); + + // Build one mesh_primitive off the geometry. + ossia::mesh_primitive mp; + // vertex_buffers holds only the non-null wrapped buffers, so its indices + // can differ from halp's buffer indices; bufRemap below translates them, and + // attributes whose buffer is null are filtered out on the attribute walk. + for(const auto& w : wrapped_buffers) + if(w) + mp.vertex_buffers.push_back(w); + + // Map halp buffer index → mp.vertex_buffers index (we may have dropped + // nulls along the way). 
+ ossia::small_vector bufRemap; + bufRemap.resize(wrapped_buffers.size(), -1); + int out_idx = 0; + for(std::size_t i = 0; i < wrapped_buffers.size(); ++i) + { + if(wrapped_buffers[i]) + bufRemap[i] = out_idx++; + } + + for(const auto& attr : m.attributes) + { + if(attr.binding < 0 || attr.binding >= (int)m.input.size()) + continue; + const auto& in = m.input[attr.binding]; + if(in.buffer < 0 || in.buffer >= (int)bufRemap.size()) + continue; + const int buf_idx = bufRemap[in.buffer]; + if(buf_idx < 0) + continue; + + ossia::vertex_attribute va; + va.semantic = static_cast(attr.semantic); + va.format = mapFormat(attr.format); + va.buffer_index = (uint32_t)buf_idx; + va.byte_offset = uint32_t(in.byte_offset + attr.byte_offset); + // Binding stride governs per-vertex advance; fall back to 0 (tightly + // packed single attribute) if the binding entry is missing. + va.byte_stride = (attr.binding < (int)m.bindings.size()) + ? (uint32_t)m.bindings[attr.binding].stride + : 0u; + va.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(va); + } + + // Index buffer (optional). + if(m.index.buffer >= 0 && m.index.buffer < (int)m.buffers.size()) + { + const auto& ib = m.buffers[m.index.buffer]; + if(ib.handle) + { + ossia::gpu_buffer_handle gh; + gh.native_handle = ib.handle; + gh.byte_size = ib.byte_size; + gh.byte_offset = m.index.byte_offset; + auto ibr = std::make_shared(); + ibr->resource = gh; + ibr->dirty_index = 1; + mp.index_buffer = ibr; + mp.index_type = mapIndexFormat(m.index.format); + } + } + + mp.topology = mapTopology(m.topology); + mp.stable_id = m_primitive_stable_id; + mp.vertex_count = (uint32_t)std::max(0, m.vertices); + mp.index_count = (uint32_t)std::max(0, m.indices); + + // Author the material. Factors come from the controls; texture slots + // populate the dynamic-handle pathway when the corresponding inlet + // carries a non-null handle. 
The primitive's `material` is bound to + // this shared_ptr directly — no index lookup. + auto mat = std::make_shared(); + mat->stable_id = m_material_stable_id; + mat->base_color_factor[0] = cur_factors[0]; + mat->base_color_factor[1] = cur_factors[1]; + mat->base_color_factor[2] = cur_factors[2]; + mat->base_color_factor[3] = cur_factors[3]; + mat->metallic_factor = cur_factors[4]; + mat->roughness_factor = cur_factors[5]; + mat->emissive_factor[0] = cur_factors[6]; + mat->emissive_factor[1] = cur_factors[7]; + mat->emissive_factor[2] = cur_factors[8]; + mat->emissive_strength = cur_factors[9]; + + auto stamp_tex = [](ossia::texture_ref& dst, void* h) { + if(!h) + return; + dst.texture.native_handle = h; + dst.texture.bindless_index = 0; + dst.source.reset(); + }; + stamp_tex(mat->base_color_texture, cur_tex[0]); + stamp_tex(mat->metallic_roughness_texture, cur_tex[1]); + stamp_tex(mat->normal_texture, cur_tex[2]); + stamp_tex(mat->emissive_texture, cur_tex[3]); + + // Propagate the Material arena slot ref (populated in init()). + mat->raw_slot = m_material_ref; + + mp.material = ossia::material_component_ptr(mat); + + auto mesh_comp = std::make_shared(); + mesh_comp->primitives.push_back(std::move(mp)); + + // Assemble the single scene_node: TRS first (Loader convention), then + // the mesh_component as the second payload. Matches GltfParser's + // layout so the built-in TRS controls act on the mesh the same way. 
+ ossia::scene_transform xform; + xform.stable_id = m_xform_stable_id; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + auto q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + // Propagate the RawTransform slot ref (populated in init()). + xform.raw_slot = m_xform_ref; + + auto children = std::make_shared>(); + children->push_back(xform); + children->push_back(ossia::mesh_component_ptr(std::move(mesh_comp))); + + auto node = std::make_shared(); + node->children = std::move(children); + node->dirty_index = ++m_version_counter; + + auto roots = std::make_shared>(); + roots->push_back(std::move(node)); + + auto mats = std::make_shared>(); + mats->push_back(std::move(mat)); + + auto state = std::make_shared(); + state->roots = std::move(roots); + state->materials = std::move(mats); + state->version = m_version_counter; + state->dirty_index = m_version_counter; + + m_wrapped_state = std::move(state); + outputs.scene_out.scene.state = m_wrapped_state; + outputs.scene_out.dirty = 0xFF; +} + +void PBRMesh::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + // One slot in the Material arena per PBRMesh for its lifetime. Seeded + // with default-constructed MaterialGPU bytes so any reader that samples + // the slot before the first update() sees a neutral white material + // rather than undefined memory. 
+ if(!material_slot.valid()) + { + material_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::Material, + sizeof(score::gfx::MaterialGPU)); + m_material_ref = r.registry().toOssiaRef(material_slot); + } + if(material_slot.valid()) + { + score::gfx::MaterialGPU seed{}; + r.registry().updateSlot(res, material_slot, &seed, sizeof(seed)); + } + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void PBRMesh::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge*) +{ + // Pack control-snapshot factor values into the slot, and — when a + // runtime GPU handle is wired on one of the four channels — register + // a dynamic slot in the registry and stamp tex_ref_dynamic(slot) into + // the slot's textureRefs[]. Producer-authored refs agree with the + // preprocessor's own rebuildDynamicSlots because both go through the + // same shared registry map — idempotent. 
+ if(!material_slot.valid()) + return; + + score::gfx::MaterialGPU gpu{}; + gpu.baseColor[0] = inputs.base_r.value; + gpu.baseColor[1] = inputs.base_g.value; + gpu.baseColor[2] = inputs.base_b.value; + gpu.baseColor[3] = inputs.base_a.value; + gpu.metallicRoughnessOcclusionUnlit[0] = inputs.metallic.value; + gpu.metallicRoughnessOcclusionUnlit[1] = inputs.roughness.value; + gpu.metallicRoughnessOcclusionUnlit[2] = 1.f; + gpu.metallicRoughnessOcclusionUnlit[3] = 0.f; + gpu.emissive_strength[0] = inputs.em_r.value; + gpu.emissive_strength[1] = inputs.em_g.value; + gpu.emissive_strength[2] = inputs.em_b.value; + gpu.emissive_strength[3] = inputs.em_strength.value; + + using Ch = score::gfx::GpuResourceRegistry::TextureChannel; + uint32_t fm = 0u; + using namespace score::gfx::material_feature; + auto stamp_dyn = [&](Ch ch, void* handle, int idx, uint32_t feature_bit) { + if(!handle) + return; + const int slot = r.registry().resolveDynamicSlot(ch, handle); + if(slot < 0) + return; + gpu.textureRefs[idx] = score::gfx::tex_ref_dynamic((uint32_t)slot); + fm |= feature_bit; + }; + stamp_dyn(Ch::BaseColor, inputs.base_color_tex.texture.handle, 0, has_base_color_texture); + stamp_dyn(Ch::MetalRough, inputs.metal_rough_tex.texture.handle, 1, has_metal_rough_texture); + stamp_dyn(Ch::Normal, inputs.normal_tex.texture.handle, 2, has_normal_texture); + stamp_dyn(Ch::Emissive, inputs.emissive_tex.texture.handle, 3, has_emissive_texture); + + // PBRMesh is lit PBR (unlit flag not exposed), fully opaque by default. + // No extension lobes wired through the current control surface. As + // extension support grows on this node we OR additional feature bits. + gpu.feature_mask = fm; + // hit_group_id stays 0 = standard lit; RT pipeline build will swap in + // a mask-specific index when relevant. 
+ + r.registry().updateSlot(res, material_slot, &gpu, sizeof(gpu)); + + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); + } +} + +void PBRMesh::release(score::gfx::RenderList& r) +{ + if(material_slot.valid()) + r.registry().free(material_slot); + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_material_ref = {}; + m_xform_ref = {}; + // Producer-state-drift Option A — see Light::release. + m_wrapped_state.reset(); +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/PBRMesh.hpp b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.hpp new file mode 100644 index 0000000000..fb54f01df3 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PBRMesh.hpp @@ -0,0 +1,156 @@ +#pragma once +#include "TransformHelper.hpp" + +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Wraps a GPU-resident geometry (the output of a compute-shader framework +// node — `halp::dynamic_gpu_geometry`) as a one-node scene_spec with a +// PBR material attached. The bridge between "CSF produces a geometry" +// and "scene-graph pipeline consumes scene_spec". 
+// +// Typical wiring: +// CSFNode(mesh_out) → PBRMesh(mesh_in, texture_in) → ScenePreprocessor +// +// The node emits a single scene_node at the root holding: +// - a scene_transform built from the TRS controls +// - a mesh_component wrapping the GPU geometry into one mesh_primitive +// - a direct material_component_ptr (also published into the scene's +// materials list): one material_component carrying the factor controls + any +// wired-in runtime textures +// +// Texture inputs route through the Dynamic Texture pathway in +// ScenePreprocessor: non-null handles become `*Dyn` auxiliary-texture +// bindings that classic_pbr_full samples directly, no CPU upload, no +// array-layer copy. Unwired inputs fall through to the scalar factors. +class PBRMesh +{ +public: + halp_meta(name, "PBR Mesh") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "pbr_mesh") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, "https://ossia.io/score-docs/processes/pbr-mesh.html") + halp_meta(uuid, "d7a2f5c9-3e8b-4b1d-a6f2-5c8e9d1f3b7a") + + struct ins + { + struct + { + halp_meta(name, "Mesh"); + halp::dynamic_gpu_geometry mesh; + float transform[16]{}; + bool dirty_mesh = false; + bool dirty_transform = false; + } geometry_in; + + // Texture slots. Non-null handle → emitted as a dynamic texture + // on the material; null → shader falls back to the scalar factor. + halp::gpu_texture_input<"Base Color Tex"> base_color_tex; + halp::gpu_texture_input<"Metal Rough Tex"> metal_rough_tex; + halp::gpu_texture_input<"Normal Tex"> normal_tex; + halp::gpu_texture_input<"Emissive Tex"> emissive_tex; + + // PBR factors — used as-is by the material (no per-factor toggle). + // Base color, emissive and emissive strength start at the glTF defaults; + // metallic (0) and roughness (0.5) do not (glTF uses 1.0 for both). 
+ halp::hslider_f32<"Color R", halp::range{0., 1., 1.}> base_r; + halp::hslider_f32<"Color G", halp::range{0., 1., 1.}> base_g; + halp::hslider_f32<"Color B", halp::range{0., 1., 1.}> base_b; + halp::hslider_f32<"Color A", halp::range{0., 1., 1.}> base_a; + halp::hslider_f32<"Metallic", halp::range{0., 1., 0.}> metallic; + halp::hslider_f32<"Roughness", halp::range{0., 1., 0.5}> roughness; + halp::hslider_f32<"Emissive R", halp::range{0., 10., 0.}> em_r; + halp::hslider_f32<"Emissive G", halp::range{0., 10., 0.}> em_g; + halp::hslider_f32<"Emissive B", halp::range{0., 10., 0.}> em_b; + halp::hslider_f32<"Emissive strength", halp::range{0., 10., 1.}> em_strength; + + // Root-node placement. Same TRS controls as Transform3D / Instancer + // so the node stands alone without a separate transform upstream. + PositionControl position; + RotationControl rotation; + ScaleControl scale; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void operator()(); + + // Render-thread hooks. init allocates one Material and one RawTransform + // arena slot and seeds each with default bytes; update packs the factor + // fields from the control inputs into a MaterialGPU, uploads it to the + // Material slot and writes the TRS controls to the RawTransform slot; + // release frees both slots. For each texture inlet carrying a non-null + // handle, update also stamps a dynamic-slot reference into textureRefs[] + // (via the registry's resolveDynamicSlot) and sets the matching feature bit. + void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + // Republished stable shared_ptr when nothing changed, so ScenePreprocessor's + // identity/fingerprint caches stay warm. 
+ std::shared_ptr m_wrapped_state; + CachedTRS m_cachedTRS{}; + + // Identity cache: upstream mesh-buffer handles + vertex/index count + + // texture handles + factor values. Dirty if any change. + void* m_cached_buf0{}; + int64_t m_cached_vertices{-1}; + int64_t m_cached_indices{-1}; + void* m_cached_tex[4]{}; + float m_cached_factors[10]{}; + int64_t m_version_counter{0}; + + // Stable ids minted once on first rebuild and reused across every + // subsequent rebuild so downstream fingerprint / SER / BVH caches stay + // identity-stable. + uint64_t m_material_stable_id{}; + uint64_t m_primitive_stable_id{}; + uint64_t m_xform_stable_id{}; + + // Slots: one in the Material arena, one in RawTransform for the + // emitted scene_transform. Allocated in init(), written in update(), + // freed in release(). + score::gfx::GpuResourceRegistry::Slot material_slot; + score::gfx::GpuResourceRegistry::Slot raw_transform_slot; + + // Ossia-facing snapshots. Written once in init() on the render + // thread; copied onto the emitted material_component / + // scene_transform raw_slot in operator()(). 
+ ossia::gpu_slot_ref m_material_ref{}; + ossia::gpu_slot_ref m_xform_ref{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp index e2a7681012..1a4ea47f5c 100644 --- a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.cpp @@ -1,60 +1,11 @@ #include "PCLToGeometry.hpp" -#include #include -#include #include -#include namespace Threedim { -PCLToMesh::PCLToMesh() -{ - rebuild_transform(inputs, outputs); - outputs.geometry.dirty_mesh = true; -} - -void PCLToMesh::operator()() -{ - auto& tex = this->inputs.in.buffer; - if (!tex.changed) - return; - - float* data = reinterpret_cast(tex.raw_data); - create_mesh(std::span(data, tex.byte_size / sizeof(float))); -} - -void PCLToMesh::create_mesh(std::span v) -{ - { - // std::size_t vertices = v.size() / 3; - - // this->complete.clear(); - // this->complete.resize(std::ceil((v.size() / 3.) 
* (3 + 3 + 2))); - // std::copy_n(v.begin(), v.size(), complete.begin()); - - // auto& pch = rnd::fast_random_device(); - // this->complete.resize(6 * 25000); - // for (float& v : this->complete) - // v = std::uniform_real_distribution<>{0.f, 1.f}(pch); - - auto prev_size = outputs.geometry.mesh.buffers.main_buffer.element_count; - const bool changed = v.size() != prev_size; // FIXME - //complete.assign(v.begin(), v.end()); - - outputs.geometry.mesh.buffers.main_buffer.elements - = (float*)this->inputs.in.buffer.raw_data; //complete.data(); - outputs.geometry.mesh.buffers.main_buffer.element_count - = this->inputs.in.buffer.byte_size / sizeof(float); //complete.size(); - outputs.geometry.mesh.buffers.main_buffer.dirty = true; - - outputs.geometry.mesh.input.input0.byte_offset = 0; - outputs.geometry.mesh.vertices = v.size() / 6; - outputs.geometry.dirty_mesh = true; // FIXME - } -} - PCLToMesh2::PCLToMesh2() { diff --git a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp index d38908204d..48d7f08460 100644 --- a/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/PCLToGeometry.hpp @@ -1,121 +1,14 @@ #pragma once -#include - -#include - #include #include #include #include #include -namespace halp -{ - -struct position_gpu_geometry -{ - struct buffers - { - struct - { - enum - { - dynamic, - vertex - }; - void* handle{}; - int size{}; - bool dirty{}; - } main_buffer; - } buffers; - - struct bindings - { - struct - { - enum - { - per_vertex - }; - int stride = 3 * sizeof(float); - int step_rate = 1; - } position_binding; - }; - - struct attributes - { - struct - { - enum - { - position - }; - using datatype = float[3]; - int32_t offset = 0; - int32_t binding = 0; - } position; - }; - - struct - { - struct - { - static constexpr auto buffer() { return &buffers::main_buffer; } - int offset = 0; - } input0; - } input; - - int vertices = 0; 
- enum - { - triangles, - counter_clockwise, - cull_back - }; -}; - -} namespace Threedim { -class PCLToMesh -{ -public: - halp_meta(name, "Pointcloud to mesh") - halp_meta(category, "Visuals/Meshes") - halp_meta(c_name, "pointcloud_to_mesh") - halp_meta(manual_url, "https://ossia.io/score-docs/processes/pointcloud-to-mesh.html") - halp_meta(uuid, "2450ffbf-04ed-4b42-8848-69f200d2742a") - - struct ins - { - halp::cpu_buffer_input<"Buffer"> in; - PositionControl position; - RotationControl rotation; - ScaleControl scale; - } inputs; - - struct - { - struct - { - halp_meta(name, "Geometry"); - halp::position_color_packed_geometry mesh; - float transform[16]{}; - bool dirty_mesh = false; - bool dirty_transform = false; - } geometry; - } outputs; - - PCLToMesh(); - void create_mesh(std::span v); - void operator()(); - - std::vector complete; -}; - - class PCLToMesh2 { public: diff --git a/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp b/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp index 9a894a567f..e79c4181b5 100644 --- a/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/Primitive.hpp @@ -1,5 +1,7 @@ #pragma once +#include "TransformHelper.hpp" + #include #include #include @@ -13,9 +15,20 @@ struct Primitive halp_meta(author, "Jean-Michaël Celerier, vcglib") halp_meta(manual_url, "https://ossia.io/score-docs/processes/meshes.html#primitive") + // Derived classes' operator() calls this with their own inputs + + // geometry output to propagate the position/rotation/scale controls + // into the transform[16] slot + dirty_transform flag. Only sets + // dirty_transform when the matrix actually changes vs last frame. 
+ template + void apply_transform(const In& in, Out& out) + { + out.dirty_transform = computeTRSMatrix(in, out.transform, m_cachedTRS); + } + void operator()() { } PrimitiveOutputs outputs; std::vector complete; + CachedTRS m_cachedTRS{}; }; // Plane is a special case due to needing a different geometry type @@ -53,9 +66,14 @@ struct Plane void prepare(halp::setup) { update(); } void update(); - void operator()() { } + void operator()() + { + outputs.geometry.dirty_transform + = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS); + } std::vector complete; + CachedTRS m_cachedTRS{}; }; struct Cube : Primitive @@ -74,6 +92,7 @@ struct Cube : Primitive void prepare(halp::setup) { update(); } void update(); + void operator()() { apply_transform(inputs, outputs.geometry); } }; struct Sphere : Primitive @@ -97,6 +116,7 @@ struct Sphere : Primitive void prepare(halp::setup) { update(); } void update(); + void operator()() { apply_transform(inputs, outputs.geometry); } }; struct Icosahedron : Primitive @@ -114,6 +134,7 @@ struct Icosahedron : Primitive void prepare(halp::setup) { update(); } void update(); + void operator()() { apply_transform(inputs, outputs.geometry); } }; struct Cone : Primitive @@ -151,6 +172,7 @@ struct Cone : Primitive void prepare(halp::setup) { update(); } void update(); + void operator()() { apply_transform(inputs, outputs.geometry); } }; struct Cylinder : Primitive @@ -178,6 +200,7 @@ struct Cylinder : Primitive void prepare(halp::setup) { update(); } void update(); + void operator()() { apply_transform(inputs, outputs.geometry); } }; struct Torus : Primitive @@ -215,6 +238,7 @@ struct Torus : Primitive void prepare(halp::setup) { update(); } void update(); + void operator()() { apply_transform(inputs, outputs.geometry); } }; } diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.cpp new file mode 100644 index 
0000000000..14ba2d1d4b --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.cpp @@ -0,0 +1,98 @@ +#include "FormatOverride.hpp" + +#include + +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +namespace +{ + +// Recursively rewrites primitive_cloud_components inside a scene_node's +// children list. Returns a fresh scene_node shared_ptr when something +// was rewritten (or a nested scene_node was rewritten), the original +// otherwise — so unchanged subtrees keep their identity for downstream +// fingerprinting. +ossia::scene_node_ptr rewriteNode( + const ossia::scene_node_ptr& in, std::string_view override) +{ + if(!in || !in->children || in->children->empty()) + return in; + + bool any_rewrite = false; + std::vector fresh_children; + fresh_children.reserve(in->children->size()); + + for(const auto& payload : *in->children) + { + if(auto* pc = ossia::get_if(&payload)) + { + if(*pc && (*pc)->format_id != override) + { + auto fresh = std::make_shared(**pc); + fresh->format_id = std::string{override}; + fresh_children.emplace_back( + ossia::primitive_cloud_component_ptr{std::move(fresh)}); + any_rewrite = true; + continue; + } + } + else if(auto* sn = ossia::get_if(&payload)) + { + auto rewritten = rewriteNode(*sn, override); + if(rewritten.get() != sn->get()) + { + fresh_children.emplace_back(std::move(rewritten)); + any_rewrite = true; + continue; + } + } + fresh_children.emplace_back(payload); + } + + if(!any_rewrite) + return in; + + auto fresh = std::make_shared(*in); + fresh->children = std::make_shared>( + std::move(fresh_children)); + return fresh; +} + +} // namespace + +std::shared_ptr applyFormatOverride( + std::shared_ptr state, std::string_view override) +{ + if(!state) + return nullptr; + if(override.empty()) + return std::const_pointer_cast(state); + + auto out = std::make_shared(*state); + + if(state->roots && !state->roots->empty()) + { + auto fresh_roots = std::make_shared>(); + 
fresh_roots->reserve(state->roots->size()); + bool any_rewrite = false; + for(const auto& root : *state->roots) + { + auto rewritten = rewriteNode(root, override); + if(rewritten.get() != root.get()) + any_rewrite = true; + fresh_roots->push_back(std::move(rewritten)); + } + if(any_rewrite) + out->roots = std::move(fresh_roots); + } + + out->version = state->version + 1; + out->dirty_index = state->dirty_index + 1; + return out; +} + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.hpp new file mode 100644 index 0000000000..19698b4926 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/FormatOverride.hpp @@ -0,0 +1,34 @@ +#pragma once + +#include + +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +// Shallow-clones `state` and rewrites every primitive_cloud_component +// reachable through the scene tree to carry `override` as its +// `format_id`. Heavy fields (raw_data buffer_resource, extra_buffers, +// bounds, …) are shared via shared_ptr — no GPU upload duplicates. +// +// Used by AssetLoader's "Format override" line edit and the TagAs +// pass-through node so unrecognised PLY columns / procedural producers +// without an autodetected format_id can still be routed by a +// FlattenedSceneFilterNode in mode 12 (format_id == match_str). +// +// `override.empty()` returns the input verbatim (`const_pointer_cast` +// to drop the const, but no actual mutation is performed). A null +// `state` returns null. Otherwise the returned shared_ptr is freshly +// allocated; its `version` and `dirty_index` are bumped by 1 so +// downstream change-detection sees a fresh frame. +// +// Walks scene_node children recursively. Nested scene_node_ptr inside +// children is itself deep-cloned so the rewrite is leak-free for the +// const tree shape. 
+std::shared_ptr applyFormatOverride( + std::shared_ptr state, + std::string_view override); + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.cpp new file mode 100644 index 0000000000..a6e5de3ea0 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.cpp @@ -0,0 +1,276 @@ +#include "PlyParser.hpp" + +#include + +#include +#include +#include +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +namespace +{ + +// Standard mesh column set. A PLY whose vertex element has only these +// columns (and a face element) is a regular triangle mesh and goes +// through the existing AssetLoader mesh path. +bool is_mesh_column(std::string_view name) noexcept +{ + static constexpr std::string_view mesh_cols[] = { + "x", "y", "z", + "nx", "ny", "nz", + "red", "green", "blue", "alpha", + "r", "g", "b", "a", + "s", "t", "u", "v", + "texture_u", "texture_v", + }; + for(auto c : mesh_cols) + if(name == c) + return true; + return false; +} + +// Bytes per PLY scalar type. Lists aren't supported on the splat path +// (caller filters them out) so countType is irrelevant here. +uint32_t byte_size_for(miniply::PLYPropertyType t) noexcept +{ + using PT = miniply::PLYPropertyType; + switch(t) + { + case PT::Char: case PT::UChar: return 1; + case PT::Short: case PT::UShort: return 2; + case PT::Int: case PT::UInt: return 4; + case PT::Float: return 4; + case PT::Double: return 8; + default: return 0; + } +} + +// Round `v` up to the next multiple of `align` (a power of two). +uint32_t align_up(uint32_t v, uint32_t align) noexcept +{ + return (v + (align - 1)) & ~(align - 1); +} + +// Detect whether the vertex element looks like a splat. Returns true +// if it carries any column NOT in the standard mesh set OR if there +// is no `face` element in the file. 
+bool detect_splat_shape(miniply::PLYReader& reader) +{ + bool has_face = false; + bool has_extra = false; + + for(uint32_t i = 0, end = reader.num_elements(); i < end; ++i) + { + auto* el = reader.get_element(i); + if(!el) continue; + if(el->name == "face") + { + has_face = true; + continue; + } + if(el->name == miniply::kPLYVertexElement) + { + for(auto& p : el->properties) + { + // List columns aren't a splat thing — skip. + if(p.countType != miniply::PLYPropertyType::None) + continue; + if(!is_mesh_column(p.name)) + { + has_extra = true; + break; + } + } + } + } + return has_extra || !has_face; +} + +// Recognise a known column-name fingerprint and return the canonical +// format_id. Empty result means "unknown / wired by hand". +std::string detect_format_id(const miniply::PLYElement& vtx) +{ + bool has_f_dc = false; + bool has_f_rest = false; + bool has_scale = false; + bool has_rot = false; + bool has_opacity = false; + for(auto& p : vtx.properties) + { + if(p.countType != miniply::PLYPropertyType::None) + continue; + const auto& n = p.name; + if(n == "f_dc_0" || n == "f_dc_1" || n == "f_dc_2") has_f_dc = true; + else if(n.rfind("f_rest_", 0) == 0) has_f_rest = true; + else if(n == "scale_0" || n == "scale_1" || n == "scale_2") has_scale = true; + else if(n == "rot_0" || n == "rot_1" || n == "rot_2" || n == "rot_3") has_rot = true; + else if(n == "opacity") has_opacity = true; + } + if(has_f_dc && has_f_rest && has_scale && has_rot && has_opacity) + return "3dgs.classic"; + return {}; +} + +} // namespace + +bool ply_is_splat_shaped(std::string_view path) +{ + // miniply::PLYReader expects a NUL-terminated path. string_view from + // halp::file_port::filename is null-terminated in practice but not + // guaranteed; copy to be safe. 
+ std::string p{path}; + miniply::PLYReader reader(p.c_str()); + if(!reader.valid()) + return false; + return detect_splat_shape(reader); +} + +ossia::primitive_cloud_component_ptr parse_ply(std::string_view path) +{ + std::string p{path}; + miniply::PLYReader reader(p.c_str()); + if(!reader.valid()) + return nullptr; + + if(!detect_splat_shape(reader)) + return nullptr; + + // Walk to the vertex element. + while(reader.has_element()) + { + if(!reader.element_is(miniply::kPLYVertexElement)) + { + reader.next_element(); + continue; + } + if(!reader.load_element()) + return nullptr; + break; + } + if(!reader.has_element()) + return nullptr; + + const auto* vtx = reader.element(); + if(!vtx) + return nullptr; + const uint32_t N = reader.num_rows(); + if(N == 0) + return nullptr; + + // Skip list columns: not part of the splat schema. We collect the + // scalar-only column subset and lay them out tightly in row order. + // The conventional layout is: each scalar at its natural alignment, + // row stride padded to 4 (almost every splat PLY is all-float so + // this is essentially "sum of bytes per column"; we do the more + // conservative thing for mixed-type files). + struct Col + { + uint32_t prop_idx; + miniply::PLYPropertyType type; + uint32_t offset_in_row; + uint32_t size; + std::string name; + }; + std::vector cols; + cols.reserve(vtx->properties.size()); + + uint32_t row_offset = 0; + uint32_t row_align = 1; + for(uint32_t i = 0; i < (uint32_t)vtx->properties.size(); ++i) + { + const auto& p = vtx->properties[i]; + if(p.countType != miniply::PLYPropertyType::None) + continue; // list — skip + const uint32_t sz = byte_size_for(p.type); + if(sz == 0) + continue; + row_offset = align_up(row_offset, sz); + cols.push_back(Col{i, p.type, row_offset, sz, p.name}); + row_offset += sz; + if(sz > row_align) + row_align = sz; + } + if(cols.empty()) + return nullptr; + const uint32_t row_stride = align_up(row_offset, row_align); + + // Allocate the packed row buffer. 
shared_ptr wraps the + // storage; the buffer_resource keeps it alive via its data field. + const std::size_t bytes = std::size_t(N) * row_stride; + auto storage = std::shared_ptr(new uint8_t[bytes]()); + + // Extract each scalar column at its row offset. + for(const auto& c : cols) + { + uint32_t idx = c.prop_idx; + reader.extract_properties_with_stride( + &idx, 1, c.type, + storage.get() + c.offset_in_row, row_stride); + } + + // AABB: find x/y/z by name, read each position from the packed buffer. + ossia::aabb bounds{}; + bounds.min[0] = bounds.min[1] = bounds.min[2] = 1.f; + bounds.max[0] = bounds.max[1] = bounds.max[2] = -1.f; + { + const Col* cx = nullptr; const Col* cy = nullptr; const Col* cz = nullptr; + for(const auto& c : cols) + { + if(c.name == "x") cx = &c; + else if(c.name == "y") cy = &c; + else if(c.name == "z") cz = &c; + } + if(cx && cy && cz + && cx->type == miniply::PLYPropertyType::Float + && cy->type == miniply::PLYPropertyType::Float + && cz->type == miniply::PLYPropertyType::Float) + { + const uint8_t* base = storage.get(); + for(uint32_t i = 0; i < N; ++i) + { + float x, y, z; + std::memcpy(&x, base + i * row_stride + cx->offset_in_row, sizeof(float)); + std::memcpy(&y, base + i * row_stride + cy->offset_in_row, sizeof(float)); + std::memcpy(&z, base + i * row_stride + cz->offset_in_row, sizeof(float)); + bounds.expand(x, y, z); + } + } + } + + // Wrap as a buffer_resource. Storage uses storage_buffer usage so + // ScenePreprocessor uploads it as an SSBO. 
+ auto br = std::make_shared(); + br->resource = ossia::buffer_data{ + .data = std::shared_ptr(storage, storage.get()), + .byte_size = (int64_t)bytes, + .usage_hint = ossia::buffer_data::usage::storage_buffer}; + br->content_hash = (uint64_t)(uintptr_t)storage.get(); + + auto out = std::make_shared(); + out->raw_data = std::move(br); + out->row_stride = row_stride; + out->primitive_count = N; + out->topology = ossia::primitive_topology::points; + out->format_id = detect_format_id(*vtx); + // For known formats, name the per-row struct so ScenePreprocessor + // exposes raw_data as a per-vertex `splat: ` ATTRIBUTE and the + // CSF can declare a matching TYPES entry. Empty falls back to the + // legacy AUXILIARY raw_splats path. + if(out->format_id == "3dgs.classic") + out->struct_type_name = "Splat3DGS"; + out->bounds = bounds; + out->stable_id = ossia::mint_stable_id(); + + // (format_params left empty for v1: format CSF authors declare the + // LAYOUT block themselves matching the PLY column order. Adding a + // reflective column-table here later is a pure addition — no + // consumer depends on its absence.) + + return out; +} + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.hpp new file mode 100644 index 0000000000..0dad735832 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/PlyParser.hpp @@ -0,0 +1,34 @@ +#pragma once + +#include + +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +// Cheap header-only sniff: is the PLY file at `path` shaped like a +// primitive cloud (no `face` element, or has columns outside the +// standard mesh set {x,y,z,nx,ny,nz,red,green,blue,alpha,s,t,u,v})? +// Reads only the textual header, doesn't load row data. +bool ply_is_splat_shaped(std::string_view path); + +// Parse `path` and produce a primitive_cloud_component. 
The component's +// raw_data is a single tightly-packed buffer of the PLY rows: each row +// is a struct of the columns in their PLY-declared order, std430-style +// natural alignment (each float at +4, each int at +4, each uchar at +// +1 with no inter-field padding — but the row stride is rounded to +// the largest field alignment within the row, see +// internal::row_stride_for). +// +// Returns nullptr if the PLY is not splat-shaped, or if parsing fails. +// +// Sets format_id to a recognized signature when columns match a known +// fingerprint: +// - has f_dc_0/1/2 + f_rest_* + scale_0/1/2 + rot_0/1/2/3 + opacity +// -> "3dgs.classic" +// - else empty (the user wires the chain by hand or saves a preset) +ossia::primitive_cloud_component_ptr parse_ply(std::string_view path); + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.cpp new file mode 100644 index 0000000000..e59b02daf4 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.cpp @@ -0,0 +1,44 @@ +#include "SceneFromCloud.hpp" + +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +std::shared_ptr sceneStateFromCloud( + ossia::primitive_cloud_component_ptr cloud, + std::string_view source_label) +{ + if(!cloud) + return nullptr; + + auto children = std::make_shared>(); + children->push_back(ossia::primitive_cloud_component_ptr{cloud}); + + auto node = std::make_shared(); + // Stable id keyed on the cloud's raw_data pointer. Required by the + // registry's slot allocator: a 0 id is uncacheable and the cloud + // disappears between frames. + uint64_t key = 0; + if(cloud->raw_data) + key = (uint64_t)((uintptr_t)cloud->raw_data.get()); + if(key == 0) + key = (uint64_t)((uintptr_t)cloud.get()); + node->id.value = key; + node->name = source_label.empty() + ? 
std::string("primitive_cloud") + : std::string(source_label); + node->children = std::move(children); + + auto roots = std::make_shared>(); + roots->push_back(std::move(node)); + + auto state = std::make_shared(); + state->roots = std::move(roots); + state->version = 1; + state->dirty_index = 1; + return state; +} + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.hpp new file mode 100644 index 0000000000..550c2cf758 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SceneFromCloud.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include + +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +// Wrap a parsed primitive_cloud_component into a fresh scene_state with +// one scene_node carrying it as its sole payload. Mirrors +// SceneFromMeshes::sceneStateFromMeshes for the splat path. +// +// `source_label` becomes the scene_node name (typically the source +// filename). Returns nullptr if `cloud` is null. 
+std::shared_ptr sceneStateFromCloud( + ossia::primitive_cloud_component_ptr cloud, + std::string_view source_label = {}); + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.cpp new file mode 100644 index 0000000000..43136ad83f --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.cpp @@ -0,0 +1,56 @@ +#include "SplatBinary.hpp" + +#include +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +ossia::primitive_cloud_component_ptr parse_splat_binary(std::string_view bytes) +{ + constexpr uint32_t kRowSize = 32; + if(bytes.empty() || (bytes.size() % kRowSize) != 0) + return nullptr; + + const uint32_t N = (uint32_t)(bytes.size() / kRowSize); + if(N == 0) + return nullptr; + + // Copy into a stable shared buffer. The input string_view points at + // halp's mmap or text-file storage which doesn't outlive this call. + auto storage = std::shared_ptr(new uint8_t[bytes.size()]); + std::memcpy(storage.get(), bytes.data(), bytes.size()); + + // AABB from first 12 bytes of each row (xyz floats). 
+ ossia::aabb bounds{}; + bounds.min[0] = bounds.min[1] = bounds.min[2] = 1.f; + bounds.max[0] = bounds.max[1] = bounds.max[2] = -1.f; + for(uint32_t i = 0; i < N; ++i) + { + float x, y, z; + std::memcpy(&x, storage.get() + i * kRowSize + 0, sizeof(float)); + std::memcpy(&y, storage.get() + i * kRowSize + 4, sizeof(float)); + std::memcpy(&z, storage.get() + i * kRowSize + 8, sizeof(float)); + bounds.expand(x, y, z); + } + + auto br = std::make_shared(); + br->resource = ossia::buffer_data{ + .data = std::shared_ptr(storage, storage.get()), + .byte_size = (int64_t)bytes.size(), + .usage_hint = ossia::buffer_data::usage::storage_buffer}; + br->content_hash = (uint64_t)(uintptr_t)storage.get(); + + auto out = std::make_shared(); + out->raw_data = std::move(br); + out->row_stride = kRowSize; + out->primitive_count = N; + out->topology = ossia::primitive_topology::points; + out->format_id = "3dgs.splat-binary"; + out->bounds = bounds; + out->stable_id = ossia::mint_stable_id(); + return out; +} + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.hpp new file mode 100644 index 0000000000..886337dd25 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SplatBinary.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include + +#include + +namespace Threedim::PrimitiveCloud +{ + +// Parse an Antimatter15 .splat file (32 bytes per primitive, +// fixed schema). +// +// On-disk row layout (little-endian, packed, no padding): +// bytes 0..11 position xyz, 3 × float32 +// bytes 12..23 scale_xyz, 3 × float32 (linear, NOT log-space) +// bytes 24..27 color rgba, 4 × uint8 unorm +// bytes 28..31 rotation quat, 4 × uint8 (sign-encoded as +// (q + 1) * 127.5 around index 0; recipient +// reconstructs by (b - 128) / 128) +// +// We pass these bytes through verbatim. 
The "3dgs.splat-binary" preset's +// CSF declares the matching LAYOUT, dequantizes color8 to color, and +// reconstructs the quat from the int8s. +// +// Returns nullptr if `bytes.size() % 32 != 0` or the input is empty. +ossia::primitive_cloud_component_ptr parse_splat_binary(std::string_view bytes); + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.cpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.cpp new file mode 100644 index 0000000000..64076bf3ac --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.cpp @@ -0,0 +1,160 @@ +#include "SpzCodec.hpp" + +#include + +#include +#include +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +namespace +{ + +// Canonical row layout matching what the 3dgs.classic preset's +// AUXILIARY LAYOUT in 01_Decode.cs expects. Field offsets in floats. +struct CanonicalRow +{ + static constexpr uint32_t kFloats = 62; + static constexpr uint32_t kBytes = kFloats * sizeof(float); + + static constexpr uint32_t kPos = 0; // 3 floats + static constexpr uint32_t kNormal = 3; // 3 floats (zero-filled) + static constexpr uint32_t kSHDC = 6; // 3 floats + static constexpr uint32_t kSHRest = 9; // 45 floats (channel-major) + static constexpr uint32_t kAlpha = 54; // 1 float (pre-sigmoid) + static constexpr uint32_t kScale = 55; // 3 floats (log-space) + static constexpr uint32_t kRot = 58; // 4 floats (w,x,y,z) +}; + +} // namespace + +ossia::primitive_cloud_component_ptr parse_spz(std::string_view bytes) +{ + if(bytes.empty()) + return nullptr; + + // The Niantic library expects the gzipped/NGSP payload as a + // std::vector. Copy in (single allocation; the cost is + // dwarfed by the gzip inflate). Specify RUB→RDF in the unpack + // options so the library handles the basis flip for us. 
+ std::vector data( + reinterpret_cast(bytes.data()), + reinterpret_cast(bytes.data()) + bytes.size()); + + spz::UnpackOptions opts; + opts.to = spz::CoordinateSystem::RDF; + + spz::GaussianCloud cloud = spz::loadSpz(data, opts); + if(cloud.numPoints <= 0 || cloud.positions.empty()) + return nullptr; + + const uint32_t N = (uint32_t)cloud.numPoints; + const uint32_t shDeg = (uint32_t)cloud.shDegree; + const uint32_t shCoefs = (shDeg == 0) ? 0 + : (shDeg == 1) ? 3 + : (shDeg == 2) ? 8 + : (shDeg == 3) ? 15 + : 24; // degree 4 + const uint32_t restPad = 15; // 3dgs.classic preset always reads 15 R/G/B coefs + + if(cloud.positions.size() != (size_t)N * 3 + || cloud.scales.size() != (size_t)N * 3 + || cloud.rotations.size() != (size_t)N * 4 + || cloud.alphas.size() != (size_t)N + || cloud.colors.size() != (size_t)N * 3) + { + return nullptr; + } + if(shCoefs > 0 && cloud.sh.size() != (size_t)N * shCoefs * 3) + return nullptr; + + const std::size_t totalBytes + = (std::size_t)N * (std::size_t)CanonicalRow::kBytes; + auto storage = std::shared_ptr(new uint8_t[totalBytes]()); + + ossia::aabb bounds{}; + bounds.min[0] = bounds.min[1] = bounds.min[2] = 1.f; + bounds.max[0] = bounds.max[1] = bounds.max[2] = -1.f; + + // Effective coefficient count we'll actually fill per-channel + // (clamped to 15 — preset hardcodes 45 = 3·15 rest floats; degree-4 + // input gets truncated to degree 3 here, lossy but renderable). + const uint32_t fillCoefs = (shCoefs > restPad) ? restPad : shCoefs; + + float* base = reinterpret_cast(storage.get()); + for(uint32_t i = 0; i < N; ++i) + { + float* row = base + (std::size_t)i * CanonicalRow::kFloats; + + // Position. + const float x = cloud.positions[i * 3 + 0]; + const float y = cloud.positions[i * 3 + 1]; + const float z = cloud.positions[i * 3 + 2]; + row[CanonicalRow::kPos + 0] = x; + row[CanonicalRow::kPos + 1] = y; + row[CanonicalRow::kPos + 2] = z; + bounds.expand(x, y, z); + + // Normals — not stored in SPZ; leave zero-filled. 
+ + // SH DC (= colors). + row[CanonicalRow::kSHDC + 0] = cloud.colors[i * 3 + 0]; + row[CanonicalRow::kSHDC + 1] = cloud.colors[i * 3 + 1]; + row[CanonicalRow::kSHDC + 2] = cloud.colors[i * 3 + 2]; + + // SH rest. SPZ packs (R,G,B) inner per coefficient; PLY canonical + // is channel-major (R block, G block, B block) per row. Transpose. + if(fillCoefs > 0) + { + const float* sh_src + = cloud.sh.data() + (std::size_t)i * shCoefs * 3; + float* shR = row + CanonicalRow::kSHRest + 0 * restPad; + float* shG = row + CanonicalRow::kSHRest + 1 * restPad; + float* shB = row + CanonicalRow::kSHRest + 2 * restPad; + for(uint32_t c = 0; c < fillCoefs; ++c) + { + shR[c] = sh_src[c * 3 + 0]; + shG[c] = sh_src[c * 3 + 1]; + shB[c] = sh_src[c * 3 + 2]; + } + // Remaining coefs (fillCoefs..restPad) stay zero. + } + + // Alpha — both formats store the pre-sigmoid value; pass through. + row[CanonicalRow::kAlpha] = cloud.alphas[i]; + + // Scale (log-space). + row[CanonicalRow::kScale + 0] = cloud.scales[i * 3 + 0]; + row[CanonicalRow::kScale + 1] = cloud.scales[i * 3 + 1]; + row[CanonicalRow::kScale + 2] = cloud.scales[i * 3 + 2]; + + // Rotation. SPZ: (x,y,z,w). PLY canonical: (w,x,y,z). 
+ row[CanonicalRow::kRot + 0] = cloud.rotations[i * 4 + 3]; // w + row[CanonicalRow::kRot + 1] = cloud.rotations[i * 4 + 0]; // x + row[CanonicalRow::kRot + 2] = cloud.rotations[i * 4 + 1]; // y + row[CanonicalRow::kRot + 3] = cloud.rotations[i * 4 + 2]; // z + } + + auto br = std::make_shared(); + br->resource = ossia::buffer_data{ + .data = std::shared_ptr(storage, storage.get()), + .byte_size = (int64_t)totalBytes, + .usage_hint = ossia::buffer_data::usage::storage_buffer}; + br->content_hash = (uint64_t)(uintptr_t)storage.get(); + + auto out = std::make_shared(); + out->raw_data = std::move(br); + out->row_stride = CanonicalRow::kBytes; + out->primitive_count = N; + out->topology = ossia::primitive_topology::points; + out->format_id = "3dgs.classic"; + out->struct_type_name = "Splat3DGS"; + out->bounds = bounds; + out->stable_id = ossia::mint_stable_id(); + return out; +} + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.hpp b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.hpp new file mode 100644 index 0000000000..975e1358de --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/PrimitiveCloud/SpzCodec.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include + +#include +#include + +namespace Threedim::PrimitiveCloud +{ + +// Decode a Niantic SPZ v1-3 file into a primitive_cloud_component. +// +// SPZ stores splats column-grouped (positions, then scales, then +// rotations, then alphas, then colors, then SH) inside a gzip- +// compressed payload, in the RUB coordinate system. 
We unpack via +// the vendored Niantic library, rotate to RDF (the convention every +// existing 3dgs.classic preset assumes), then transpose into the +// canonical 62-float / 248-byte PLY-compatible row layout: +// +// floats 0..2 x, y, z +// floats 3..5 nx, ny, nz (zero — not in SPZ) +// floats 6..8 f_dc_0..2 (SH DC = colors) +// floats 9..53 f_rest_0..44 (R coeffs, then G, then B; padded +// with zero for shDegree<3) +// float 54 opacity (pre-sigmoid) +// floats 55..57 scale_0..2 (log-space) +// floats 58..61 rot_0..3 (PLY convention w,x,y,z) +// +// Returns nullptr on parse failure or v4 files (ZSTD support not +// vendored — converting v4 → v3 with the upstream `spz-tool` works +// around it). Sets format_id = "3dgs.classic" so the existing preset +// picks it up transparently. +ossia::primitive_cloud_component_ptr parse_spz(std::string_view bytes); + +} // namespace Threedim::PrimitiveCloud diff --git a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp index bc118514c7..3dcf0ce2a1 100644 --- a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.cpp @@ -7,6 +7,9 @@ #include #include +#include +#include + #include #include #include @@ -29,8 +32,18 @@ Model::Model( { if(QFile fs{init}; fs.open(QIODevice::ReadOnly)) { - QFile vs{fi.absolutePath() + QDir::separator() + fi.baseName() + ".vs"}; - if(vs.open(QIODevice::ReadOnly)) + m_scriptPath = init; + if(QFile vs{fi.absolutePath() + QDir::separator() + fi.baseName() + ".vs"}; + vs.open(QIODevice::ReadOnly)) + { + (void)setProgram( + {ShaderSource::ProgramType::RawRasterPipeline, vs.readAll(), + fs.readAll()}); + return; + } + else if( + QFile vs{fi.absolutePath() + QDir::separator() + fi.baseName() + ".vert"}; + vs.open(QIODevice::ReadOnly)) { (void)setProgram( {ShaderSource::ProgramType::RawRasterPipeline, vs.readAll(), @@ -118,7 
+131,7 @@ bool Model::validate(const std::vector& txt) const noexcept { ShaderSource src{txt}; src.type = isf::parser::ShaderType::RawRasterPipeline; - const auto& [_, error] = ProgramCache::instance().get(src); + const auto& [_, error] = ProgramCache::instance().get(src, m_scriptPath); if(!error.isEmpty()) { this->errorMessage(error); @@ -152,7 +165,9 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f) f.type = ProcessedProgram::ProgramType::RawRasterPipeline; setVertex(f.vertex); setFragment(f.fragment); - if(const auto& [processed, error] = ProgramCache::instance().get(f); bool(processed)) + if(const auto& [processed, error] + = ProgramCache::instance().get(f, m_scriptPath); + bool(processed)) { ossia::flat_map previous_values; for(auto inl : m_inlets) @@ -164,7 +179,6 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f) m_processedProgram = *processed; - qDebug() << (int)f.type << (int)processed->type; // initDefaultPorts(); m_inlets.push_back(new GeometryInlet{"Geometry In", Id(1000), this}); @@ -177,7 +191,7 @@ Process::ScriptChangeResult Model::setProgram(ShaderSource f) } else { - qDebug() << "Error while processing program: " << error; + qWarning() << "RenderPipeline: error while processing program:" << error; } return {}; } @@ -190,7 +204,9 @@ Process::Descriptor ProcessFactory::descriptor(QString path) const noexcept template <> void DataStreamReader::read(const Gfx::RenderPipeline::Model& proc) { - m_stream << proc.m_program; + auto& ctx = score::IDocument::documentContext(proc); + m_stream << proc.m_program + << score::relativizeFilePath(proc.m_scriptPath, ctx); readPorts(*this, proc.m_inlets, proc.m_outlets); @@ -201,7 +217,12 @@ template <> void DataStreamWriter::write(Gfx::RenderPipeline::Model& proc) { Gfx::ShaderSource s; - m_stream >> s; + m_stream >> s >> proc.m_scriptPath; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = 
score::locateFilePath(proc.m_scriptPath, ctx); + } s.type = isf::parser::ShaderType::RawRasterPipeline; (void)proc.setProgram(s); @@ -217,6 +238,11 @@ void JSONReader::read(const Gfx::RenderPipeline::Model& proc) { obj["Vertex"] = proc.vertex(); obj["Fragment"] = proc.fragment(); + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + obj["Root"] = score::relativizeFilePath(proc.m_scriptPath, ctx); + } readPorts(*this, proc.m_inlets, proc.m_outlets); } @@ -228,6 +254,15 @@ void JSONWriter::write(Gfx::RenderPipeline::Model& proc) s.vertex = obj["Vertex"].toString(); s.fragment = obj["Fragment"].toString(); s.type = isf::parser::ShaderType::ISF; + if(auto r = obj.tryGet("Root")) + { + proc.m_scriptPath <<= *r; + if(!proc.m_scriptPath.isEmpty()) + { + auto& ctx = score::IDocument::documentContext(proc); + proc.m_scriptPath = score::locateFilePath(proc.m_scriptPath, ctx); + } + } (void)proc.setProgram(s); writePorts( diff --git a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp index f0aa99f504..dcf3486a93 100644 --- a/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/RenderPipeline/Process.hpp @@ -68,12 +68,17 @@ class Model final : public Process::ProcessModel void errorMessage(const QString& arg_2) const W_SIGNAL(errorMessage, arg_2); + // Absolute path of the shader file this model was loaded from. Used as + // the base for quoted #include resolution. Empty for in-memory source. 
+ QString rootPath() const noexcept { return m_scriptPath; } + private: void init(); void initDefaultPorts(); QString prettyName() const noexcept override; ShaderSource m_program; ProcessedProgram m_processedProgram; + QString m_scriptPath; }; struct ProcessFactory final : Process::ProcessFactory_T diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.cpp new file mode 100644 index 0000000000..b08a3dedf6 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.cpp @@ -0,0 +1,166 @@ +#include "SceneDuplicator.hpp" + +#include + +#include + +namespace Threedim +{ + +namespace +{ + +// Compute one clone's TRS given its index and the pattern / params. +// Fills an ossia::scene_transform suitable for prepending to the cloned +// root's children. All positions in world space; parents identity. +ossia::scene_transform +transformForIndex(int idx, int count, int mode, int grid_cols_hint, + float spacing, float radius) noexcept +{ + ossia::scene_transform t; + t.rotation[3] = 1.f; + t.scale[0] = t.scale[1] = t.scale[2] = 1.f; + + switch(mode) + { + case SceneDuplicator::Grid: + { + const int cols = grid_cols_hint > 0 + ? grid_cols_hint + : std::max(1, (int)std::round(std::sqrt(double(count)))); + const int row = idx / cols; + const int col = idx % cols; + // Center the grid around the origin. + const int rows = (count + cols - 1) / cols; + const float cx = (col - 0.5f * (cols - 1)) * spacing; + const float cz = (row - 0.5f * (rows - 1)) * spacing; + t.translation[0] = cx; + t.translation[1] = 0.f; + t.translation[2] = cz; + break; + } + case SceneDuplicator::Ring: + { + const float theta = (count > 0) + ? (float(idx) / float(count)) * 2.f * float(M_PI) + : 0.f; + t.translation[0] = radius * std::cos(theta); + t.translation[1] = 0.f; + t.translation[2] = radius * std::sin(theta); + // Face outward (local +Z towards the center). 
Rotate around Y so + // local -Z points away from the origin. + auto q = QQuaternion::fromEulerAngles( + 0.f, -theta * 180.f / float(M_PI), 0.f); + t.rotation[0] = q.x(); + t.rotation[1] = q.y(); + t.rotation[2] = q.z(); + t.rotation[3] = q.scalar(); + break; + } + case SceneDuplicator::Line: + default: + { + t.translation[0] = (idx - 0.5f * (count - 1)) * spacing; + t.translation[1] = 0.f; + t.translation[2] = 0.f; + break; + } + } + return t; +} + +// Build one cloned root scene_node wrapping the prototype's roots. +// Structure: +// scene_node { name = "_", children = [ +// scene_transform(xform), +// ...prototype roots (as scene_node_ptr payloads — shared; cheap) +// ]} +ossia::scene_node_ptr makeCloneRoot( + const std::vector& proto_roots, + const std::string& base_name, int idx, + const ossia::scene_transform& xform, int64_t dirty_index) +{ + auto children = std::make_shared>(); + children->reserve(1 + proto_roots.size()); + children->push_back(xform); + for(const auto& r : proto_roots) + if(r) + children->push_back(r); + + auto node = std::make_shared(); + node->name = base_name + "_" + std::to_string(idx); + node->children = std::move(children); + node->dirty_index = dirty_index; + return node; +} + +} // namespace + +void SceneDuplicator::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + const int count = std::max(1, inputs.count.value); + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + + if(!in_state || !in_state->roots || in_state->roots->empty()) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + // Base name for clones — derived from the first root's name, falling + // back to "Clone" when the prototype has no names. + std::string base = (*in_state->roots)[0] ? 
(*in_state->roots)[0]->name + : std::string{}; + if(base.empty()) + base = "Clone"; + + const int64_t version = ++m_version_counter; + + auto new_roots = std::make_shared>(); + new_roots->reserve(count); + for(int i = 0; i < count; ++i) + { + const auto xform = transformForIndex( + i, count, inputs.pattern.value, inputs.grid_cols.value, + inputs.spacing.value, inputs.radius.value); + new_roots->push_back( + makeCloneRoot(*in_state->roots, base, i, xform, version)); + } + + auto state = std::make_shared(); + state->roots = std::move(new_roots); + // Share all non-root resources with the input — clones read the same + // materials / animations / cameras / skeletons / environment. + state->materials = in_state->materials; + state->animations = in_state->animations; + state->cameras = in_state->cameras; + state->skeletons = in_state->skeletons; + state->environment = in_state->environment; + state->active_camera_id = in_state->active_camera_id; + state->version = version; + state->dirty_index = version; + + m_cached_out = state; + m_pending_dirty = 0xFF; +} + +void SceneDuplicator::operator()() +{ + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + const bool upstream_changed + = m_cached_in_state != in_state || m_cached_in_version != in_version; + if(!m_cached_out || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.hpp new file mode 100644 index 0000000000..107bd6807b --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneDuplicator.hpp @@ -0,0 +1,117 @@ +#pragma once +#include +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Scene-graph-level duplicator. 
Given a prototype scene_spec, emits N +// cloned root nodes placed by a procedural pattern. Complementary to +// Instancer: +// +// Instancer = GPU-primitive instancing (one mesh, N instances, +// one draw call — scales to 1M+ particles, but the +// prototype is a single mesh). +// SceneDuplicator = scene-graph instancing (rich prototype w/ +// hierarchy / multiple meshes / lights, N CPU +// clones each with its own TRS — scales to +// dozens-to-a-few-hundreds). +// +// Materials / animations / skeletons / environment pass through from the +// prototype unchanged (shared across clones). Only the root-level node +// tree is cloned so downstream path-based tooling addresses each clone +// independently via `/_/...`. +// +// Patterns: +// Grid — `count` clones laid out on an XZ grid with `spacing`. Y=0. +// Ring — `count` clones on a circle in the XZ plane of `radius` +// centered at the origin, facing outward (rotated around Y). +// Line — `count` clones along +X with `spacing` separation. +// +// Downstream addressing: each clone's root node is named +// `_` (0-indexed), so: +// SceneDuplicator(prototype=ChairScene, mode=Ring, count=8) → +// /Chair_0, /Chair_1, … /Chair_7 +// ConfigurePrimitive(paths=["/Chair_*"], active=false) → disables all +class SceneDuplicator +{ +public: + halp_meta(name, "Scene Duplicator") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_duplicator") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-duplicator.html") + halp_meta(uuid, "9e7a4b3d-5f2c-4a8b-9d1e-6c3f8b5d2a7e") + + enum Pattern + { + Grid, + Ring, + Line + }; + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: controls trigger rebuild(); upstream + // scene_in changes detected in operator()(). 
+ struct : halp::combobox_t<"Pattern", Pattern> + { + struct range + { + std::string_view values[3]{"Grid", "Ring", "Line"}; + int init{0}; + }; + void update(SceneDuplicator& n) { n.rebuild(); } + } pattern; + + struct : halp::spinbox_i32<"Count", halp::irange{1, 4096, 4}> + { void update(SceneDuplicator& n) { n.rebuild(); } } count; + struct : halp::hslider_f32<"Spacing", halp::range{0.01, 1000., 2.}> + { void update(SceneDuplicator& n) { n.rebuild(); } } spacing; + struct : halp::hslider_f32<"Radius", halp::range{0.01, 1000., 5.}> + { void update(SceneDuplicator& n) { n.rebuild(); } } radius; + // Grid mode: grid is `cols × rows` with cols ≈ round(sqrt(count)). + // Exposed as a control so the user can force a specific aspect. + // 0 = auto (square-ish). + struct : halp::spinbox_i32<"Grid cols", halp::irange{0, 256, 0}> + { void update(SceneDuplicator& n) { n.rebuild(); } } grid_cols; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // Stable shared_ptr cached while inputs are unchanged — keeps + // ScenePreprocessor's fingerprint fast-path warm. 
+ std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.cpp new file mode 100644 index 0000000000..edaf5cacce --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.cpp @@ -0,0 +1,66 @@ +#include "Executor.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace Gfx::SceneFilter +{ +class scene_filter_exec_node final : public gfx_exec_node +{ +public: + scene_filter_exec_node(GfxExecutionAction& ctx) + : gfx_exec_node{ctx} + { + } + + void init() + { + auto node = std::make_unique(); + id = exec_context->ui->register_node(std::move(node)); + } + + ~scene_filter_exec_node() { exec_context->ui->unregister_node(id); } + + std::string label() const noexcept override { return "Gfx::SceneFilter_node"; } +}; + +ProcessExecutorComponent::ProcessExecutorComponent( + Gfx::SceneFilter::Model& element, + const Execution::Context& ctx, + QObject* parent) + : ProcessComponent_T{element, ctx, "sceneFilterComponent", parent} +{ + auto n = ossia::make_node( + *ctx.execState, ctx.doc.plugin().exec); + + n->add_geometry(); + { + auto ctrl = qobject_cast(element.inlets()[1]); + auto& p = n->add_control(); + ctrl->setupExecution(*n->root_inputs().back(), this); + p->value = ctrl->value(); + QObject::connect( + ctrl, &Process::ControlInlet::valueChanged, this, + con_unvalidated{ctx, 1, 0, n}); + } + n->add_geometry_out(); + n->init(); + + this->node = n; + m_ossia_process = std::make_shared(n); +} + +void ProcessExecutorComponent::cleanup() +{ + ProcessComponent_T::cleanup(); +} +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.hpp 
similarity index 68% rename from src/plugins/score-plugin-threedim/Threedim/Splat/Executor.hpp rename to src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.hpp index a171183670..7e37bba397 100644 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Executor.hpp @@ -3,13 +3,14 @@ #include -namespace Gfx::Splat +namespace Gfx::SceneFilter { class Model; class ProcessExecutorComponent final - : public Execution::ProcessComponent_T + : public Execution:: + ProcessComponent_T { - COMPONENT_METADATA("1df594a9-f028-4c73-82d3-4d8c4a2ebc5b") + COMPONENT_METADATA("f1a2b3c4-d5e6-4a7b-8c9d-0e1f2a3b4c5d") public: ProcessExecutorComponent( Model& element, const Execution::Context& ctx, QObject* parent); diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Metadata.hpp new file mode 100644 index 0000000000..e527a011ca --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Metadata.hpp @@ -0,0 +1,22 @@ +#pragma once +#include + +namespace Gfx::SceneFilter +{ +class Model; +} + +PROCESS_METADATA( + , Gfx::SceneFilter::Model, "c2d8e9a4-3f5b-4e7c-9a1d-6b7e8c2f1a3b", + "scenefilter", + "Scene Filter", + Process::ProcessCategory::Visual, + "Visuals/3D/Scene", + "Filter the hierarchy of a scene_spec (visibility, layers, names)", + "ossia team", + (QStringList{"gfx", "scene", "filter", "3d", "hierarchy"}), + {}, + {}, + QUrl{}, + Process::ProcessFlags::SupportsAll +) diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.cpp new file mode 100644 index 0000000000..82fdb4a812 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.cpp @@ -0,0 +1,76 @@ +#include "Process.hpp" + +#include + +#include +#include + +#include +#include + +#include + +W_OBJECT_IMPL(Gfx::SceneFilter::Model) 
+namespace Gfx::SceneFilter +{ + +Model::Model( + const TimeVal& duration, const Id& id, QObject* parent) + : Process::ProcessModel{duration, id, "gfxProcess", parent} +{ + metadata().setInstanceName(*this); + init(); +} + +Model::~Model() = default; + +void Model::init() +{ + if(m_inlets.empty() && m_outlets.empty()) + { + m_inlets.push_back(new GeometryInlet{"Scene In", Id(0), this}); + m_inlets.push_back(new Process::ComboBox{ + std::vector>{ + {QStringLiteral("pass through"), 0}, + {QStringLiteral("keep visible only"),1}}, + 0, "Mode", Id(1), this}); + m_outlets.push_back(new GeometryOutlet{"Scene Out", Id(0), this}); + } +} + +QString Model::prettyName() const noexcept +{ + return tr("Scene Filter"); +} + +} + +template <> +void DataStreamReader::read(const Gfx::SceneFilter::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); + insertDelimiter(); +} + +template <> +void DataStreamWriter::write(Gfx::SceneFilter::Model& proc) +{ + writePorts( + *this, components.interfaces(), proc.m_inlets, + proc.m_outlets, &proc); + checkDelimiter(); +} + +template <> +void JSONReader::read(const Gfx::SceneFilter::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); +} + +template <> +void JSONWriter::write(Gfx::SceneFilter::Model& proc) +{ + writePorts( + *this, components.interfaces(), proc.m_inlets, + proc.m_outlets, &proc); +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.hpp new file mode 100644 index 0000000000..2c2b3e140e --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFilter/Process.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include +#include + +namespace Gfx::SceneFilter +{ +class Model final : public Process::ProcessModel +{ + SCORE_SERIALIZE_FRIENDS + PROCESS_METADATA_IMPL(Gfx::SceneFilter::Model) + W_OBJECT(Model) + +public: + Model( + const TimeVal& duration, + const Id& id, + QObject* parent); + + 
template + Model(Impl& vis, QObject* parent) + : Process::ProcessModel{vis, parent} + { + vis.writeTo(*this); + init(); + } + + ~Model() override; + +private: + void init(); + QString prettyName() const noexcept override; +}; + +using ProcessFactory = Process::ProcessFactory_T; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.cpp new file mode 100644 index 0000000000..f91909607d --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.cpp @@ -0,0 +1,211 @@ +#include "SceneFromMeshes.hpp" + +#include +#include +#include + +namespace Threedim +{ + +namespace +{ + +// Map a Threedim::mesh::extras[].semantic (which is halp::attribute_semantic) +// onto the ossia::attribute_semantic enum. Both use the same naming +// convention for the common cases; fall back to `custom` for anything the +// halp enum encodes that ossia doesn't name explicitly. The extra's +// `.name` field is kept alongside custom attributes so downstream shaders +// can match by string. +static ossia::attribute_semantic +translateExtraSemantic(const Threedim::extra_attribute& e) noexcept +{ + using H = halp::attribute_semantic; + switch(e.semantic) + { + case H::position: return ossia::attribute_semantic::position; + case H::normal: return ossia::attribute_semantic::normal; + case H::tangent: return ossia::attribute_semantic::tangent; + case H::bitangent: return ossia::attribute_semantic::bitangent; + case H::texcoord0: return ossia::attribute_semantic::texcoord0; + case H::texcoord1: return ossia::attribute_semantic::texcoord1; + case H::color0: return ossia::attribute_semantic::color0; + case H::color1: return ossia::attribute_semantic::color1; + default: return ossia::attribute_semantic::custom; + } +} + +// Translate halp::attribute_format → ossia::vertex_format. halp encodes +// both base type and component count in a single enum value (float1..4, +// sint1..4, uint1..4, etc). 
`components` is carried separately on the +// extra_attribute for shader layout but the conversion is format-driven. +// Unknown → float3 as a safe default. +static ossia::vertex_format +translateFormat(halp::attribute_format f, int /*components*/) noexcept +{ + using F = halp::attribute_format; + switch(f) + { + case F::float1: return ossia::vertex_format::float1; + case F::float2: return ossia::vertex_format::float2; + case F::float3: return ossia::vertex_format::float3; + case F::float4: return ossia::vertex_format::float4; + case F::half1: return ossia::vertex_format::half1; + case F::half2: return ossia::vertex_format::half2; + case F::half3: return ossia::vertex_format::half3; + case F::half4: return ossia::vertex_format::half4; + case F::uint1: return ossia::vertex_format::uint32x1; + case F::uint2: return ossia::vertex_format::uint32x2; + case F::uint3: return ossia::vertex_format::uint32x3; + case F::uint4: return ossia::vertex_format::uint32x4; + case F::sint1: return ossia::vertex_format::sint32x1; + case F::sint2: return ossia::vertex_format::sint32x2; + case F::sint3: return ossia::vertex_format::sint32x3; + case F::sint4: return ossia::vertex_format::sint32x4; + case F::unormbyte1: return ossia::vertex_format::unorm8x1; + case F::unormbyte2: return ossia::vertex_format::unorm8x2; + case F::unormbyte4: return ossia::vertex_format::unorm8x4; + case F::ushort1: return ossia::vertex_format::uint16x1; + case F::ushort2: return ossia::vertex_format::uint16x2; + case F::ushort4: return ossia::vertex_format::uint16x4; + case F::sshort1: return ossia::vertex_format::sint16x1; + case F::sshort2: return ossia::vertex_format::sint16x2; + case F::sshort4: return ossia::vertex_format::sint16x4; + default: break; + } + return ossia::vertex_format::float3; +} + +} // namespace + +std::shared_ptr sceneStateFromMeshes( + std::vector meshes, + Threedim::float_vec buffer, + std::string_view source_label) +{ + if(meshes.empty() || buffer.empty()) + return nullptr; + + // 
One CPU buffer shared across every mesh part. The buffer_resource holds + // a shared_ptr; we stash the float_vec inside a shared_ptr + // deleter to preserve its lifetime and keep the .data() address stable. + // vertex_count == total element count across all attrs is irrelevant to + // the consumer — each mesh_primitive carries its own per-primitive count. + const int64_t buffer_bytes = (int64_t)(buffer.size() * sizeof(float)); + auto buf_owner = std::make_shared(std::move(buffer)); + std::shared_ptr buf_handle(buf_owner, buf_owner->data()); + + auto vertex_buf = std::make_shared(); + vertex_buf->resource = ossia::buffer_data{ + .data = std::move(buf_handle), + .byte_size = buffer_bytes, + .usage_hint = ossia::buffer_data::usage::vertex_buffer}; + vertex_buf->content_hash = (uint64_t)(uintptr_t)buf_owner->data(); + ossia::buffer_resource_ptr shared_buf{std::move(vertex_buf)}; + + auto roots = std::make_shared>(); + roots->reserve(meshes.size()); + + for(std::size_t i = 0; i < meshes.size(); ++i) + { + const auto& m = meshes[i]; + if(m.vertices <= 0) + continue; + + ossia::mesh_primitive prim; + // Stable id keyed on the shared buffer pointer + index, matching + // the scene_node id below. Required by the registry's mesh-slab + // allocator: a 0 id makes the slab uncacheable and the mesh + // disappears from rendering. + prim.stable_id + = (uint64_t)((uintptr_t)shared_buf.get()) ^ ((uint64_t)i + 1); + prim.vertex_buffers.push_back(shared_buf); + prim.vertex_count = (uint32_t)m.vertices; + prim.topology = m.points ? ossia::primitive_topology::points + : ossia::primitive_topology::triangles; + prim.index_type = ossia::index_format::none; + + // Local-space AABB over the position stream (tightly packed float3). + // buf_owner owns the floats; m.pos_offset is the element offset to + // the first position component. Enables per-draw GPU culling. 
+ { + const float* positions = buf_owner->data() + m.pos_offset; + prim.bounds = ossia::compute_aabb_from_positions( + positions, (std::size_t)m.vertices); + } + + // Byte-offset of each non-interleaved attribute block in the shared + // vertex buffer. Convert element-offset (floats) to bytes. + auto push_attr = [&](ossia::attribute_semantic sem, + ossia::vertex_format fmt, int64_t elem_offset, + uint32_t stride) + { + ossia::vertex_attribute a{}; + a.semantic = sem; + a.format = fmt; + a.buffer_index = 0; + a.byte_offset = (uint32_t)(elem_offset * (int64_t)sizeof(float)); + a.byte_stride = stride; + a.rate = ossia::vertex_attribute::input_rate::per_vertex; + prim.attributes.push_back(a); + }; + + push_attr(ossia::attribute_semantic::position, + ossia::vertex_format::float3, m.pos_offset, + 3 * sizeof(float)); + if(m.normals) + push_attr(ossia::attribute_semantic::normal, + ossia::vertex_format::float3, m.normal_offset, + 3 * sizeof(float)); + if(m.texcoord) + push_attr(ossia::attribute_semantic::texcoord0, + ossia::vertex_format::float2, m.texcoord_offset, + 2 * sizeof(float)); + if(m.colors) + push_attr(ossia::attribute_semantic::color0, + ossia::vertex_format::float4, m.color_offset, + 4 * sizeof(float)); + if(m.tangents) + push_attr(ossia::attribute_semantic::tangent, + ossia::vertex_format::float4, m.tangent_offset, + 4 * sizeof(float)); + + for(const auto& extra : m.extras) + { + auto sem = translateExtraSemantic(extra); + auto fmt = translateFormat(extra.format, extra.components); + const uint32_t stride = (uint32_t)(extra.components * sizeof(float)); + push_attr(sem, fmt, extra.offset, stride); + } + + auto mesh_comp = std::make_shared(); + mesh_comp->primitives.push_back(std::move(prim)); + + auto children = std::make_shared>(); + children->push_back(ossia::mesh_component_ptr{std::move(mesh_comp)}); + + auto node = std::make_shared(); + node->id.value = (uint64_t)((uintptr_t)shared_buf.get()) + ^ ((uint64_t)i + 1); + node->name = source_label.empty() + ? 
std::string("mesh_" + std::to_string(i)) + : std::string(source_label); + if(meshes.size() > 1) + { + node->name += '#'; + node->name += std::to_string(i); + } + node->children = std::move(children); + roots->push_back(std::move(node)); + } + + if(roots->empty()) + return nullptr; + + auto state = std::make_shared(); + state->roots = std::move(roots); + state->version = 1; + state->dirty_index = 1; + return state; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.hpp new file mode 100644 index 0000000000..8d6df3aaea --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneFromMeshes.hpp @@ -0,0 +1,33 @@ +#pragma once +#include + +#include + +#include +#include + +namespace Threedim +{ + +// Convert a vector of Threedim::mesh (produced by TinyObjFromFile, +// PlyFromFile, or the new vcglib-STL / vcglib-OFF bridges) into a +// scene_state containing one scene_node per mesh part, each with a +// mesh_component backing onto a single shared CPU buffer. +// +// All mesh parts share the same `float_vec` — the scene's mesh_primitives +// reference it via buffer_resource_ptr with per-attribute byte offsets +// into the same vertex buffer. This matches the layout tinyobj / miniply +// already produce: attributes are non-interleaved, each one a contiguous +// span in the parent buffer, with pos_offset / texcoord_offset / … +// in *elements* (floats), not bytes. +// +// `source_label` is used as the scene_node name; it should be the source +// filename (or `.` when unknown), purely for inspector readability. +// +// On empty input returns a null pointer; caller keeps the previous state. 
+std::shared_ptr sceneStateFromMeshes( + std::vector meshes, + Threedim::float_vec buffer, + std::string_view source_label = {}); + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.cpp new file mode 100644 index 0000000000..f88d9cfb3d --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.cpp @@ -0,0 +1,568 @@ +#include "SceneGraphFilter.hpp" + +#include + +#include + +namespace Threedim +{ + +namespace +{ + +// ───── Glob matching ───────────────────────────────────────────────── +// Minimal glob: `*` matches anything except `/`, `**` matches across +// slashes, `?` matches a single non-slash character, everything else +// is literal. Good enough for path-style filters; `std::regex` is the +// fallback if users want full regex later. +bool glob_match(std::string_view pattern, std::string_view text) noexcept +{ + std::size_t pi = 0, ti = 0; + std::size_t star_pi = std::string_view::npos; + std::size_t star_ti = 0; + bool star_double = false; + + while(ti < text.size()) + { + if(pi < pattern.size()) + { + char pc = pattern[pi]; + if(pc == '*') + { + // Detect `**` for slash-crossing wildcard. + star_double = (pi + 1 < pattern.size() && pattern[pi + 1] == '*'); + if(star_double) + pi += 2; + else + pi += 1; + star_pi = pi; + star_ti = ti; + continue; + } + if(pc == '?') + { + if(text[ti] == '/') + { + // `?` can't cross slashes; bail to backtrack below. + } + else + { + ++pi; + ++ti; + continue; + } + } + else if(pc == text[ti]) + { + ++pi; + ++ti; + continue; + } + } + // Mismatch — backtrack to last star. + if(star_pi != std::string_view::npos) + { + // `*` can't eat a slash; `**` can. + if(!star_double && text[star_ti] == '/') + return false; + pi = star_pi; + ++star_ti; + ti = star_ti; + continue; + } + return false; + } + // Consume trailing stars. 
+ while(pi < pattern.size() && pattern[pi] == '*') + ++pi; + return pi == pattern.size(); +} + +// Return true if any pattern in `patterns` matches `text`. +bool any_match( + const std::vector& patterns, std::string_view text) noexcept +{ + for(const auto& pat : patterns) + if(glob_match(pat, text)) + return true; + return false; +} + +// ───── Predicate context ───────────────────────────────────────────── + +struct FilterCtx +{ + SceneGraphFilter::Mode mode; + bool invert; + SceneGraphFilter::Component component; + const std::vector& paths; + const std::vector& names; + const std::vector& material_tags; + const ossia::scene_state* state; + // Tier-1 extensions: schema-field + property predicates. + SceneGraphFilter::AlphaMode alpha_mode; + SceneGraphFilter::Purpose purpose; + bool caster_flag; + std::string_view prop_key; + SceneGraphFilter::PropertyOp prop_op; + std::string_view prop_value; +}; + +// True if the payload carried by a scene_node has the component kind +// we're looking for. Used by ByComponent mode. +bool node_has_component( + const ossia::scene_node& n, SceneGraphFilter::Component which) noexcept +{ + if(!n.has_children()) + return false; + for(const auto& p : *n.children) + { + switch(which) + { + case SceneGraphFilter::Mesh: + if(ossia::get_if(&p)) + return true; + break; + case SceneGraphFilter::Light: + if(ossia::get_if(&p)) + return true; + break; + case SceneGraphFilter::Camera: + if(ossia::get_if(&p)) + return true; + break; + case SceneGraphFilter::Instance: + if(ossia::get_if(&p)) + return true; + break; + case SceneGraphFilter::Skeleton: + if(ossia::get_if(&p)) + return true; + break; + } + } + return false; +} + +// Does this node match the current mode's predicate before `invert` is +// applied? `path` is the slash-joined name chain from the root. 
+bool node_matches( + const ossia::scene_node& n, std::string_view path, + const FilterCtx& ctx) noexcept +{ + switch(ctx.mode) + { + case SceneGraphFilter::PassThrough: + return true; + case SceneGraphFilter::VisibleOnly: + return n.visible; + case SceneGraphFilter::ByPath: + return any_match(ctx.paths, path); + case SceneGraphFilter::ByName: + return any_match(ctx.names, n.name); + case SceneGraphFilter::ByComponent: + return node_has_component(n, ctx.component); + case SceneGraphFilter::ByMaterialTag: { + // Check every mesh_component primitive's material tag against + // the pattern list. mesh_primitive holds a direct + // material_component_ptr — no index lookup into scene_state.materials. + if(!n.has_children()) + return false; + for(const auto& p : *n.children) + { + const auto* mesh = ossia::get_if(&p); + if(!mesh || !*mesh) + continue; + for(const auto& prim : (*mesh)->primitives) + { + if(prim.material + && any_match(ctx.material_tags, prim.material->tag)) + return true; + } + } + return false; + } + + case SceneGraphFilter::SetVisibility: + // SetVisibility uses the same predicate chain as ByName in the + // caller — this case is a hint to the walker, not a true filter. + // Fall through to "match everything" so the flag flip runs on + // every node. The real gating happens at the caller level using + // name-list matching. + return true; + + // ─── Schema-field predicates (Tier 1 extension) ───────────────── + case SceneGraphFilter::ByAlphaMode: { + // Match when any primitive under this node has a material with + // the selected alphaMode. Per-primitive check because one + // scene_node can hold a mesh with multiple primitives using + // different alpha modes. 
+ if(!n.has_children()) + return false; + const auto want = static_cast(ctx.alpha_mode); + for(const auto& p : *n.children) + { + const auto* mesh = ossia::get_if(&p); + if(!mesh || !*mesh) + continue; + for(const auto& prim : (*mesh)->primitives) + { + if(prim.material && prim.material->alpha == want) + return true; + } + } + return false; + } + + case SceneGraphFilter::ByShadowCaster: + case SceneGraphFilter::ByReflectionCaster: { + // Read the selected bool flag from any of this node's materials. + // Matches when any primitive's material has the flag == caster_flag. + if(!n.has_children()) + return false; + for(const auto& p : *n.children) + { + const auto* mesh = ossia::get_if(&p); + if(!mesh || !*mesh) + continue; + for(const auto& prim : (*mesh)->primitives) + { + if(!prim.material) + continue; + const bool flag + = (ctx.mode == SceneGraphFilter::ByShadowCaster) + ? prim.material->shadow_caster + : prim.material->reflection_caster; + if(flag == ctx.caster_flag) + return true; + } + } + return false; + } + + case SceneGraphFilter::ByPurpose: + return static_cast(n.purpose) + == static_cast(ctx.purpose); + + case SceneGraphFilter::ByNodeProperty: + case SceneGraphFilter::ByMaterialProperty: { + if(ctx.prop_key.empty()) + return false; + auto match_prop + = [&](const ossia::scene_property_map& props) -> bool { + auto it = props.find(std::string(ctx.prop_key)); + if(it == props.end()) + return false; + // Stringify the stored value for comparison. ossia::value is + // variant-typed; value_to_pretty_string covers int/float/ + // string/bool/impulse uniformly. 
+ const std::string lhs = ossia::value_to_pretty_string(it->second); + const std::string_view rhs = ctx.prop_value; + switch(ctx.prop_op) + { + case SceneGraphFilter::PropEqual: return lhs == rhs; + case SceneGraphFilter::PropNotEqual: return lhs != rhs; + case SceneGraphFilter::PropContains: return lhs.find(rhs) != std::string::npos; + case SceneGraphFilter::PropLessThan: + case SceneGraphFilter::PropGreaterThan: { + // Numeric compare when both sides parse as float; fall + // back to lexicographic compare otherwise. Covers the + // common "alpha_cutoff > 0.5" case without a full DSL. + try + { + const double l = std::stod(lhs); + const double r = std::stod(std::string(rhs)); + return ctx.prop_op == SceneGraphFilter::PropLessThan + ? l < r : l > r; + } + catch(...) + { + return ctx.prop_op == SceneGraphFilter::PropLessThan + ? lhs < rhs : lhs > rhs; + } + } + } + return false; + }; + + if(ctx.mode == SceneGraphFilter::ByNodeProperty) + return match_prop(n.properties); + + // ByMaterialProperty — check every primitive's material. + if(!n.has_children()) + return false; + for(const auto& p : *n.children) + { + const auto* mesh = ossia::get_if(&p); + if(!mesh || !*mesh) + continue; + for(const auto& prim : (*mesh)->primitives) + { + if(prim.material && match_prop(prim.material->properties)) + return true; + } + } + return false; + } + } + return true; +} + +// ───── Tree walker ─────────────────────────────────────────────────── +// Recursively copy the subtree, dropping nodes whose (possibly +// inverted) predicate says no. Subtrees with no match anywhere are +// returned as the original shared_ptr (structural sharing). + +struct Walker +{ + const FilterCtx& ctx; + + // Does `node` or any descendant match? Memoization would help here + // if the tree gets big; for now linear scan on each parent. glTF + // scenes are typically shallow enough that this is fine. 
+ bool subtree_has_match( + const ossia::scene_node& n, std::string path) const noexcept + { + if(node_matches(n, path, ctx)) + return true; + if(!n.has_children()) + return false; + for(const auto& p : *n.children) + { + if(auto* sub = ossia::get_if(&p)) + { + if(!*sub) + continue; + std::string childPath + = path + '/' + (*sub)->name; + if(subtree_has_match(**sub, std::move(childPath))) + return true; + } + } + return false; + } + + // Returns the rewritten node, or nullptr if this node (and its + // entire subtree) should be dropped. + ossia::scene_node_ptr rewrite( + const ossia::scene_node_ptr& src, const std::string& path) const + { + if(!src) + return nullptr; + + const bool self_matches = node_matches(*src, path, ctx); + + // SetVisibility mode: don't drop anything, just toggle `visible` + // on matches. `invert` flips the sense: Invert=false → matches + // become hidden; Invert=true → matches become visible. + if(ctx.mode == SceneGraphFilter::SetVisibility) + { + const bool target_visible = ctx.invert; + const bool need_change + = self_matches && (src->visible != target_visible); + + // Recurse so descendants can also toggle. + ossia::scene_node_ptr recursed_self = src; + if(src->has_children()) + { + auto new_children + = std::make_shared>(); + new_children->reserve(src->children->size()); + bool child_changed = false; + for(const auto& payload : *src->children) + { + if(auto* sub = ossia::get_if(&payload)) + { + if(!*sub) + { + new_children->push_back(payload); + continue; + } + std::string childPath = path + '/' + (*sub)->name; + auto rw = rewrite(*sub, childPath); + if(rw.get() != sub->get()) + child_changed = true; + new_children->push_back(rw ? 
rw : *sub); + } + else + { + new_children->push_back(payload); + } + } + if(child_changed) + { + auto copy = std::make_shared(*src); + copy->children = std::move(new_children); + copy->dirty_index = src->dirty_index + 1; + recursed_self = copy; + } + } + + if(need_change) + { + auto copy = std::make_shared(*recursed_self); + copy->visible = target_visible; + copy->dirty_index = recursed_self->dirty_index + 1; + return copy; + } + return recursed_self; + } + + const bool keep_self = ctx.invert ? !self_matches : self_matches; + + // In modes other than PassThrough: if this node doesn't match AND + // no descendant does, drop the whole subtree. + if(ctx.mode != SceneGraphFilter::PassThrough && !keep_self + && !subtree_has_match(*src, path)) + return nullptr; + + // If no filtering is active (mode 0) and we reach here, share. + if(ctx.mode == SceneGraphFilter::PassThrough) + return src; + + // Recurse into children, rebuilding the payload list. + if(!src->has_children()) + return keep_self ? src : nullptr; + + auto new_children + = std::make_shared>(); + new_children->reserve(src->children->size()); + bool any_dropped = false; + for(const auto& payload : *src->children) + { + if(auto* sub = ossia::get_if(&payload)) + { + if(!*sub) + { + new_children->push_back(payload); + continue; + } + std::string childPath = path + '/' + (*sub)->name; + auto rw = rewrite(*sub, childPath); + if(rw) + new_children->push_back(rw); + else + any_dropped = true; + } + else + { + // Non-scene_node payloads (meshes, lights, transforms, etc.) + // follow the node they're on: keep iff the node was kept. + if(keep_self) + new_children->push_back(payload); + else + any_dropped = true; + } + } + + if(!keep_self && new_children->empty()) + return nullptr; // nothing survived; drop the node wrapper too + + // Share-if-unchanged: when no child was rewritten AND no child + // was dropped AND the node itself is kept, just return the + // original pointer. 
+ if(!any_dropped && new_children->size() == src->children->size()) + { + bool identical = true; + for(std::size_t i = 0; i < new_children->size(); ++i) + { + if(auto* a = ossia::get_if(&(*new_children)[i])) + { + auto* b = ossia::get_if( + &(*src->children)[i]); + if(!b || a->get() != b->get()) + { + identical = false; + break; + } + } + } + if(identical) + return src; + } + + auto copy = std::make_shared(*src); + copy->children = std::move(new_children); + copy->dirty_index = src->dirty_index + 1; + return copy; + } +}; + +} // namespace + +void SceneGraphFilter::rebuild() +{ + const auto& in = inputs.scene_in.scene; + if(!in.state) + { + m_cached_out.reset(); + m_pending_dirty = 0; + return; + } + + const auto* in_state = in.state.get(); + const int64_t in_version = in.state->version; + + // PassThrough is the free path. + if(inputs.mode.value == PassThrough) + { + m_cached_out = in.state; + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_pending_dirty = 0xFF; + return; + } + + FilterCtx ctx{ + .mode = Mode(inputs.mode.value), + .invert = inputs.invert.value, + .component = Component(inputs.component.value), + .paths = inputs.paths.value, + .names = inputs.names.value, + .material_tags = inputs.material_tags.value, + .state = in.state.get(), + .alpha_mode = AlphaMode(inputs.alpha_mode.value), + .purpose = Purpose(inputs.purpose.value), + .caster_flag = inputs.caster_flag.value, + .prop_key = inputs.prop_key.value, + .prop_op = PropertyOp(inputs.prop_op.value), + .prop_value = inputs.prop_value.value}; + + Walker w{ctx}; + auto new_roots + = std::make_shared>(); + if(in.state->roots) + { + new_roots->reserve(in.state->roots->size()); + for(const auto& r : *in.state->roots) + { + if(auto rw = w.rewrite(r, r ? 
("/" + r->name) : std::string{})) + new_roots->push_back(std::move(rw)); + } + } + + auto new_state = std::make_shared(*in.state); + new_state->roots = std::move(new_roots); + new_state->version = ++m_version_counter; + new_state->dirty_index = in.state->dirty_index + 1; + + m_cached_out = std::move(new_state); + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_pending_dirty = 0xFF; +} + +void SceneGraphFilter::operator()() +{ + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + const bool upstream_changed + = m_cached_in_state != in_state || m_cached_in_version != in_version; + if(upstream_changed || (!m_cached_out && in_state)) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.hpp new file mode 100644 index 0000000000..c54de80f9f --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneGraphFilter.hpp @@ -0,0 +1,227 @@ +#pragma once +#include +#include + +#include + +#include +#include +#include +#include + +namespace Threedim +{ + +// Scene-graph filter. Takes a scene in, emits a scene out whose node +// tree has been culled/pruned by a predicate selected via `mode`. The +// dropped nodes and their descendants are excluded from flattening +// downstream. +// +// Predicates run against each scene_node during the walk. Subtrees +// whose nodes all match are returned by shared_ptr identity (no +// cloning) so downstream caches stay warm on untouched branches. +// +// Path syntax: slash-joined scene_node::name chain from roots, glob +// wildcards (`*` matches anything except `/`, `**` matches across +// slashes). Example: `/*/Wheels/**` includes everything under any +// root whose first-level child is named "Wheels". 
+class SceneGraphFilter +{ +public: + halp_meta(name, "Scene Graph Filter") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_graph_filter") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-graph-filter.html") + halp_meta(uuid, "3c7e9a5d-2f4b-4e6c-8b1a-0d5f7e3a9c8b") + + enum Mode + { + PassThrough, + VisibleOnly, + ByPath, + ByName, + ByComponent, + ByMaterialTag, + // SetVisibility mode: matching nodes have their `visible` flag + // flipped to !Invert (Invert=false → hidden, Invert=true → shown). + // Non-matching nodes kept untouched. Unlike the filter modes above + // this DOESN'T drop nodes — they stay in the tree so downstream + // material / transform / light data is preserved, just + // render-invisible. + SetVisibility, + + // Schema-field predicates. Operate on well-known + // material_component / scene_node fields — no string hashing, + // no glob. Each mode reads one field and compares against the + // inline control. + ByAlphaMode, // material.alpha == (selected enum) + ByShadowCaster, // material.shadow_caster == (selected bool) + ByReflectionCaster, // material.reflection_caster == (selected bool) + ByPurpose, // scene_node.purpose == (selected enum) + + // Property-dict predicates. Read scene_node::properties or + // material_component::properties by key and compare against a + // literal. Value type is inferred from the control (string/float/ + // int). Useful for user-authored metadata — USD extra attributes, + // glTF material.extras JSON, custom layer tags. 
+ ByNodeProperty, // scene_node.properties[key] matches value + ByMaterialProperty // material.properties[key] matches value + }; + + enum Component + { + Mesh, + Light, + Camera, + Instance, + Skeleton + }; + + enum AlphaMode + { + AlphaOpaque = 0, + AlphaMask = 1, + AlphaBlend = 2 + }; + + enum Purpose + { + PurposeDefault = 0, + PurposeRender = 1, + PurposeProxy = 2, + PurposeGuide = 3 + }; + + // Operator for property matches — extends beyond string-glob to + // support numeric thresholds without a full predicate-DSL rollout. + enum PropertyOp + { + PropEqual, + PropNotEqual, + PropLessThan, + PropGreaterThan, + PropContains // substring match when value is string + }; + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: controls trigger rebuild() via update(); + // upstream scene_in changes detected in operator()(). + struct : halp::combobox_t<"Mode", Mode> + { + struct range + { + std::string_view values[13]{ + "Pass through", "Visible only", "By path", "By name", + "By component", "By material tag", "Set visibility", + "By alpha mode", "By shadow caster", "By reflection caster", + "By purpose", "By node property", "By material property"}; + int init{0}; + }; + void update(SceneGraphFilter& n) { n.rebuild(); } + } mode; + + // When true: drop nodes that match the predicate (the list acts + // as an exclude filter). When false (default): keep matching + // nodes, drop the rest. + struct : halp::toggle<"Invert"> + { void update(SceneGraphFilter& n) { n.rebuild(); } } invert; + + // List inlets — user edits inline in the inspector. A halp + // `val_port>` renders an editable N-row widget. + // Each mode uses the relevant list; others are ignored. 
+ struct : halp::val_port<"Paths", std::vector> + { void update(SceneGraphFilter& n) { n.rebuild(); } } paths; + struct : halp::val_port<"Names", std::vector> + { void update(SceneGraphFilter& n) { n.rebuild(); } } names; + struct : halp::val_port<"Material tags", std::vector> + { void update(SceneGraphFilter& n) { n.rebuild(); } } material_tags; + + struct : halp::combobox_t<"Component", Component> + { + struct range + { + std::string_view values[5]{ + "Mesh", "Light", "Camera", "Instance", "Skeleton"}; + int init{0}; + }; + void update(SceneGraphFilter& n) { n.rebuild(); } + } component; + + // Schema-field selectors. Unused in most modes; each dropdown is + // read only by its corresponding Mode. + struct : halp::combobox_t<"Alpha mode", AlphaMode> + { + struct range + { std::string_view values[3]{"Opaque", "Mask", "Blend"}; int init{0}; }; + void update(SceneGraphFilter& n) { n.rebuild(); } + } alpha_mode; + + struct : halp::combobox_t<"Purpose", Purpose> + { + struct range + { + std::string_view values[4]{"Default", "Render", "Proxy", "Guide"}; + int init{0}; + }; + void update(SceneGraphFilter& n) { n.rebuild(); } + } purpose; + + struct : halp::toggle<"Caster flag"> + { void update(SceneGraphFilter& n) { n.rebuild(); } } caster_flag; + + // Property-match inputs (ByNodeProperty / ByMaterialProperty). + // Key + operator + literal; value parsed as float when numeric, + // string otherwise. Missing keys never match (predicate false). 
+ struct : halp::val_port<"Property key", std::string> + { void update(SceneGraphFilter& n) { n.rebuild(); } } prop_key; + + struct : halp::combobox_t<"Property op", PropertyOp> + { + struct range + { + std::string_view values[5]{ + "equal", "not equal", "less than", "greater than", + "contains (string)"}; + int init{0}; + }; + void update(SceneGraphFilter& n) { n.rebuild(); } + } prop_op; + + struct : halp::val_port<"Property value", std::string> + { void update(SceneGraphFilter& n) { n.rebuild(); } } prop_value; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // Cache the last emitted scene_state so unchanged inputs don't churn + // downstream identity caches. + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGroup.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.cpp new file mode 100644 index 0000000000..972edf985a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.cpp @@ -0,0 +1,235 @@ +#include "SceneGroup.hpp" + +#include +#include + +#include + +#include +#include + +namespace Threedim +{ + +namespace +{ +// Concatenate a shared vector from two nullable inputs while deduping +// by shared_ptr identity. Reuses the lone non-null input's shared_ptr +// when only one contributes — the same identity-preserving passthrough +// merge_scenes does. When both contribute, an entry from `b` is dropped +// when its underlying object pointer already appeared in `a`. 
This is +// the SceneGroup safety net for users who wire the same upstream to +// more than one of the four input slots: each slot would otherwise +// contribute the same component vectors and the downstream visitor +// would walk every cloud / mesh / light N times. +template +std::shared_ptr> mergeSharedVec( + const std::shared_ptr>& a, + const std::shared_ptr>& b) +{ + if(!a || a->empty()) + return b; + if(!b || b->empty()) + return a; + // Same shared_ptr-vector instance on both sides: nothing to dedup, + // return one copy. Cheaper than building a fresh vector + ptr_set. + if(a == b) + return a; + auto merged = std::make_shared>(); + merged->reserve(a->size() + b->size()); + ossia::ptr_set seen; + for(const auto& x : *a) + { + if(x && seen.insert(x.get()).second) + merged->push_back(x); + } + for(const auto& x : *b) + { + if(x && seen.insert(x.get()).second) + merged->push_back(x); + } + return merged; +} +} // namespace + +void SceneGroup::rebuild() +{ + const ossia::scene_spec* inputs_list[4] = { + &inputs.scene0.scene, &inputs.scene1.scene, + &inputs.scene2.scene, &inputs.scene3.scene}; + + // Refresh upstream identity cache (used by operator()() to detect + // changes) and TRS / name caches. + for(int i = 0; i < 4; ++i) + { + const ossia::scene_state* s = inputs_list[i]->state.get(); + int64_t v = s ? s->version : -1; + m_cached_in[i] = s; + m_cached_ver[i] = v; + } + // Collect roots from all non-empty inputs; also concat materials / + // animations / cameras / skeletons additively. Dedup roots by + // shared_ptr identity — wiring the same upstream into more than one + // SceneGroup input slot is a common authoring shape (especially when + // a user re-uses an AssetLoader output to position the same asset in + // multiple slots), and without this the same scene_node would land + // in the parent's children list four times. 
The downstream + // ScenePreprocessor visitor would then walk it four times and emit + // four cloud-bucket entries, quadrupling the GPU upload of every + // primitive_cloud / mesh / light reachable through that root. + auto merged_roots + = std::make_shared>(); + ossia::ptr_set seen_roots; + std::shared_ptr> mats; + std::shared_ptr> anims; + std::shared_ptr> cams; + std::shared_ptr> skels; + ossia::scene_environment env{}; + ossia::scene_node_id active_cam{}; + + for(int i = 0; i < 4; ++i) + { + const auto& s = inputs_list[i]->state; + if(!s) + continue; + if(s->roots) + for(const auto& r : *s->roots) + if(r && seen_roots.insert(r.get()).second) + merged_roots->push_back(r); + mats = mergeSharedVec(mats, s->materials); + anims = mergeSharedVec(anims, s->animations); + cams = mergeSharedVec(cams, s->cameras); + skels = mergeSharedVec(skels, s->skeletons); + // First contributor's environment + active_camera wins. + if(i == 0 || !env.skybox_texture.native_handle) + env = s->environment; + if(active_cam.value == 0 && s->active_camera_id.value != 0) + active_cam = s->active_camera_id; + } + + // Build the wrapping parent node. + ossia::scene_transform xform; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + auto q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + xform.raw_slot = m_xform_ref; + + auto children + = std::make_shared>(); + children->reserve(merged_roots->size() + 1); + children->push_back(xform); + for(auto& r : *merged_roots) + children->push_back(r); + + auto parent = std::make_shared(); + parent->name + = inputs.name.value.empty() ? 
std::string{"Group"} + : inputs.name.value; + parent->children = std::move(children); + + auto roots = std::make_shared>(); + roots->push_back(std::move(parent)); + + auto state = std::make_shared(); + state->roots = std::move(roots); + state->materials = std::move(mats); + state->animations = std::move(anims); + state->cameras = std::move(cams); + state->skeletons = std::move(skels); + state->environment = std::move(env); + state->active_camera_id = active_cam; + state->version = ++m_version_counter; + state->dirty_index = 1; + + m_cached_out = std::move(state); + m_pending_dirty = 0xFF; +} + +void SceneGroup::operator()() +{ + // Detect upstream scene inputs + republish cached. Control changes + // come through their update() callbacks. + const ossia::scene_spec* inputs_list[4] = { + &inputs.scene0.scene, &inputs.scene1.scene, + &inputs.scene2.scene, &inputs.scene3.scene}; + bool upstream_changed = false; + for(int i = 0; i < 4; ++i) + { + const auto* s = inputs_list[i]->state.get(); + const int64_t v = s ? s->version : -1; + if(m_cached_in[i] != s || m_cached_ver[i] != v) + upstream_changed = true; + } + if(!m_cached_out || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +// Order invariant: called by GfxRenderer::initState BEFORE the first +// operator()() and BEFORE processControlIn fires any rebuild() callback. +// m_xform_ref populated here is therefore safe to read in rebuild() +// without a guard. Adding prepare() to this node breaks the invariant — +// see CpuFilterNode.hpp for details. 
+void SceneGroup::init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void SceneGroup::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!raw_transform_slot.valid()) + return; + + score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); +} + +void SceneGroup::release(score::gfx::RenderList& r) +{ + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_xform_ref = {}; + // Producer-state-drift Option A — see Light::release. 
+ m_cached_out.reset(); + for(auto& in : m_cached_in) + in = nullptr; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneGroup.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.hpp new file mode 100644 index 0000000000..d49de27fb4 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneGroup.hpp @@ -0,0 +1,93 @@ +#pragma once +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Wraps up to 4 scene inputs under a single named parent scene_node +// with its own TRS. The group name becomes addressable by downstream +// filters / overrides via the usual path pattern — so +// `SceneGroup(name="ProsceniumSet")` + +// `SceneGraphFilter(paths=["/ProsceniumSet/**"])` is the canonical +// "bundle and tag a subset" pattern. +// +// Materials / animations / cameras / skeletons / environment are +// merged additively from all inputs (first-wins on singletons like +// active_camera_id and environment), same convention as +// MergeGeometries / merge_scenes. 
+class SceneGroup +{ +public: + halp_meta(name, "Scene Group") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_group") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-group.html") + halp_meta(uuid, "8a3b5e2d-7c4f-4b9e-9d1a-6f8e2c5d3a7b") + + struct ins + { + struct { halp_meta(name, "Scene 0"); ossia::scene_spec scene; uint8_t dirty{0}; } scene0; + struct { halp_meta(name, "Scene 1"); ossia::scene_spec scene; uint8_t dirty{0}; } scene1; + struct { halp_meta(name, "Scene 2"); ossia::scene_spec scene; uint8_t dirty{0}; } scene2; + struct { halp_meta(name, "Scene 3"); ossia::scene_spec scene; uint8_t dirty{0}; } scene3; + + // Port-driven rebuild: controls trigger rebuild(); upstream scene + // inputs detected in operator()(). + struct : halp::lineedit<"Name", ""> + { void update(SceneGroup& n) { n.rebuild(); } } name; + struct : PositionControl + { void update(SceneGroup& n) { n.rebuild(); } } position; + struct : RotationControl + { void update(SceneGroup& n) { n.rebuild(); } } rotation; + struct : ScaleControl + { void update(SceneGroup& n) { n.rebuild(); } } scale; + } inputs; + + struct outs + { + struct { halp_meta(name, "Scene Out"); ossia::scene_spec scene; uint8_t dirty{0}; } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in[4]{}; + int64_t m_cached_ver[4]{-1, -1, -1, -1}; + int64_t m_version_counter{0}; + + score::gfx::GpuResourceRegistry::Slot raw_transform_slot; + ossia::gpu_slot_ref m_xform_ref{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneInspector.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.cpp new 
file mode 100644 index 0000000000..04355ca1e4 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.cpp @@ -0,0 +1,395 @@ +#include "SceneInspector.hpp" + +#include + +#include + +namespace Threedim +{ + +namespace +{ + +struct ComponentFlags +{ + bool mesh{false}; + bool light{false}; + bool camera{false}; + bool skeleton{false}; + bool instance{false}; + bool transform{false}; + std::string material_tag; // First mesh primitive's material tag (or empty) + int vertex_count{0}; // Summed across all mesh primitives + int triangle_count{0}; + + // Space-separated compact tag string, e.g. "[mesh][trans][mat=3][v=1024 t=512]" + std::string tags(bool show_components, bool show_stats) const + { + std::string out; + if(show_components) + { + if(mesh) out += "[mesh]"; + if(light) out += "[light]"; + if(camera) out += "[cam]"; + if(skeleton) out += "[skel]"; + if(instance) out += "[inst]"; + if(transform) out += "[trans]"; + } + if(show_stats) + { + if(mesh && (vertex_count > 0 || triangle_count > 0)) + out += fmt::format("[v={} t={}]", vertex_count, triangle_count); + if(mesh && !material_tag.empty()) + out += fmt::format("[mat={}]", material_tag); + } + return out; + } +}; + +// Scan this node's DIRECT children for non-scene-node payloads and +// record which kinds appear. Meshes additionally contribute their +// vertex/triangle counts for the stats output. +ComponentFlags detectComponents(const ossia::scene_node& node) noexcept +{ + ComponentFlags f; + if(!node.has_children()) + return f; + for(const auto& payload : *node.children) + { + if(auto* m = ossia::get_if(&payload)) + { + if(*m) + { + f.mesh = true; + for(const auto& prim : (*m)->primitives) + { + f.vertex_count += int(prim.vertex_count); + // Source primitive count for this topology. index_count == 0 + // means non-indexed; fall back to vertex_count. + const int ic = int(prim.index_count); + const int n = (ic > 0 ? 
ic : int(prim.vertex_count)); + switch(prim.topology) + { + using T = ossia::primitive_topology; + case T::points: + f.triangle_count += n; + break; + case T::lines: + f.triangle_count += n / 2; + break; + case T::line_strip: + f.triangle_count += std::max(0, n - 1); + break; + case T::triangles: + f.triangle_count += n / 3; + break; + case T::triangle_strip: + case T::triangle_fan: + f.triangle_count += std::max(0, n - 2); + break; + case T::patches: + case T::meshlets: + // Not a "primitive count" in the user sense; skip. + break; + } + if(f.material_tag.empty() && prim.material) + f.material_tag = prim.material->tag; + } + } + } + else if(ossia::get_if(&payload)) + f.light = true; + else if(ossia::get_if(&payload)) + f.camera = true; + else if(ossia::get_if(&payload)) + f.skeleton = true; + else if(ossia::get_if(&payload)) + f.instance = true; + else if(ossia::get_if(&payload)) + f.transform = true; + } + return f; +} + +// ───── Walker ──────────────────────────────────────────────────────── +// +// Accumulates rows + readable + running stats. Called recursively on +// the scene_node subtree; handles Paths/Names/Tree modes inline so the +// tree glyphs are emitted at the right place (only Tree mode uses +// indentation prefixes, Paths and Names emit flat rows). + +struct State +{ + SceneInspector::Mode mode; + bool show_components; + bool show_stats; + bool include_hidden; + int max_depth; // -1 = unlimited + + std::vector* rows; + std::string* readable; + + // Running stats. + int node_count{0}; + int mesh_count{0}; + int light_count{0}; + int camera_count{0}; + int total_vertices{0}; + int total_triangles{0}; +}; + +// Emit a single row for `node` in the current mode. `path` is the +// canonical slash-path from the root; `tree_prefix` is the box-drawing +// indentation used only in Tree mode (e.g., "│ ├── "). 
+void emitRow( + State& s, const ossia::scene_node& node, const std::string& path, + const std::string& tree_prefix) +{ + auto comp = detectComponents(node); + + // Update running stats. + s.node_count++; + if(comp.mesh) + s.mesh_count++; + if(comp.light) + s.light_count++; + if(comp.camera) + s.camera_count++; + s.total_vertices += comp.vertex_count; + s.total_triangles += comp.triangle_count; + + const std::string tag_suffix = comp.tags(s.show_components, s.show_stats); + const char* hidden_suffix = (!node.visible) ? "[hidden]" + : (!node.active) ? "[inactive]" + : ""; + + std::string row; + switch(s.mode) + { + case SceneInspector::Paths: + row = path.empty() ? std::string("/") : path; + if(!tag_suffix.empty()) + { + row += ' '; + row += tag_suffix; + } + if(*hidden_suffix) + { + row += ' '; + row += hidden_suffix; + } + break; + case SceneInspector::Names: + row = node.name.empty() ? std::string{"(unnamed)"} : node.name; + if(!tag_suffix.empty()) + { + row += ' '; + row += tag_suffix; + } + if(*hidden_suffix) + { + row += ' '; + row += hidden_suffix; + } + break; + case SceneInspector::Tree: + row = tree_prefix; + row += node.name.empty() ? std::string{"(unnamed)"} : node.name; + if(!tag_suffix.empty()) + { + row += ' '; + row += tag_suffix; + } + if(*hidden_suffix) + { + row += ' '; + row += hidden_suffix; + } + break; + case SceneInspector::Summary: + // Summary mode emits roots only at top level; leaves handled by + // the outer walker. Skip here. + return; + } + + s.rows->push_back(row); + *s.readable += row; + *s.readable += '\n'; +} + +// Depth-first walk. `depth` is 0 at the root. `prefix_trunk` is the +// continuation prefix inherited from ancestors ("│ " for "still more +// siblings on that ancestor", " " for "ancestor was last child"). +// `is_last_child` is whether this node is its parent's last child — +// controls the ├── vs └── glyph. 
+void walk( + State& s, const ossia::scene_node_ptr& node, const std::string& path, + const std::string& prefix_trunk, bool is_last_child, int depth) +{ + if(!node) + return; + if(!s.include_hidden && (!node->active || !node->visible)) + return; + if(s.max_depth >= 0 && depth > s.max_depth) + return; + + // Tree-mode glyph for this node. + std::string tree_prefix; + if(s.mode == SceneInspector::Tree && depth > 0) + tree_prefix = prefix_trunk + (is_last_child ? "└── " : "├── "); + // depth == 0 (root) gets no glyph — it stands alone. + + emitRow(s, *node, path, tree_prefix); + + if(!node->has_children()) + return; + + // Collect the subset of children that are scene_node_ptrs (we only + // recurse into those; scene_transform / component payloads have + // already been folded into the parent's row via detectComponents). + std::vector child_nodes; + child_nodes.reserve(node->children->size()); + for(const auto& p : *node->children) + { + if(auto* sub = ossia::get_if(&p)) + if(*sub) + child_nodes.push_back(sub); + } + + const std::string next_trunk = (depth == 0) + ? std::string{} + : (prefix_trunk + (is_last_child ? " " : "│ ")); + + for(std::size_t i = 0; i < child_nodes.size(); ++i) + { + const bool last = (i + 1 == child_nodes.size()); + const auto& sub = *child_nodes[i]; + std::string childPath = path + '/' + sub->name; + walk(s, sub, childPath, next_trunk, last, depth + 1); + } +} + +} // namespace + +void SceneInspector::operator()() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? 
in_state->version : -1; + + const bool unchanged + = m_cached_valid && m_cached_in_state == in_state + && m_cached_in_version == in_version + && m_cached_mode == inputs.mode.value + && m_cached_show_components == inputs.show_components.value + && m_cached_show_stats == inputs.show_stats.value + && m_cached_include_hidden == inputs.include_hidden.value + && m_cached_max_depth == inputs.max_depth.value; + if(unchanged) + return; + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_cached_mode = inputs.mode.value; + m_cached_show_components = inputs.show_components.value; + m_cached_show_stats = inputs.show_stats.value; + m_cached_include_hidden = inputs.include_hidden.value; + m_cached_max_depth = inputs.max_depth.value; + m_cached_valid = true; + + auto& rows = outputs.rows.value; + auto& readable = outputs.readable.value; + rows.clear(); + readable.clear(); + + outputs.node_count.value = 0; + outputs.mesh_count.value = 0; + outputs.light_count.value = 0; + outputs.camera_count.value = 0; + outputs.material_count.value = 0; + outputs.total_vertices.value = 0; + outputs.total_triangles.value = 0; + + if(!in_state) + { + rows.push_back("(empty scene)"); + readable = "(empty scene)\n"; + return; + } + + // Material count comes straight from the state. + outputs.material_count.value + = in_state->materials ? int(in_state->materials->size()) : 0; + + State s{ + Mode(inputs.mode.value), + inputs.show_components.value, + inputs.show_stats.value, + inputs.include_hidden.value, + inputs.max_depth.value, + &rows, + &readable, + 0, 0, 0, 0, 0, 0}; + + if(inputs.mode.value == Summary) + { + // Summary: one block per root with aggregate stats, plus a global + // materials section + active camera if set. 
+ if(in_state->roots) + { + fmt::format_to( + std::back_inserter(readable), "Scene: {} root(s)\n", + in_state->roots->size()); + for(const auto& r : *in_state->roots) + { + if(!r) + continue; + State local = s; + local.rows = &rows; + local.readable = &readable; + walk(local, r, "/" + r->name, std::string{}, true, 0); + s.node_count += local.node_count; + s.mesh_count += local.mesh_count; + s.light_count += local.light_count; + s.camera_count += local.camera_count; + s.total_vertices += local.total_vertices; + s.total_triangles += local.total_triangles; + } + } + std::string hdr = fmt::format( + "== Scene Summary ==\n" + " nodes: {}\n" + " meshes: {}\n" + " lights: {}\n" + " cameras: {}\n" + " materials: {}\n" + " vertices: {}\n" + " triangles: {}\n", + s.node_count, s.mesh_count, s.light_count, s.camera_count, + outputs.material_count.value, s.total_vertices, s.total_triangles); + readable.insert(0, hdr); + rows.insert(rows.begin(), std::move(hdr)); + } + else + { + if(in_state->roots) + { + for(const auto& r : *in_state->roots) + { + if(!r) + continue; + const std::string rootPath = "/" + r->name; + walk(s, r, rootPath, std::string{}, true, 0); + } + } + } + + outputs.node_count.value = s.node_count; + outputs.mesh_count.value = s.mesh_count; + outputs.light_count.value = s.light_count; + outputs.camera_count.value = s.camera_count; + outputs.total_vertices.value = s.total_vertices; + outputs.total_triangles.value = s.total_triangles; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneInspector.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.hpp new file mode 100644 index 0000000000..98a5e883c4 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneInspector.hpp @@ -0,0 +1,105 @@ +#pragma once +#include +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Read-only introspection node for scene_spec. 
Walks the incoming +// scene tree and emits: +// - `Rows`: a list of strings, one per node. In Paths mode each row +// is a canonical slash-path (`/Root/Body/Wheels`) you can copy +// directly into SceneGraphFilter(paths=...) / ConfigurePrimitive / +// SceneSelector. In Tree mode each row is indented with +// box-drawing glyphs for visual hierarchy. In Names mode each row +// is a bare node name. +// - `Readable`: a formatted multi-line dump of the same information, +// suitable to pipe into Ui::TextBox for a wider-view inspector. +// - Scalar stats: node / mesh / light / camera / material counts +// plus totalled triangle and vertex counts. +// +// Bridges the "what paths exist in this scene?" question that was +// previously unanswerable from the user's side — filter/selector +// nodes need string patterns, and without a way to enumerate the +// tree the user has to guess. Drop this node between a loader and a +// filter, read the Rows list, paste the path you want into the +// downstream node. 
+class SceneInspector +{ +public: + halp_meta(name, "Scene Inspector") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_inspector") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-inspector.html") + halp_meta(uuid, "b5f2c8a3-4d1e-4b7f-9e6c-3a8d5f0b2c9e") + + enum Mode + { + Paths, // canonical slash-paths, directly copy-pasteable + Names, // bare node names (may have duplicates) + Tree, // indented with ├──/└── glyphs + Summary // high-level per-root summary + counts + }; + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + struct : halp::combobox_t<"Mode", Mode> + { + struct range + { + std::string_view values[4]{"Paths", "Names", "Tree", "Summary"}; + int init{0}; + }; + } mode; + + halp::toggle<"Show components"> show_components; + halp::toggle<"Show stats"> show_stats; + halp::toggle<"Include hidden"> include_hidden; + halp::spinbox_i32<"Max depth", halp::irange{-1, 64, -1}> max_depth; + } inputs; + + struct outs + { + halp::val_port<"Rows", std::vector> rows; + halp::val_port<"Readable", std::string> readable; + + halp::val_port<"Node count", int> node_count; + halp::val_port<"Mesh count", int> mesh_count; + halp::val_port<"Light count", int> light_count; + halp::val_port<"Camera count", int> camera_count; + halp::val_port<"Material count", int> material_count; + halp::val_port<"Total triangles", int> total_triangles; + halp::val_port<"Total vertices", int> total_vertices; + } outputs; + + void operator()(); + + // Identity + version cache: if inputs haven't changed we skip the + // whole walk. Matches the pattern used by SceneGraphFilter etc. 
+ const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int m_cached_mode{-1}; + bool m_cached_show_components{false}; + bool m_cached_show_stats{false}; + bool m_cached_include_hidden{false}; + int m_cached_max_depth{-2}; + bool m_cached_valid{false}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.cpp new file mode 100644 index 0000000000..9acf7a5c23 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.cpp @@ -0,0 +1,65 @@ +#include "Executor.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace Gfx::ScenePreprocessor +{ +class scene_preprocessor_exec_node final : public gfx_exec_node +{ +public: + scene_preprocessor_exec_node(GfxExecutionAction& ctx) + : gfx_exec_node{ctx} + { + } + + void init() + { + auto node = std::make_unique(); + id = exec_context->ui->register_node(std::move(node)); + } + + ~scene_preprocessor_exec_node() { exec_context->ui->unregister_node(id); } + + std::string label() const noexcept override { return "Gfx::ScenePreprocessor_node"; } +}; + +ProcessExecutorComponent::ProcessExecutorComponent( + Gfx::ScenePreprocessor::Model& element, + const Execution::Context& ctx, + QObject* parent) + : ProcessComponent_T{element, ctx, "scenePreprocessorComponent", parent} +{ + auto n = ossia::make_node( + *ctx.execState, ctx.doc.plugin().exec); + + // Port 0: Scene input + n->add_geometry(); + // Single Geometry outlet — material-texture arrays (base_color, + // metal_rough, normal, emissive) and the skybox ride along as + // auxiliary_texture entries on the emitted geometry; scene-wide + // UBOs/SSBOs (camera, env, scene_lights/materials, per_draws, + // indirect, scene_counts) ride along as auxiliary_buffer entries. + // Consumer shaders bind everything by name. 
+ n->add_geometry_out(); + + n->init(); + + this->node = n; + m_ossia_process = std::make_shared(n); +} + +void ProcessExecutorComponent::cleanup() +{ + ProcessComponent_T::cleanup(); +} +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.hpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.hpp new file mode 100644 index 0000000000..449087e5b1 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Executor.hpp @@ -0,0 +1,24 @@ +#pragma once +#include + +#include + +namespace Gfx::ScenePreprocessor +{ +class Model; +class ProcessExecutorComponent final + : public Execution:: + ProcessComponent_T +{ + COMPONENT_METADATA("d7e2f8b4-9a3c-4e1b-8f6d-0c5a2b7e9f1d") +public: + ProcessExecutorComponent( + Model& element, + const Execution::Context& ctx, + QObject* parent); + void cleanup() override; +}; + +using ProcessExecutorComponentFactory + = Execution::ProcessComponentFactory_T; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Metadata.hpp new file mode 100644 index 0000000000..422142fe8f --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Metadata.hpp @@ -0,0 +1,22 @@ +#pragma once +#include + +namespace Gfx::ScenePreprocessor +{ +class Model; +} + +PROCESS_METADATA( + , Gfx::ScenePreprocessor::Model, "a8f2c6d0-1b4e-4c7a-9d3f-5e8b2c1a7f0d", + "scenepreprocessor", // Internal name + "Scene Preprocessor", // Pretty name + Process::ProcessCategory::Visual, // Category + "Visuals/3D/Scene", // Category + "Flattens a scene_spec hierarchy into a GPU-resident geometry_spec", // Description + "ossia team", // Author + (QStringList{"gfx", "scene", "geometry", "3d"}), // Tags + {}, // Inputs + {}, // Outputs + QUrl{}, // Doc url + Process::ProcessFlags::SupportsAll // Flags +) diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.cpp 
b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.cpp new file mode 100644 index 0000000000..804b887a5a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.cpp @@ -0,0 +1,83 @@ +#include "Process.hpp" + +#include + +#include +#include + +#include +#include + +#include + +W_OBJECT_IMPL(Gfx::ScenePreprocessor::Model) +namespace Gfx::ScenePreprocessor +{ + +Model::Model( + const TimeVal& duration, const Id& id, QObject* parent) + : Process::ProcessModel{duration, id, "gfxProcess", parent} +{ + metadata().setInstanceName(*this); + init(); +} + +Model::~Model() = default; + +void Model::init() +{ + if(m_inlets.empty() && m_outlets.empty()) + { + m_inlets.push_back(new GeometryInlet{"Scene In", Id(0), this}); + // Single Geometry Out — all material-texture arrays (base_color, + // metal_rough, normal, emissive), camera / env / scene UBOs and the + // environment skybox ride along as auxiliary_buffer / auxiliary_texture + // entries on the emitted geometry. Consumer shaders auto-resolve them + // by name via try_bind_from_geometry / try_bind_texture_from_geometry; + // no manual cable needed. 
+ m_outlets.push_back(new GeometryOutlet{"Geometry Out", Id(0), this}); + } +} + +QString Model::prettyName() const noexcept +{ + return tr("Scene Preprocessor"); +} + +} + +template <> +void DataStreamReader::read(const Gfx::ScenePreprocessor::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); + insertDelimiter(); +} + +template <> +void DataStreamWriter::write(Gfx::ScenePreprocessor::Model& proc) +{ + writePorts( + *this, + components.interfaces(), + proc.m_inlets, + proc.m_outlets, + &proc); + checkDelimiter(); +} + +template <> +void JSONReader::read(const Gfx::ScenePreprocessor::Model& proc) +{ + readPorts(*this, proc.m_inlets, proc.m_outlets); +} + +template <> +void JSONWriter::write(Gfx::ScenePreprocessor::Model& proc) +{ + writePorts( + *this, + components.interfaces(), + proc.m_inlets, + proc.m_outlets, + &proc); +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.hpp b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.hpp new file mode 100644 index 0000000000..0cf96bf394 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ScenePreprocessor/Process.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include +#include + +namespace Gfx::ScenePreprocessor +{ +class Model final : public Process::ProcessModel +{ + SCORE_SERIALIZE_FRIENDS + PROCESS_METADATA_IMPL(Gfx::ScenePreprocessor::Model) + W_OBJECT(Model) + +public: + Model( + const TimeVal& duration, + const Id& id, + QObject* parent); + + template + Model(Impl& vis, QObject* parent) + : Process::ProcessModel{vis, parent} + { + vis.writeTo(*this); + init(); + } + + ~Model() override; + +private: + void init(); + QString prettyName() const noexcept override; +}; + +using ProcessFactory = Process::ProcessFactory_T; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.cpp new file mode 100644 index 0000000000..44894ff71b --- /dev/null 
+++ b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.cpp @@ -0,0 +1,68 @@ +#include "SceneResourceRoute.hpp" + +namespace Threedim +{ + +void SceneResourceRoute::rebuild() +{ + if(!m_state) + m_state = std::make_shared(); + + // Reset the environment / shadow bits we own. Partial-producer + // contract: this node contributes exactly one field; everything else + // stays at defaults and gets filtered out by merge_scenes' per-field + // overlay (it only picks up texture handles with a non-null + // native_handle, and lit params_set bits — this node lights none). + m_state->environment = {}; + m_state->shadow_cascades = {}; + + void* handle = inputs.texture.texture.handle; + if(handle) + { + switch(inputs.target.value) + { + case SceneResourceTarget::Skybox: + m_state->environment.skybox_texture.native_handle = handle; + break; + case SceneResourceTarget::IrradianceMap: + m_state->environment.irradiance_map.native_handle = handle; + break; + case SceneResourceTarget::PrefilteredMap: + m_state->environment.prefiltered_map.native_handle = handle; + break; + case SceneResourceTarget::BRDFLut: + m_state->environment.brdf_lut.native_handle = handle; + break; + case SceneResourceTarget::ShadowMapArray: + m_state->shadow_cascades.shadow_map_array.native_handle = handle; + break; + } + } + + m_state->version = ++m_version; + m_state->dirty_index = m_version; + + m_cached_handle = handle; + m_cached_target = inputs.target.value; + m_pending_dirty = 0xFF; +} + +void SceneResourceRoute::operator()() +{ + // The halp GPU-texture input doesn't fire a port-update event on + // native-handle swap (only on port re-wiring), so we poll here and + // rebuild when either the handle or the target changed. Stable + // scene_state identity means the no-change case is a cheap + // shared_ptr forward without re-allocating.
+ void* handle = inputs.texture.texture.handle; + const bool changed = !m_state || handle != m_cached_handle + || inputs.target.value != m_cached_target; + if(changed) + rebuild(); + + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.hpp new file mode 100644 index 0000000000..a247397199 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneResourceRoute.hpp @@ -0,0 +1,93 @@ +#pragma once +#include +#include +#include + +#include + +#include +#include + +namespace Threedim +{ + +// Level 1 of the "resource → scene field" routing design. Takes a GPU +// texture handle from any upstream producer (CSF shader output, video, +// ISF post-pass, asset loader, …) and stamps it onto a named field of +// `scene_spec`. The emitted scene_spec is a partial contribution with +// only that one field populated — `merge_scenes` overlays it onto the +// rest of the scene_state the ScenePreprocessor receives from other +// producers, so this node composes freely with EnvironmentLoader / +// CubemapLoader / further SceneResourceRoute instances. +// +// Core use case is IBL wiring: an IrradianceConvolve / PrefilterGGX / +// BrdfLut shader's output plugs in here and lands on +// `scene_environment.{irradiance_map, prefiltered_map, brdf_lut}` with +// zero bespoke glue code per target. Shadow-map generation passes will +// target `scene_state.shadow_cascades.shadow_map_array` the same way. +// +// Pattern mirrors CubemapComposer / InjectTexture: CPU-side producer, +// port-driven rebuild + handle-change detection in operator()(). 
+enum class SceneResourceTarget : int +{ + Skybox, // scene_environment.skybox_texture + IrradianceMap, // scene_environment.irradiance_map + PrefilteredMap, // scene_environment.prefiltered_map + BRDFLut, // scene_environment.brdf_lut + ShadowMapArray, // scene_state.shadow_cascades.shadow_map_array +}; + +class SceneResourceRoute +{ +public: + halp_meta(name, "Scene Resource Route") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_resource_route") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-resource-route.html") + halp_meta(uuid, "c2f7a341-8e69-4b0d-b3f8-2d7e4c5a9f1b") + + struct ins + { + // Accepts any GPU texture kind — 2D, cubemap, array. Downstream + // consumer shaders (classic_pbr_ibl, classic_pbr_shadowed) declare + // their own sampler shape (samplerCube / sampler2DArray / sampler2D) + // and it's the authoring's responsibility to match the two. + halp::gpu_texture_input<"Texture"> texture; + + // Port-driven rebuild: target changes fire rebuild(); upstream + // handle flips are caught by operator()() since the halp GPU-texture + // input doesn't emit a port-update event when only the native + // handle swaps. + struct : halp::enum_t + { + void update(SceneResourceRoute& n) { n.rebuild(); } + } target; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // Cached output scene_state — stable identity across frames (so + // downstream scene-identity caches stay hot) and mutated in place on + // target / handle changes. 
+ std::shared_ptr m_state; + int64_t m_version{0}; + void* m_cached_handle{}; + SceneResourceTarget m_cached_target{SceneResourceTarget::Skybox}; + uint8_t m_pending_dirty{0xFF}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneSelector.cpp b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.cpp new file mode 100644 index 0000000000..b300f71063 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.cpp @@ -0,0 +1,222 @@ +#include "SceneSelector.hpp" + +#include + +namespace Threedim +{ + +namespace +{ + +// Duplicated glob matcher; tiny, cheaper than adding a shared header. +// `?` matches one non-'/' character, `*` any run of non-'/' characters, +// `**` any run including '/'. A `*` that cannot advance falls back to +// the most recent `**` (if any) instead of failing outright. +bool selector_glob_match(std::string_view pattern, std::string_view text) noexcept +{ + constexpr auto npos = std::string_view::npos; + std::size_t pi = 0, ti = 0; + // Resume points for the latest `*` (seg_*) and the latest `**` (any_*): + // pattern index just past the star, and the text index it restarts at. + std::size_t seg_pi = npos, seg_ti = 0; + std::size_t any_pi = npos, any_ti = 0; + while(ti < text.size()) + { + if(pi < pattern.size()) + { + const char pc = pattern[pi]; + if(pc == '*') + { + if(pi + 1 < pattern.size() && pattern[pi + 1] == '*') + { + pi += 2; + any_pi = pi; + any_ti = ti; + // `**` can absorb anything, so an older `*` never needs a retry. + seg_pi = npos; + } + else + { + ++pi; + seg_pi = pi; + seg_ti = ti; + } + continue; + } + if((pc == '?' && text[ti] != '/') || pc == text[ti]) + { + ++pi; + ++ti; + continue; + } + } + // Mismatch: let the latest `*` swallow one more character, unless + // that character is a path separator. + if(seg_pi != npos && text[seg_ti] != '/') + { + pi = seg_pi; + ti = ++seg_ti; + continue; + } + // Otherwise let the latest `**` swallow one more character. + if(any_pi != npos) + { + pi = any_pi; + ti = ++any_ti; + seg_pi = npos; + continue; + } + return false; + } + while(pi < pattern.size() && pattern[pi] == '*') + ++pi; + return pi == pattern.size(); +} + +// Depth-first search returning the first node whose accumulated +// slash-path matches the glob, or nullptr if none does. The caller +// decides separately whether to apply the ZeroOut transform rebase.
+ossia::scene_node_ptr selector_findByPath( + const ossia::scene_node_ptr& n, std::string_view pat, const std::string& path) +{ + if(!n) + return nullptr; + if(selector_glob_match(pat, path)) + return n; + if(!n->has_children()) + return nullptr; + for(const auto& p : *n->children) + { + if(auto* sub = ossia::get_if(&p)) + { + if(!*sub) + continue; + std::string childPath = path + '/' + (*sub)->name; + if(auto r = selector_findByPath(*sub, pat, childPath)) + return r; + } + } + return nullptr; +} + +ossia::scene_node_ptr +findByName(const ossia::scene_node_ptr& n, std::string_view wanted) +{ + if(!n) + return nullptr; + if(n->name == wanted) + return n; + if(!n->has_children()) + return nullptr; + for(const auto& p : *n->children) + { + if(auto* sub = ossia::get_if(&p)) + if(auto r = findByName(*sub, wanted)) + return r; + } + return nullptr; +} + +// Strip the first scene_transform payload from a scene_node's children +// list — used for the ZeroOut rebase mode. The node layout convention +// (GltfParser / FbxParser / ConfigurePrimitive / etc.) puts the TRS +// as the first child payload; dropping it leaves the subtree at the +// world origin. 
+ossia::scene_node_ptr stripLeadingTransform(const ossia::scene_node_ptr& n) +{ + if(!n || !n->has_children()) + return n; + if(n->children->empty()) + return n; + if(!ossia::get_if(&(*n->children)[0])) + return n; + + auto clone_children + = std::make_shared>( + n->children->begin() + 1, n->children->end()); + auto copy = std::make_shared(*n); + copy->children = std::move(clone_children); + copy->dirty_index = n->dirty_index + 1; + return copy; +} + +} // namespace + +void SceneSelector::rebuild() +{ + const auto& in = inputs.scene_in.scene; + if(!in.state) + { + m_cached_out.reset(); + m_pending_dirty = 0; + return; + } + + const auto* s = in.state.get(); + const int64_t v = in.state->version; + + ossia::scene_node_ptr found; + const auto mode = Mode(inputs.mode.value); + if(in.state->roots) + { + switch(mode) + { + case ByIndex: { + const auto idx = std::size_t(std::max(0, inputs.index.value)); + if(idx < in.state->roots->size()) + found = (*in.state->roots)[idx]; + break; + } + case ByName: { + for(const auto& r : *in.state->roots) + { + if((found = findByName(r, inputs.path.value))) + break; + } + break; + } + default: { + for(const auto& r : *in.state->roots) + { + const std::string base = r ? ("/" + r->name) : std::string{}; + if((found = selector_findByPath(r, inputs.path.value, base))) + break; + } + break; + } + } + } + + if(found && inputs.rebase.value == ZeroOut) + found = stripLeadingTransform(found); + + auto new_roots + = std::make_shared>(); + if(found) + new_roots->push_back(std::move(found)); + + auto new_state = std::make_shared(*in.state); + new_state->roots = std::move(new_roots); + new_state->version = ++m_version_counter; + new_state->dirty_index = in.state->dirty_index + 1; + + m_cached_out = std::move(new_state); + m_cached_in_state = s; + m_cached_in_version = v; + m_pending_dirty = 0xFF; +} + +void SceneSelector::operator()() +{ + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? 
in_state->version : -1; + const bool upstream_changed + = m_cached_in_state != in_state || m_cached_in_version != in_version; + if(upstream_changed || (!m_cached_out && in_state)) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneSelector.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.hpp new file mode 100644 index 0000000000..785ae9d3a6 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/SceneSelector.hpp @@ -0,0 +1,114 @@ +#pragma once +#include +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Extracts a subtree from an incoming scene and emits it as a fresh +// scene_spec. The Solaris "Extract" pattern: if SceneGraphFilter is +// Prune (keeps the tree shape, drops non-matches), SceneSelector is +// Extract (gathers the matches and forgets the ancestors). +// +// Use case: pull out the camera, a light rig, or a character subtree +// so it can be re-transformed / re-materialized and then merged back +// in via SceneGroup. +// +// Rebase modes: +// Preserve : emit the subtree root as-is, so its transform +// remains in its original parent frame (the +// ancestors are gone but the transform still +// matches where it was). +// ZeroOut : drop the subtree's own TRS so it renders at the +// world origin. Useful when you want to re-place +// the extracted subtree via a downstream +// Transform3D / SceneGroup.
+class SceneSelector +{ +public: + halp_meta(name, "Scene Selector") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_selector") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-selector.html") + halp_meta(uuid, "6c4d8b3f-5e2a-4d1f-9c7b-8a3e5f0d7b4c") + + enum Mode + { + ByPath, + ByName, + ByIndex // index into the root list (0 = first root) + }; + + enum Rebase + { + Preserve, + ZeroOut + }; + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: controls trigger rebuild(); upstream + // scene_in changes detected in operator()(). + struct : halp::combobox_t<"Mode", Mode> + { + struct range + { + std::string_view values[3]{"By path", "By name", "By index"}; + int init{0}; + }; + void update(SceneSelector& n) { n.rebuild(); } + } mode; + + struct : halp::lineedit<"Path / Name", ""> + { void update(SceneSelector& n) { n.rebuild(); } } path; + struct : halp::spinbox_i32<"Index", halp::irange{0, 1024, 0}> + { void update(SceneSelector& n) { n.rebuild(); } } index; + + struct : halp::combobox_t<"Rebase", Rebase> + { + struct range + { + std::string_view values[2]{"Preserve transform", "Zero out"}; + int init{0}; + }; + void update(SceneSelector& n) { n.rebuild(); } + } rebase; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/SceneSwitch.hpp b/src/plugins/score-plugin-threedim/Threedim/SceneSwitch.hpp new file mode 100644 index 0000000000..c56c25710b --- /dev/null +++ 
b/src/plugins/score-plugin-threedim/Threedim/SceneSwitch.hpp @@ -0,0 +1,110 @@ +#pragma once +#include +#include + +#include + +#include + +namespace Threedim +{ + +// N-way scene_spec switch. Pick one of up to 4 scene inputs to pass +// through by index; live VJ-style A/B/C/D scene cutting. +// +// Unwired inputs are skipped — if `index` points at an empty slot, the +// node emits an empty scene, which downstream treats as "nothing to +// render" (no error). This makes it safe to leave slots open during +// authoring and fill them in incrementally. +// +// For blending between scenes: don't do it at the scene-graph level. +// Render each scene to its own texture (ScenePreprocessor → classic_pbr +// → BackgroundNode with a texture output) and ISF-crossfade the +// textures. Scene-level blending has no meaningful semantics for +// arbitrarily-different scene trees. +class SceneSwitch +{ +public: + halp_meta(name, "Scene Switch") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "scene_switch") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/scene-switch.html") + halp_meta(uuid, "7d5c3f8a-2e9b-4a1c-8f6d-5b3e0d9a7c4f") + + struct ins + { + struct + { + halp_meta(name, "Scene 0"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene0; + struct + { + halp_meta(name, "Scene 1"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene1; + struct + { + halp_meta(name, "Scene 2"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene2; + struct + { + halp_meta(name, "Scene 3"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene3; + + halp::spinbox_i32<"Index", halp::irange{0, 3, 0}> index; + } inputs; + + // Cache for upstream-change detection (mirrors CameraSwitch.Select). 
+ const ossia::scene_state* m_cached_state{}; + int64_t m_cached_version{-1}; + int m_cached_index{-1}; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void operator()() + { + const int idx = inputs.index.value; + const ossia::scene_spec* picked = nullptr; + switch(idx) + { + case 0: picked = &inputs.scene0.scene; break; + case 1: picked = &inputs.scene1.scene; break; + case 2: picked = &inputs.scene2.scene; break; + case 3: picked = &inputs.scene3.scene; break; + default: picked = &inputs.scene0.scene; break; + } + outputs.scene_out.scene = *picked; + + // Dirty flag drives downstream re-evaluation. Raise it only on + // real change: index switch, picked-input pointer change, or + // picked-input version bump. Empty slots stay quiet. + const auto* s = picked->state.get(); + const int64_t v = s ? s->version : -1; + const bool changed = (idx != m_cached_index) || (s != m_cached_state) + || (v != m_cached_version); + outputs.scene_out.dirty = (s && changed) ? 0xFF : 0; + m_cached_index = idx; + m_cached_state = s; + m_cached_version = v; + } +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.cpp b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.cpp new file mode 100644 index 0000000000..8dcf1403d9 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.cpp @@ -0,0 +1,337 @@ +#include "ShadowCascadeSetup.hpp" + +#include +#include +#include + +#include + +#include +#include + +namespace Threedim +{ + +namespace +{ + +// Compute one cascade's orthographic light view_projection matrix such +// that every corner of the camera-frustum slice between `near` and +// `far` maps inside the unit cube [-1, 1]³ after the light transform. +// +// Steps: +// 1. Build 8 frustum-slice world-space corners from the camera +// view_proj inverse + near/far clip-space Zs. +// 2. 
Transform them into light view-space (camera facing -Z along +// `lightDir`, up arbitrary-but-orthogonal). +// 3. Axis-aligned-bounding-box → light-space ortho projection. +QMatrix4x4 cascadeLightVP( + const QMatrix4x4& cameraVPInv, float nearZ, float farZ, + const QVector3D& lightDir) +{ + // Frustum corner coords in NDC. Using OpenGL-ish [-1, 1] — the host + // clip-space correction matrix handles the Vulkan flip downstream. + QVector3D corners[8] = { + QVector3D(-1.f, -1.f, nearZ), QVector3D( 1.f, -1.f, nearZ), + QVector3D(-1.f, 1.f, nearZ), QVector3D( 1.f, 1.f, nearZ), + QVector3D(-1.f, -1.f, farZ), QVector3D( 1.f, -1.f, farZ), + QVector3D(-1.f, 1.f, farZ), QVector3D( 1.f, 1.f, farZ)}; + + QVector3D world_corners[8]; + QVector3D centroid(0, 0, 0); + for(int i = 0; i < 8; ++i) + { + // Unproject to world. + QVector4D clip(corners[i], 1.f); + QVector4D w = cameraVPInv * clip; + world_corners[i] = w.toVector3D() / w.w(); + centroid += world_corners[i]; + } + centroid /= 8.f; + + // Light view: looking along lightDir, centered at the slice centroid. + QVector3D up(0, 1, 0); + if(std::abs(QVector3D::dotProduct(lightDir.normalized(), up)) > 0.95f) + up = QVector3D(1, 0, 0); + QMatrix4x4 lightView; + lightView.lookAt(centroid - lightDir.normalized() * 1.f, centroid, up); + + // Compute AABB of slice corners in light-view space. + QVector3D minLS(std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max()); + QVector3D maxLS = -minLS; + for(int i = 0; i < 8; ++i) + { + QVector3D ls = lightView.map(world_corners[i]); + minLS.setX(std::min(minLS.x(), ls.x())); + minLS.setY(std::min(minLS.y(), ls.y())); + minLS.setZ(std::min(minLS.z(), ls.z())); + maxLS.setX(std::max(maxLS.x(), ls.x())); + maxLS.setY(std::max(maxLS.y(), ls.y())); + maxLS.setZ(std::max(maxLS.z(), ls.z())); + } + // Pull the near plane toward the light (+Z in light-view space) so + // occluders just outside the camera frustum still cast shadows into it.
+ const float zPad = (maxLS.z() - minLS.z()) * 0.25f + 1.f; + maxLS.setZ(maxLS.z() + zPad); + + QMatrix4x4 lightProj; + lightProj.ortho( + minLS.x(), maxLS.x(), minLS.y(), maxLS.y(), + -maxLS.z(), -minLS.z()); + + return lightProj * lightView; +} + +// Resolve the first directional light's world direction from the scene +// tree. Recurses through scene_nodes, accumulating parent TRS, and +// matches any light_component whose type == directional — regardless of +// which source node emitted it. Returns false when no directional light +// is found. +bool findDirectionalLight( + const ossia::scene_node& n, const QMatrix4x4& parentWorld, + QVector3D& outDir) noexcept +{ + QMatrix4x4 local; + if(n.children) + { + for(const auto& p : *n.children) + { + if(auto* xf = ossia::get_if(&p)) + { + local.translate(xf->translation[0], xf->translation[1], xf->translation[2]); + local.rotate(QQuaternion( + xf->rotation[3], xf->rotation[0], xf->rotation[1], xf->rotation[2])); + local.scale(xf->scale[0], xf->scale[1], xf->scale[2]); + break; + } + } + } + const QMatrix4x4 world = parentWorld * local; + if(n.children) + { + for(const auto& p : *n.children) + { + if(auto* lc = ossia::get_if(&p)) + { + if(*lc && (*lc)->type == ossia::light_type::directional) + { + // Directional light convention (the Light node encodes the + // user's direction as a rotation of canonical local -Z via + // QQuaternion::rotationTo, so local -Z points along the + // configured direction). World direction is therefore the + // -Z column of the world matrix. + QVector3D nZ = world.mapVector(QVector3D(0, 0, -1)); + if(nZ.lengthSquared() > 1e-5f) + { + outDir = nZ.normalized(); + return true; + } + } + } + if(auto* sub = ossia::get_if(&p)) + if(*sub && findDirectionalLight(**sub, world, outDir)) + return true; + } + } + return false; +} + +// Resolve the active camera's view + projection matrices from the scene +// tree.
Walks the same way as findDirectionalLight: per-node TRS +// accumulation into a world matrix, then on hitting a camera_component +// we invert the world matrix to obtain the view. Matching policy: +// - if `state.active_camera_id` is non-zero, only the scene_node whose +// id equals it is accepted; +// - otherwise the first camera encountered wins (matches the "single +// Camera node is auto-picked" convention from Camera.hpp). +bool findActiveCamera( + const ossia::scene_node& n, const QMatrix4x4& parentWorld, + const ossia::scene_state& state, float aspect, + QMatrix4x4& outView, QMatrix4x4& outProj) noexcept +{ + QMatrix4x4 local; + if(n.children) + { + for(const auto& p : *n.children) + { + if(auto* xf = ossia::get_if(&p)) + { + local.translate(xf->translation[0], xf->translation[1], xf->translation[2]); + local.rotate(QQuaternion( + xf->rotation[3], xf->rotation[0], xf->rotation[1], xf->rotation[2])); + local.scale(xf->scale[0], xf->scale[1], xf->scale[2]); + break; + } + } + } + const QMatrix4x4 world = parentWorld * local; + const bool id_filter = state.active_camera_id.value != 0; + const bool id_matches = !id_filter || n.id == state.active_camera_id; + if(n.children) + { + for(const auto& p : *n.children) + { + if(id_matches) + { + if(auto* cc = ossia::get_if(&p)) + { + if(*cc) + { + const auto& cam = **cc; + outView = world.inverted(); + outProj = QMatrix4x4{}; + outProj.perspective( + cam.yfov * 180.f / float(M_PI), aspect, cam.znear, cam.zfar); + return true; + } + } + } + if(auto* sub = ossia::get_if(&p)) + if(*sub && findActiveCamera(**sub, world, state, aspect, outView, outProj)) + return true; + } + } + return false; +} + +} // namespace + +void ShadowCascadeSetup::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? 
in_state->version : -1; + + const int count = std::clamp(inputs.cascade_count.value, 1, 8); + const float cur_dir[3]{ + inputs.light_direction.value.x, inputs.light_direction.value.y, + inputs.light_direction.value.z}; + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_cached_count = count; + m_cached_distance = inputs.shadow_distance.value; + m_cached_lambda = inputs.lambda.value; + m_cached_near = inputs.camera_near.value; + m_cached_far = inputs.camera_far.value; + std::copy(cur_dir, cur_dir + 3, m_cached_dir); + + if(!in_state) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + // Gather inputs for cascade computation. + const float nearZ = inputs.camera_near.value; + const float farZ = std::min(inputs.camera_far.value, inputs.shadow_distance.value); + const float lambda = std::clamp(inputs.lambda.value, 0.f, 1.f); + + // Scene-derived light direction if the control is left at (0,0,0). + QVector3D lightDir(cur_dir[0], cur_dir[1], cur_dir[2]); + if(lightDir.lengthSquared() < 1e-6f) + { + lightDir = QVector3D(-0.4f, -0.8f, -0.6f); + if(in_state->roots) + { + for(const auto& r : *in_state->roots) + { + QVector3D found; + if(r && findDirectionalLight(*r, QMatrix4x4{}, found)) + { + lightDir = found; + break; + } + } + } + } + lightDir.normalize(); + + // Find the active camera's view_projection by walking the scene tree + // the same way findDirectionalLight does. The camera's placement lives + // on its owning scene_node's scene_transform, so view = inverse(world). + // Fall back to identity when the scene has no camera (the cascades + // will be approximate but the node stays safe to wire in early). + // + // Aspect is unknown at this stage (ScenePreprocessor is the canonical + // source of the render-target aspect); 16:9 is a reasonable default + // and the cascade fit is approximate anyway. 
+ QMatrix4x4 cameraVP; + const float aspect = 16.f / 9.f; + if(in_state->roots) + { + QMatrix4x4 view, proj; + for(const auto& r : *in_state->roots) + { + if(r && findActiveCamera(*r, QMatrix4x4{}, *in_state, aspect, view, proj)) + { + cameraVP = proj * view; + break; + } + } + } + + const QMatrix4x4 cameraVPInv = cameraVP.inverted(); + + // Practical split scheme (Engel/Tabellion). + ossia::shadow_cascades_info info{}; + info.cascade_count = uint32_t(count); + info.shadow_distance = inputs.shadow_distance.value; + info.light_direction[0] = lightDir.x(); + info.light_direction[1] = lightDir.y(); + info.light_direction[2] = lightDir.z(); + + info.split_view_depths[0] = nearZ; + for(int i = 1; i < count; ++i) + { + const float p = float(i) / float(count); + const float logSplit = nearZ * std::pow(farZ / nearZ, p); + const float uniSplit = nearZ + (farZ - nearZ) * p; + info.split_view_depths[i] = lambda * logSplit + (1.f - lambda) * uniSplit; + } + info.split_view_depths[count] = farZ; + + // NDC-Z range for each cascade slice. glClipSpace uses [-1, 1]; Vulkan + // uses [0, 1] after clipSpaceCorr — here we work in camera clip-space + // pre-correction, so [-1, 1] is correct. + for(int i = 0; i < count; ++i) + { + // Convert view-space Z to NDC Z via the projection we computed above. + // Re-derive via the projection: ndcZ = (proj.z * view.z + proj.w.z) / + // (-view.z). Easier: just probe two world-space points at known view + // depths through cameraVP and read their .z. + QVector4D p0 = cameraVP * QVector4D(0, 0, -info.split_view_depths[i], 1); + QVector4D p1 = cameraVP * QVector4D(0, 0, -info.split_view_depths[i + 1], 1); + const float ndc0 = p0.w() != 0.f ? p0.z() / p0.w() : -1.f; + const float ndc1 = p1.w() != 0.f ? p1.z() / p1.w() : 1.f; + QMatrix4x4 m = cascadeLightVP(cameraVPInv, ndc0, ndc1, lightDir); + std::memcpy(info.light_view_proj[i], m.constData(), sizeof(float) * 16); + } + + // Clone scene_state with the new cascades info. 
+ auto state = std::make_shared(*in_state); + state->shadow_cascades = info; + state->version = ++m_version_counter; + state->dirty_index = m_version_counter; + + m_cached_out = state; + m_pending_dirty = 0xFF; +} + +void ShadowCascadeSetup::operator()() +{ + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + const bool upstream_changed + = m_cached_in_state != in_state || m_cached_in_version != in_version; + if(!m_cached_out || upstream_changed) + rebuild(); + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.hpp b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.hpp new file mode 100644 index 0000000000..398fb30d98 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/ShadowCascadeSetup.hpp @@ -0,0 +1,98 @@ +#pragma once +#include +#include + +#include + +#include +#include + +namespace Threedim +{ + +// Authors a `shadow_cascades_info` for the scene from the active camera +// frustum and a directional-light direction. Consumed by: +// - a depth-only shadow_cascades pass (one draw per cascade) +// - classic_pbr_full's PCF sampling at final shading +// +// Practical-split strategy: blend uniform and logarithmic splits with a +// λ parameter (Engel / Tabellion). λ=0 → pure uniform (equal depth +// intervals, wastes near-plane resolution), λ=1 → pure log (near-plane +// heavy, far cascades get almost no area). λ≈0.5 is a good default for +// interactive scenes. +// +// Each cascade's light view_projection fits the camera frustum slice to +// a square orthographic light-space box centered at the slice's world- +// space center, oriented along the light direction. 
+class ShadowCascadeSetup +{ +public: + halp_meta(name, "Shadow Cascade Setup") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "shadow_cascade_setup") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/shadow-cascade-setup.html") + halp_meta(uuid, "7f4d8c2a-9e5b-4f6a-a3d2-1e8c6b9d7f4a") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + // Port-driven rebuild: controls trigger rebuild(); upstream + // scene_in changes detected in operator()(). + struct : halp::spinbox_i32<"Cascade count", halp::irange{1, 8, 4}> + { void update(ShadowCascadeSetup& n) { n.rebuild(); } } cascade_count; + struct : halp::hslider_f32<"Shadow distance", halp::range{1., 10000., 100.}> + { void update(ShadowCascadeSetup& n) { n.rebuild(); } } shadow_distance; + struct : halp::hslider_f32<"Split lambda", halp::range{0., 1., 0.5}> + { void update(ShadowCascadeSetup& n) { n.rebuild(); } } lambda; + // Manual near/far override for the camera (the scene_state doesn't + // currently expose the active camera's near/far on an accessible + // path — these let the user match them). Typical defaults work for + // the Camera node's default near=0.1 / far=1000. + struct : halp::hslider_f32<"Camera near", halp::range{0.001, 10., 0.1}> + { void update(ShadowCascadeSetup& n) { n.rebuild(); } } camera_near; + struct : halp::hslider_f32<"Camera far", halp::range{1., 100000., 1000.}> + { void update(ShadowCascadeSetup& n) { n.rebuild(); } } camera_far; + // Directional-light override. Normally inherited from the first + // directional light in the scene, but some pipelines (e.g. a single + // orbiting light without a Light node) benefit from setting this + // directly. 
+ struct : halp::xyz_spinboxes_f32<"Light direction", halp::range{-1., 1., 0.}> + { void update(ShadowCascadeSetup& n) { n.rebuild(); } } light_direction; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + int m_cached_count{-1}; + float m_cached_distance{-1.f}; + float m_cached_lambda{-1.f}; + float m_cached_near{-1.f}; + float m_cached_far{-1.f}; + float m_cached_dir[3]{}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.cpp b/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.cpp deleted file mode 100644 index 6f8cee8eda..0000000000 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/Executor.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "Executor.hpp" - -#include -#include - -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace Gfx::Splat -{ -class model_display_node final : public gfx_exec_node -{ -public: - model_display_node(GfxExecutionAction& ctx) - : gfx_exec_node{ctx} - { - } - - void init() - { - auto node = std::make_unique(); - id = exec_context->ui->register_node(std::move(node)); - } - - ~model_display_node() { exec_context->ui->unregister_node(id); } - - std::string label() const noexcept override { return "Gfx::Splat_node"; } -}; - -ProcessExecutorComponent::ProcessExecutorComponent( - Gfx::Splat::Model& element, const Execution::Context& ctx, QObject* parent) - : ProcessComponent_T{element, ctx, "modelComponent", parent} -{ - auto n = ossia::make_node( - *ctx.execState, ctx.doc.plugin().exec); - - for(auto* outlet : element.outlets()) - { - if(auto out = qobject_cast(outlet)) - { - out->nodeId = n->id; - } - } - // Buffer input (port 0) - 
element.inlets()[0]->setupExecution(*n->add_texture(), this); - - // Camera controls: Position(1), Center(2), FOV(3), Near(4), Far(5) - for(std::size_t i = 1; i <= 9; i++) - { - auto ctrl = qobject_cast(element.inlets()[i]); - auto& p = n->add_control(); - ctrl->setupExecution(*n->root_inputs().back(), this); - p->value = ctrl->value(); - - QObject::connect( - ctrl, &Process::ControlInlet::valueChanged, this, - con_unvalidated{ctx, i - 1, 0, n}); - } - - n->add_texture_out(); - - n->init(); - this->node = n; - m_ossia_process = std::make_shared(n); -} - -void ProcessExecutorComponent::cleanup() -{ - for(auto* outlet : this->process().outlets()) - { - if(auto out = qobject_cast(outlet)) - { - out->nodeId = score::gfx::invalid_node_index; - } - } - ProcessComponent_T::cleanup(); -} -} diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.cpp b/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.cpp deleted file mode 100644 index c701c1037d..0000000000 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.cpp +++ /dev/null @@ -1,1064 +0,0 @@ -#include "GaussianSplatNode.hpp" - -#include "Gfx/Graph/RhiComputeBarrier.hpp" - -#include - -#include - -#include - -#if defined(near) -#undef near -#undef far -#endif - -namespace score::gfx -{ - -GaussianSplatNode::GaussianSplatNode() -{ - qDebug() << "[GaussianSplat] Node created"; - - // Input port: Raw splat buffer (256 bytes per splat) - auto splatBuffer = new Port{this, {}, Types::Buffer, {}}; - - // Output port: Rendered image - auto out = new Port{this, {}, Types::Image, {}}; - - input.push_back(splatBuffer); - output.push_back(out); - - this->requiresDepth = false; -} - -GaussianSplatNode::~GaussianSplatNode() = default; - -void GaussianSplatNode::process(Message&& msg) -{ - ProcessNode::process(msg.token); - - int32_t p = 0; - for(const gfx_input& m : msg.input) - { - if(auto val = ossia::get_if(&m)) - { - switch(p) - { - case 1: - this->modelPosition = 
ossia::convert(*val); - break; - case 2: - this->modelRotation = ossia::convert(*val); - break; - case 3: - this->modelScale = ossia::convert(*val); - break; - case 4: - this->position = ossia::convert(*val); - break; - case 5: - this->center = ossia::convert(*val); - break; - case 6: - this->fov = ossia::convert(*val); - break; - case 7: - this->near = ossia::convert(*val); - break; - case 8: - this->far = ossia::convert(*val); - break; - } - } - p++; - } - this->materialChange(); -} - -score::gfx::NodeRenderer* -GaussianSplatNode::createRenderer(RenderList& r) const noexcept -{ - qDebug() << "[GaussianSplat] createRenderer called, splatCount=" << splatCount; - return new GaussianSplatRenderer{*this}; -} - -GaussianSplatRenderer::GaussianSplatRenderer(const GaussianSplatNode& node) - : GenericNodeRenderer{node} - , m_node{node} -{ - qDebug() << "[GaussianSplat] Renderer constructed"; -} - -GaussianSplatRenderer::~GaussianSplatRenderer() = default; - -// ───────────────────────────────────────────────────────────────────────────── -// Preprocess pipeline: raw 256B splats → compact 64B rendering splats -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::createPreprocessPipeline(RenderList& renderer) -{ - qDebug() << "[GaussianSplat] createPreprocessPipeline: splatCount=" - << m_node.splatCount - << "rawBuf=" << (void*)m_rawSplatBuffer; - - if(!renderer.state.rhi->isFeatureSupported(QRhi::Compute)) - { - qWarning() << "[GaussianSplat] Compute shaders NOT supported!"; - return; - } - - auto& rhi = *renderer.state.rhi; - const int64_t splatCount = m_node.splatCount; - if(splatCount <= 0) - { - qWarning() << "[GaussianSplat] splatCount <= 0, skipping preprocess pipeline"; - return; - } - - // Create compact output buffer (64 bytes per splat) - const int64_t renderBufSize = splatCount * 64; - delete m_renderSplatBuffer; - m_renderSplatBuffer - = rhi.newBuffer(QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, 
renderBufSize); - if(!m_renderSplatBuffer->create()) - { - qWarning() << "[GaussianSplat] Failed to create renderSplatBuffer size=" << renderBufSize; - delete m_renderSplatBuffer; - m_renderSplatBuffer = nullptr; - return; - } - qDebug() << "[GaussianSplat] renderSplatBuffer created, size=" << renderBufSize; - - // Preprocess uniform buffer - if(!m_preprocessUniformBuffer) - { - m_preprocessUniformBuffer - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 96); - if(!m_preprocessUniformBuffer->create()) - { - qWarning() << "[GaussianSplat] Failed to create preprocessUniformBuffer"; - delete m_preprocessUniformBuffer; - m_preprocessUniformBuffer = nullptr; - return; - } - } - - // Compile preprocess shader - QShader preprocessShader = score::gfx::makeCompute( - renderer.state, GaussianSplatShaders::preprocess_shader); - if(!preprocessShader.isValid()) - { - qWarning() << "[GaussianSplat] preprocess_shader compilation FAILED"; - return; - } - qDebug() << "[GaussianSplat] preprocess_shader compiled OK"; - - // Cleanup old pipeline - delete m_preprocessSrb; - delete m_preprocessPipeline; - - m_preprocessSrb = rhi.newShaderResourceBindings(); - m_preprocessSrb->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, QRhiShaderResourceBinding::ComputeStage, m_rawSplatBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 1, QRhiShaderResourceBinding::ComputeStage, m_renderSplatBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 2, QRhiShaderResourceBinding::ComputeStage, m_preprocessUniformBuffer), - }); - if(!m_preprocessSrb->create()) - { - qWarning() << "[GaussianSplat] preprocess SRB creation FAILED"; - return; - } - - m_preprocessPipeline = rhi.newComputePipeline(); - m_preprocessPipeline->setShaderResourceBindings(m_preprocessSrb); - m_preprocessPipeline->setShaderStage( - {QRhiShaderStage::Compute, preprocessShader}); - if(!m_preprocessPipeline->create()) - { - qWarning() << "[GaussianSplat] preprocess pipeline creation FAILED"; - delete 
m_preprocessPipeline; - m_preprocessPipeline = nullptr; - return; - } - - qDebug() << "[GaussianSplat] preprocess pipeline created OK"; - m_preprocessResourcesCreated = true; -} - -// ───────────────────────────────────────────────────────────────────────────── -// Sort pipelines -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::createSortPipelines(RenderList& renderer) -{ - qDebug() << "[GaussianSplat] createSortPipelines"; - - if(!renderer.state.rhi->isFeatureSupported(QRhi::Compute)) - { - qWarning() << "[GaussianSplat] Compute not supported, no sorting"; - return; - } - if(!m_renderSplatBuffer) - { - qWarning() << "[GaussianSplat] No renderSplatBuffer, cannot create sort pipelines"; - return; - } - - auto& rhi = *renderer.state.rhi; - const int64_t splatCount = m_node.splatCount; - if(splatCount <= 0) - return; - - const int64_t numWorkgroups - = (splatCount + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE; - const int64_t keyBufferSize = splatCount * sizeof(uint32_t); - const int64_t indexBufferSize = splatCount * sizeof(uint32_t); - const int64_t histogramSize = numWorkgroups * NUM_BUCKETS * sizeof(uint32_t); - - auto createOrResizeBuffer - = [&](QRhiBuffer*& buf, int64_t size, QRhiBuffer::UsageFlags usage) { - if(buf && buf->size() >= size) - return; - delete buf; - buf = rhi.newBuffer(QRhiBuffer::Immutable, usage, size); - buf->create(); - }; - - createOrResizeBuffer( - m_sortKeysBuffer, keyBufferSize, QRhiBuffer::StorageBuffer); - createOrResizeBuffer( - m_sortKeysAltBuffer, keyBufferSize, QRhiBuffer::StorageBuffer); - createOrResizeBuffer( - m_sortIndicesBuffer, indexBufferSize, QRhiBuffer::StorageBuffer); - createOrResizeBuffer( - m_sortIndicesAltBuffer, indexBufferSize, QRhiBuffer::StorageBuffer); - createOrResizeBuffer( - m_histogramBuffer, histogramSize, QRhiBuffer::StorageBuffer); - - // Depth key pass uses its own uniform layout: {mat4 view, uint splatCount, float near, float far, uint 
pad} - if(!m_sortUniformBuffer) - { - m_sortUniformBuffer - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 256); - m_sortUniformBuffer->create(); - } - - // Histogram/scatter passes use: {uint splatCount, uint bitOffset, uint numWorkgroups, uint pad} - if(!m_sortPassUniformBuffer) - { - m_sortPassUniformBuffer - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 16); - m_sortPassUniformBuffer->create(); - } - - // Prefix sum pass uses: {uint numWorkgroups, uint pad0, uint pad1, uint pad2} - if(!m_prefixSumUniformBuffer) - { - m_prefixSumUniformBuffer - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 16); - m_prefixSumUniformBuffer->create(); - } - - // Compile compute shaders - QShader depthKeyShader = score::gfx::makeCompute( - renderer.state, GaussianSplatShaders::depth_key_shader); - QShader histogramShader = score::gfx::makeCompute( - renderer.state, GaussianSplatShaders::histogram_shader); - QShader prefixSumShader = score::gfx::makeCompute( - renderer.state, GaussianSplatShaders::prefix_sum_shader); - QShader sortScatterShader = score::gfx::makeCompute( - renderer.state, GaussianSplatShaders::sort_scatter_shader); - - if(!depthKeyShader.isValid()) - qWarning() << "[GaussianSplat] depth_key_shader compilation FAILED"; - if(!histogramShader.isValid()) - qWarning() << "[GaussianSplat] histogram_shader compilation FAILED"; - if(!prefixSumShader.isValid()) - qWarning() << "[GaussianSplat] prefix_sum_shader compilation FAILED"; - if(!sortScatterShader.isValid()) - qWarning() << "[GaussianSplat] sort_scatter_shader compilation FAILED"; - - // Depth key pipeline — reads from compact m_renderSplatBuffer - delete m_depthKeySrb; - delete m_depthKeyPipeline; - - m_depthKeySrb = rhi.newShaderResourceBindings(); - m_depthKeySrb->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, QRhiShaderResourceBinding::ComputeStage, m_renderSplatBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 1, 
QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 2, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 3, QRhiShaderResourceBinding::ComputeStage, m_sortUniformBuffer), - }); - m_depthKeySrb->create(); - - m_depthKeyPipeline = rhi.newComputePipeline(); - m_depthKeyPipeline->setShaderResourceBindings(m_depthKeySrb); - m_depthKeyPipeline->setShaderStage( - {QRhiShaderStage::Compute, depthKeyShader}); - if(!m_depthKeyPipeline->create()) - qWarning() << "[GaussianSplat] depthKey pipeline creation FAILED"; - - // Histogram pipeline (two SRBs for ping-pong: even reads keysBuffer, odd reads keysAltBuffer) - delete m_histogramSrb; - delete m_histogramSrbAlt; - delete m_histogramPipeline; - - m_histogramSrb = rhi.newShaderResourceBindings(); - m_histogramSrb->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 1, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 2, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer), - }); - m_histogramSrb->create(); - - m_histogramSrbAlt = rhi.newShaderResourceBindings(); - m_histogramSrbAlt->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysAltBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 1, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 2, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer), - }); - m_histogramSrbAlt->create(); - - m_histogramPipeline = rhi.newComputePipeline(); - m_histogramPipeline->setShaderResourceBindings(m_histogramSrb); - m_histogramPipeline->setShaderStage( - {QRhiShaderStage::Compute, histogramShader}); - if(!m_histogramPipeline->create()) - qWarning() << 
"[GaussianSplat] histogram pipeline creation FAILED"; - - // Prefix sum pipeline - delete m_prefixSumSrb; - delete m_prefixSumPipeline; - - m_prefixSumSrb = rhi.newShaderResourceBindings(); - m_prefixSumSrb->setBindings({ - QRhiShaderResourceBinding::bufferLoadStore( - 0, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 1, QRhiShaderResourceBinding::ComputeStage, m_prefixSumUniformBuffer), - }); - m_prefixSumSrb->create(); - - m_prefixSumPipeline = rhi.newComputePipeline(); - m_prefixSumPipeline->setShaderResourceBindings(m_prefixSumSrb); - m_prefixSumPipeline->setShaderStage( - {QRhiShaderStage::Compute, prefixSumShader}); - if(!m_prefixSumPipeline->create()) - qWarning() << "[GaussianSplat] prefixSum pipeline creation FAILED"; - - // Sort scatter pipeline (ping-pong: separate read/write buffers) - delete m_sortSrb; - delete m_sortSrbAlt; - delete m_sortPipeline; - - // Even passes: read keys/indices → write keysAlt/indicesAlt - m_sortSrb = rhi.newShaderResourceBindings(); - m_sortSrb->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer), - QRhiShaderResourceBinding::bufferLoad( - 1, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 2, QRhiShaderResourceBinding::ComputeStage, m_sortKeysAltBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 3, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesAltBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 4, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 5, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer), - }); - m_sortSrb->create(); - - // Odd passes: read keysAlt/indicesAlt → write keys/indices - m_sortSrbAlt = rhi.newShaderResourceBindings(); - m_sortSrbAlt->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, 
QRhiShaderResourceBinding::ComputeStage, m_sortKeysAltBuffer), - QRhiShaderResourceBinding::bufferLoad( - 1, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesAltBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 2, QRhiShaderResourceBinding::ComputeStage, m_sortKeysBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 3, QRhiShaderResourceBinding::ComputeStage, m_sortIndicesBuffer), - QRhiShaderResourceBinding::bufferLoadStore( - 4, QRhiShaderResourceBinding::ComputeStage, m_histogramBuffer), - QRhiShaderResourceBinding::uniformBuffer( - 5, QRhiShaderResourceBinding::ComputeStage, m_sortPassUniformBuffer), - }); - m_sortSrbAlt->create(); - - m_sortPipeline = rhi.newComputePipeline(); - m_sortPipeline->setShaderResourceBindings(m_sortSrb); - m_sortPipeline->setShaderStage( - {QRhiShaderStage::Compute, sortScatterShader}); - if(!m_sortPipeline->create()) - qWarning() << "[GaussianSplat] sort pipeline creation FAILED"; - - m_sortResourcesCreated = true; - m_lastSplatCount = splatCount; - qDebug() << "[GaussianSplat] Sort pipelines created OK, workgroups=" << numWorkgroups; -} - -// ───────────────────────────────────────────────────────────────────────────── -// Render pipeline -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::createRenderPipeline(RenderList& renderer) -{ - qDebug() << "[GaussianSplat] createRenderPipeline: renderSplatBuf=" - << (void*)m_renderSplatBuffer - << "sortIndicesBuf=" << (void*)m_sortIndicesBuffer - << "enableSorting=" << m_node.enableSorting; - - if(!m_renderSplatBuffer) - { - qWarning() << "[GaussianSplat] No renderSplatBuffer, cannot create render pipeline"; - return; - } - - delete m_bindings; - delete m_pipeline; - m_bindings = nullptr; - m_pipeline = nullptr; - - auto& rhi = *renderer.state.rhi; - - auto [vertex, fragment] = score::gfx::makeShaders( - renderer.state, GaussianSplatShaders::vertex_shader, - GaussianSplatShaders::fragment_shader); - - 
if(!vertex.isValid()) - qWarning() << "[GaussianSplat] vertex_shader compilation FAILED"; - if(!fragment.isValid()) - qWarning() << "[GaussianSplat] fragment_shader compilation FAILED"; - - // All 3 bindings must always be present (the shader declares them all). - QRhiBuffer* indicesBuf = (m_sortIndicesBuffer && m_node.enableSorting) - ? m_sortIndicesBuffer - : m_dummyStorageBuffer; - - qDebug() << "[GaussianSplat] Render bindings: b0=renderSplat(" - << m_renderSplatBuffer->size() << ") b1=indices(" - << indicesBuf->size() << ") b2=uniform(" - << m_uniformBuffer->size() << ")"; - - m_bindings = rhi.newShaderResourceBindings(); - m_bindings->setBindings({ - QRhiShaderResourceBinding::bufferLoad( - 0, QRhiShaderResourceBinding::VertexStage, m_renderSplatBuffer), - QRhiShaderResourceBinding::bufferLoad( - 1, QRhiShaderResourceBinding::VertexStage, indicesBuf), - QRhiShaderResourceBinding::uniformBuffer( - 2, QRhiShaderResourceBinding::VertexStage, m_uniformBuffer), - }); - if(!m_bindings->create()) - { - qWarning() << "[GaussianSplat] Render SRB creation FAILED"; - return; - } - - m_pipeline = rhi.newGraphicsPipeline(); - m_pipeline->setName("GaussianSplat::pipeline"); - - m_pipeline->setShaderStages( - {{QRhiShaderStage::Vertex, vertex}, - {QRhiShaderStage::Fragment, fragment}}); - - // No vertex input — quad vertices generated in shader - QRhiVertexInputLayout inputLayout; - m_pipeline->setVertexInputLayout(inputLayout); - - m_pipeline->setTopology(QRhiGraphicsPipeline::Triangles); - m_pipeline->setCullMode(QRhiGraphicsPipeline::None); - // Depth test + write: provides correct occlusion as a safety net. - // Framework clears depth to 1.0 (far), so all valid splats pass initially. - // With back-to-front sorting, depth test always passes (each splat is closer). - // Without sorting, depth write ensures near splats occlude far ones. - m_pipeline->setDepthTest(true); - m_pipeline->setDepthWrite(true); - - // Front-to-back "under" compositing (premultiplied alpha). 
- // Mathematically equivalent to back-to-front "over", but much more stable: - // sort-order errors among back splats are hidden by accumulated front alpha. - // Under: result = src * (1 - dst.alpha) + dst - QRhiGraphicsPipeline::TargetBlend blend; - blend.enable = true; - blend.srcColor = QRhiGraphicsPipeline::OneMinusDstAlpha; - blend.dstColor = QRhiGraphicsPipeline::One; - blend.srcAlpha = QRhiGraphicsPipeline::OneMinusDstAlpha; - blend.dstAlpha = QRhiGraphicsPipeline::One; - m_pipeline->setTargetBlends({blend}); - - m_pipeline->setShaderResourceBindings(m_bindings); - - // Find the destination render target so we can match its sample count - // (must agree exactly with renderTarget->sampleCount() — Vulkan rejects - // pipelines whose sampleCount differs from the render pass). - bool foundRenderPass = false; - int rtSamples = renderer.samples(); - for(auto* edge : node.output[0]->edges) - { - auto rt = renderer.renderTargetForOutput(*edge); - if(rt.renderTarget) - { - m_pipeline->setRenderPassDescriptor(rt.renderPass); - const int s = rt.sampleCount(); - if(s > 0) - rtSamples = s; - foundRenderPass = true; - break; - } - } - if(!foundRenderPass) - qWarning() << "[GaussianSplat] No render pass descriptor found from output edges!"; - - m_pipeline->setSampleCount(rtSamples); - - if(!m_pipeline->create()) - { - qWarning() << "[GaussianSplat] Render pipeline creation FAILED"; - delete m_pipeline; - m_pipeline = nullptr; - return; - } - - qDebug() << "[GaussianSplat] Render pipeline created OK"; -} - -// ───────────────────────────────────────────────────────────────────────────── -// Init / Update -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::init(RenderList& renderer, QRhiResourceUpdateBatch& res) -{ - qDebug() << "[GaussianSplat] init: splatCount=" << m_node.splatCount - << "enableSorting=" << m_node.enableSorting - << "shDegree=" << m_node.shDegree; - - auto& rhi = *renderer.state.rhi; - - 
qDebug() << "[GaussianSplat] RHI backend:" - << rhi.backendName() - << "compute=" << rhi.isFeatureSupported(QRhi::Compute); - - // Look up the pre-created input render target from the RenderList - auto rt_spec = m_node.resolveRenderTargetSpecs(0, renderer); - auto sampler = rhi.newSampler( - rt_spec.min_filter, rt_spec.mag_filter, QRhiSampler::Linear, - rt_spec.address_u, rt_spec.address_v, rt_spec.address_w); - sampler->setName("GaussianSplat::sampler"); - sampler->create(); - - auto inputRT = renderer.renderTargetForInputPort(*m_node.input[0]); - auto* texture = inputRT.texture ? inputRT.texture : &renderer.emptyTexture(); - m_samplers.push_back({sampler, texture}); - - // Render uniform buffer - const int64_t uniformSize = 3 * 64 + 16; - m_uniformBuffer - = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, uniformSize); - m_uniformBuffer->create(); - - // Dummy storage buffer - m_dummyStorageBuffer - = rhi.newBuffer(QRhiBuffer::Immutable, QRhiBuffer::StorageBuffer, 16); - m_dummyStorageBuffer->create(); - - // Default mesh (required by base class) - const auto& mesh = renderer.defaultQuad(); - defaultMeshInit(renderer, mesh, res); - - qDebug() << "[GaussianSplat] init complete"; -} - -void GaussianSplatRenderer::update( - RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) -{ - const int64_t splatCount = m_node.splatCount; - - // Check for raw splat buffer input - bool bufferChanged = false; - if(!m_node.input.empty() && m_node.input[0]) - { - auto* inputPort = m_node.input[0]; - if(!inputPort->edges.empty()) - { - auto* inputEdge = inputPort->edges[0]; - if(inputEdge && inputEdge->source) - { - QRhiBuffer* newBuffer{}; - score::gfx::NodeRenderer* src_renderer - = inputEdge->source->node->renderedNodes.at(&renderer); - if(src_renderer) - { - auto bv = src_renderer->bufferForOutput(*inputEdge->source); - newBuffer = bv.handle; - } - if(newBuffer != m_rawSplatBuffer) - { - qDebug() << "[GaussianSplat] update: raw buffer changed," - << 
"old=" << (void*)m_rawSplatBuffer - << "new=" << (void*)newBuffer - << "size=" << newBuffer->size(); - m_rawSplatBuffer = newBuffer; - ((GaussianSplatNode&)this->node).splatCount - = newBuffer ? newBuffer->size() / 256 : 0; - bufferChanged = true; - qDebug() << "[GaussianSplat] Loaded splats:" - << ((GaussianSplatNode&)this->node).splatCount; - } - } - else - { - // Log only once - static bool logged = false; - if(!logged) - { - qDebug() << "[GaussianSplat] update: input edge exists but no value." - << "source=" << (void*)(inputEdge ? inputEdge->source : nullptr); - logged = true; - } - } - } - else - { - static bool logged = false; - if(!logged) - { - qDebug() << "[GaussianSplat] update: input port has no edges"; - logged = true; - } - } - } - else - { - static bool logged = false; - if(!logged) - { - qDebug() << "[GaussianSplat] update: no input ports"; - logged = true; - } - } - - // Recreate compute/render pipelines when buffer or count changes - if(bufferChanged || splatCount != m_lastSplatCount) - { - qDebug() << "[GaussianSplat] update: rebuilding pipelines," - << "bufferChanged=" << bufferChanged - << "splatCount=" << splatCount - << "lastSplatCount=" << m_lastSplatCount - << "rawBuf=" << (void*)m_rawSplatBuffer; - - if(m_rawSplatBuffer && splatCount > 0) - { - createPreprocessPipeline(renderer); - if(m_node.enableSorting) - createSortPipelines(renderer); - createRenderPipeline(renderer); - } - else - { - qDebug() << "[GaussianSplat] update: cannot build pipelines (no buffer or count=0)"; - } - m_lastSplatCount = splatCount; - } - - // Compute view and projection matrices from camera parameters - auto& state = renderer.state; - - // Build model matrix from position/rotation/scale - QMatrix4x4 model; - model.translate( - m_node.modelPosition[0], m_node.modelPosition[1], m_node.modelPosition[2]); - model.rotate(m_node.modelRotation[0], 1, 0, 0); // pitch - model.rotate(m_node.modelRotation[1], 0, 1, 0); // yaw - model.rotate(m_node.modelRotation[2], 0, 0, 1); 
// roll - model.scale(m_node.modelScale[0], m_node.modelScale[1], m_node.modelScale[2]); - - QMatrix4x4 view; - view.lookAt( - QVector3D{m_node.position[0], m_node.position[1], m_node.position[2]}, - QVector3D{m_node.center[0], m_node.center[1], m_node.center[2]}, - QVector3D{0, 1, 0}); - - // modelView bakes the model transform so shaders don't need a separate model matrix - QMatrix4x4 modelView = view * model; - - QMatrix4x4 proj; - const float aspect - = float(state.renderSize.width()) / float(state.renderSize.height()); - proj.perspective(m_node.fov, aspect, m_node.near, m_node.far); - - QMatrix4x4 clip = renderer.state.rhi->clipSpaceCorrMatrix(); - - struct - { - float viewport[2]; - float _pad0; - uint32_t useSorting; - } tail; - - tail.viewport[0] = float(state.renderSize.width()); - tail.viewport[1] = float(state.renderSize.height()); - tail._pad0 = 0.f; - tail.useSorting = m_node.enableSorting && m_sortResourcesCreated ? 1u : 0u; - - char buf[3 * 64 + 16]; - memcpy(buf, modelView.constData(), 64); - memcpy(buf + 64, proj.constData(), 64); - memcpy(buf + 128, clip.constData(), 64); - memcpy(buf + 192, &tail, 16); - - res.updateDynamicBuffer(m_uniformBuffer, 0, sizeof(buf), buf); - - // Update preprocess uniforms - if(m_preprocessUniformBuffer && m_rawSplatBuffer) - { - struct - { - float viewMatrix[16]; - float camPos[3]; - uint32_t splatCount; - uint32_t shDegree; - float scaleMod; - uint32_t _pad0; - uint32_t _pad1; - } ppUniforms; - - memcpy(ppUniforms.viewMatrix, modelView.constData(), 64); - - // Camera position in model space for SH evaluation - QVector3D worldCamPos{m_node.position[0], m_node.position[1], m_node.position[2]}; - QVector3D modelCamPos = model.inverted().map(worldCamPos); - ppUniforms.camPos[0] = modelCamPos.x(); - ppUniforms.camPos[1] = modelCamPos.y(); - ppUniforms.camPos[2] = modelCamPos.z(); - ppUniforms.splatCount = splatCount; - ppUniforms.shDegree = m_node.shDegree; - ppUniforms.scaleMod = m_node.scaleFactor; - - 
res.updateDynamicBuffer( - m_preprocessUniformBuffer, 0, sizeof(ppUniforms), &ppUniforms); - } - - // Update sort uniforms - if(m_sortUniformBuffer && m_node.enableSorting) - { - struct - { - float viewMatrix[16]; - uint32_t splatCount; - float nearPlane; - float farPlane; - uint32_t _pad; - } sortUniforms; - - memcpy(sortUniforms.viewMatrix, modelView.constData(), 64); - sortUniforms.splatCount = splatCount; - sortUniforms.nearPlane = m_node.near; - sortUniforms.farPlane = m_node.far; - - res.updateDynamicBuffer( - m_sortUniformBuffer, 0, sizeof(sortUniforms), &sortUniforms); - } -} - -// ───────────────────────────────────────────────────────────────────────────── -// Compute passes: preprocess → sort -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::runInitialPasses( - RenderList& renderer, QRhiCommandBuffer& cb, - QRhiResourceUpdateBatch*& res, Edge& edge) -{ - const int64_t splatCount = m_node.splatCount; - if(splatCount <= 0 || !m_rawSplatBuffer) - { - static bool logged = false; - if(!logged) - { - qDebug() << "[GaussianSplat] runInitialPasses: SKIPPED (splatCount=" - << splatCount << "rawBuf=" << (void*)m_rawSplatBuffer << ")"; - logged = true; - } - return; - } - - const int64_t numWorkgroups - = (splatCount + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE; - - // ── Pass 1: SH preprocess (raw → compact) ──────────────────────────── - if(m_preprocessResourcesCreated && m_preprocessPipeline) - { - cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent); - - cb.setComputePipeline(m_preprocessPipeline); - cb.setShaderResources(m_preprocessSrb); - cb.dispatch(numWorkgroups, 1, 1); - - cb.beginExternal(); - insertComputeBarrier(*renderer.state.rhi, cb); - cb.endExternal(); - cb.endComputePass(); - } - else - { - static bool logged = false; - if(!logged) - { - qDebug() << "[GaussianSplat] runInitialPasses: preprocess SKIPPED" - << "(created=" << m_preprocessResourcesCreated - << 
"pipeline=" << (void*)m_preprocessPipeline << ")"; - logged = true; - } - } - - // ── Pass 2..N: Depth sort ───────────────────────────────────────────── - if(!m_node.enableSorting || !m_sortResourcesCreated || !m_depthKeyPipeline - || !m_prefixSumPipeline) - { - static bool loggedSkip = false; - if(!loggedSkip) - { - qDebug() << "[GaussianSplat] SORT SKIPPED:" - << "enableSorting=" << m_node.enableSorting - << "sortResourcesCreated=" << m_sortResourcesCreated - << "depthKeyPipeline=" << (void*)m_depthKeyPipeline - << "prefixSumPipeline=" << (void*)m_prefixSumPipeline; - loggedSkip = true; - } - return; - } - - auto& rhi = *renderer.state.rhi; - - // Generate depth keys from compact buffer - cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent); - - cb.setComputePipeline(m_depthKeyPipeline); - cb.setShaderResources(m_depthKeySrb); - cb.dispatch(numWorkgroups, 1, 1); - - cb.beginExternal(); - insertComputeBarrier(*renderer.state.rhi, cb); - cb.endExternal(); - cb.endComputePass(); - - // Upload prefix sum uniforms (constant across all passes) - { - res = rhi.nextResourceUpdateBatch(); - struct - { - uint32_t numWorkgroups; - uint32_t _pad0; - uint32_t _pad1; - uint32_t _pad2; - } prefixUniforms; - prefixUniforms.numWorkgroups = numWorkgroups; - prefixUniforms._pad0 = 0; - prefixUniforms._pad1 = 0; - prefixUniforms._pad2 = 0; - res->updateDynamicBuffer( - m_prefixSumUniformBuffer, 0, sizeof(prefixUniforms), &prefixUniforms); - // Will be consumed by the first histogram pass below - } - - // Radix sort: 2 passes over the top 16 bits (depth key). - // Bottom 16 bits (splat index) are already in order from the depth key shader, - // and the radix sort is stable, so equal-depth splats keep their index order. 
- for(int pass = 0; pass < 2; ++pass) - { - const uint32_t bitOffset = 16 + pass * RADIX_BITS; // bits 16-23, then 24-31 - - // Upload per-pass uniforms for histogram + scatter - { - struct - { - uint32_t splatCount; - uint32_t bitOffset; - uint32_t numWorkgroups; - uint32_t _pad; - } sortPassUniforms; - sortPassUniforms.splatCount = splatCount; - sortPassUniforms.bitOffset = bitOffset; - sortPassUniforms.numWorkgroups = numWorkgroups; - sortPassUniforms._pad = 0; - - if(!res) - res = rhi.nextResourceUpdateBatch(); - res->updateDynamicBuffer( - m_sortPassUniformBuffer, 0, sizeof(sortPassUniforms), - &sortPassUniforms); - } - - // Histogram: count digits per workgroup - // Even passes read from keysBuffer, odd from keysAltBuffer - cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent); - res = nullptr; - cb.setComputePipeline(m_histogramPipeline); - cb.setShaderResources(pass % 2 == 0 ? m_histogramSrb : m_histogramSrbAlt); - cb.dispatch(numWorkgroups, 1, 1); - cb.beginExternal(); - insertComputeBarrier(*renderer.state.rhi, cb); - cb.endExternal(); - cb.endComputePass(); - - // Prefix sum: convert per-workgroup histograms to global prefix sums - // Single workgroup of 256 threads (one per digit) - cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent); - res = nullptr; - cb.setComputePipeline(m_prefixSumPipeline); - cb.setShaderResources(m_prefixSumSrb); - cb.dispatch(1, 1, 1); - cb.beginExternal(); - insertComputeBarrier(*renderer.state.rhi, cb); - cb.endExternal(); - cb.endComputePass(); - - // Scatter: reorder keys+indices using prefix sums (ping-pong) - cb.beginComputePass(res, QRhiCommandBuffer::BeginPassFlag::ExternalContent); - res = nullptr; - cb.setComputePipeline(m_sortPipeline); - cb.setShaderResources(pass % 2 == 0 ? 
m_sortSrb : m_sortSrbAlt); - cb.dispatch(numWorkgroups, 1, 1); - cb.endComputePass(); - } -} - -// ───────────────────────────────────────────────────────────────────────────── -// Render pass -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::runRenderPass( - RenderList& renderer, QRhiCommandBuffer& cb, Edge& edge) -{ - if(!m_pipeline || !m_renderSplatBuffer) - { - static bool logged = false; - if(!logged) - { - qDebug() << "[GaussianSplat] runRenderPass: SKIPPED (pipeline=" - << (void*)m_pipeline - << "renderBuf=" << (void*)m_renderSplatBuffer << ")"; - logged = true; - } - return; - } - - const int64_t splatCount = m_node.splatCount; - if(splatCount <= 0) - return; - - static int frameCount = 0; - if(frameCount++ % 300 == 0) - { - bool sortActive = m_node.enableSorting && m_sortResourcesCreated; - qDebug() << "[GaussianSplat] runRenderPass: drawing" - << splatCount << "splats (frame" << frameCount << ")" - << "sorting=" << sortActive - << "preprocessOK=" << m_preprocessResourcesCreated - << "sortOK=" << m_sortResourcesCreated - << "viewport=" << renderer.state.renderSize; - } - - cb.setGraphicsPipeline(m_pipeline); - cb.setShaderResources(m_bindings); - cb.setViewport( - QRhiViewport{ - 0, 0, (float)renderer.state.renderSize.width(), - (float)renderer.state.renderSize.height()}); - - // 6 vertices (2 triangles) per splat, instanced - cb.draw(6, splatCount, 0, 0); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Cleanup -// ───────────────────────────────────────────────────────────────────────────── - -void GaussianSplatRenderer::release(RenderList& r) -{ - qDebug() << "[GaussianSplat] release"; - - for(auto& sampler : m_samplers) - delete sampler.sampler; - m_samplers.clear(); - - // Render - delete m_uniformBuffer; - delete m_dummyStorageBuffer; - delete m_pipeline; - delete m_bindings; - m_uniformBuffer = nullptr; - m_dummyStorageBuffer = nullptr; - 
m_pipeline = nullptr; - m_bindings = nullptr; - - // Preprocess - delete m_renderSplatBuffer; - delete m_preprocessUniformBuffer; - delete m_preprocessPipeline; - delete m_preprocessSrb; - m_renderSplatBuffer = nullptr; - m_preprocessUniformBuffer = nullptr; - m_preprocessPipeline = nullptr; - m_preprocessSrb = nullptr; - m_preprocessResourcesCreated = false; - - // Sort - delete m_sortKeysBuffer; - delete m_sortKeysAltBuffer; - delete m_sortIndicesBuffer; - delete m_sortIndicesAltBuffer; - delete m_histogramBuffer; - delete m_sortUniformBuffer; - delete m_sortPassUniformBuffer; - delete m_prefixSumUniformBuffer; - delete m_depthKeyPipeline; - delete m_histogramPipeline; - delete m_prefixSumPipeline; - delete m_sortPipeline; - delete m_depthKeySrb; - delete m_histogramSrb; - delete m_histogramSrbAlt; - delete m_prefixSumSrb; - delete m_sortSrb; - delete m_sortSrbAlt; - m_sortKeysBuffer = nullptr; - m_sortKeysAltBuffer = nullptr; - m_sortIndicesBuffer = nullptr; - m_sortIndicesAltBuffer = nullptr; - m_histogramBuffer = nullptr; - m_sortUniformBuffer = nullptr; - m_sortPassUniformBuffer = nullptr; - m_prefixSumUniformBuffer = nullptr; - m_depthKeyPipeline = nullptr; - m_histogramPipeline = nullptr; - m_prefixSumPipeline = nullptr; - m_sortPipeline = nullptr; - m_depthKeySrb = nullptr; - m_histogramSrb = nullptr; - m_histogramSrbAlt = nullptr; - m_prefixSumSrb = nullptr; - m_sortSrb = nullptr; - m_sortSrbAlt = nullptr; - m_sortResourcesCreated = false; - - m_rawSplatBuffer = nullptr; -} - -} // namespace score::gfx diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.hpp b/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.hpp deleted file mode 100644 index 1770d2d3b4..0000000000 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/GaussianSplatNode.hpp +++ /dev/null @@ -1,830 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include - -// clang-format off -#if defined(near) -#undef near -#undef far 
-#endif -// clang-format on - -namespace score::gfx -{ - -/** - * @brief Gaussian Splat rendering node - * - * A full rendering node for 3D Gaussian Splatting. - * Uses instanced quad rendering with EWA (Elliptical Weighted Average) projection. - * - * Pipeline (per frame): - * 1. SH preprocess (compute): raw 256-byte splats → compact 64-byte splats - * Evaluates spherical harmonics, applies exp(scale), sigmoid(opacity) - * 2. Depth key generation (compute): writes sortable uint keys - * 3. Radix sort (compute): sorts indices back-to-front - * 4. Render pass: instanced alpha-blended quads using sorted indices - * - * Input ports: - * - Raw Splat Buffer: GPU storage buffer, 256 bytes per splat - * (layout matches GaussianSplatData from Ply.hpp) - * - * Output ports: - * - Rendered image - */ -struct GaussianSplatNode : public NodeModel -{ -public: - GaussianSplatNode(); - virtual ~GaussianSplatNode(); - - score::gfx::NodeRenderer* createRenderer(RenderList&) const noexcept override; - void process(Message&& msg) override; - - int splatCount{}; - float scaleFactor{1.0f}; - bool enableSorting{true}; - uint32_t shDegree{3}; // 0, 1, 2, or 3 - - // Model transform - ossia::vec3f modelPosition{0.f, 0.f, 0.f}; - ossia::vec3f modelRotation{0.f, 0.f, 0.f}; // Euler angles in degrees (pitch, yaw, roll) - ossia::vec3f modelScale{1.f, 1.f, 1.f}; - - // Camera parameters - ossia::vec3f position{-1.f, -1.f, -1.f}; - ossia::vec3f center{0.f, 0.f, 0.f}; - float fov{90.f}; - float near{0.001f}; - float far{10000.f}; -}; - -/** - * @brief Renderer for GaussianSplatNode - * - * Rendering pipeline: - * 1. runInitialPasses: Compute depth keys and perform GPU radix sort - * 2. 
runRenderPass: Draw sorted splats with alpha blending - */ -class GaussianSplatRenderer final : public score::gfx::GenericNodeRenderer -{ -public: - explicit GaussianSplatRenderer(const GaussianSplatNode& node); - ~GaussianSplatRenderer(); - - void init(RenderList& renderer, QRhiResourceUpdateBatch& res) override; - void update(RenderList& renderer, QRhiResourceUpdateBatch& res, Edge* edge) override; - void runInitialPasses( - RenderList&, QRhiCommandBuffer& commands, QRhiResourceUpdateBatch*& res, - Edge& edge) override; - void runRenderPass(RenderList&, QRhiCommandBuffer& cb, Edge& edge) override; - void release(RenderList&) override; - -private: - void createPreprocessPipeline(RenderList& renderer); - void createRenderPipeline(RenderList& renderer); - void createSortPipelines(RenderList& renderer); - - const GaussianSplatNode& m_node; - - // Render pipeline resources - QRhiBuffer* m_uniformBuffer{}; - QRhiBuffer* m_dummyStorageBuffer{}; // Small buffer for unused bindings - QRhiGraphicsPipeline* m_pipeline{}; - QRhiShaderResourceBindings* m_bindings{}; - - // SH preprocessing compute resources - // Converts raw 256-byte splats → compact 64-byte rendering splats - QRhiBuffer* m_rawSplatBuffer{}; // Input: raw PLY data (256 bytes/splat) - QRhiBuffer* m_renderSplatBuffer{}; // Output: compact (64 bytes/splat) - QRhiBuffer* m_preprocessUniformBuffer{}; - QRhiComputePipeline* m_preprocessPipeline{}; - QRhiShaderResourceBindings* m_preprocessSrb{}; - - // Sorting compute resources - QRhiBuffer* m_sortKeysBuffer{}; // Depth keys (float -> uint for sorting) - QRhiBuffer* m_sortKeysAltBuffer{}; // Double buffer for key ping-pong - QRhiBuffer* m_sortIndicesBuffer{}; // Sorted indices - QRhiBuffer* m_sortIndicesAltBuffer{}; // Double buffer for index ping-pong - QRhiBuffer* m_histogramBuffer{}; // Histogram for radix sort - QRhiBuffer* m_sortUniformBuffer{}; // Depth key pass uniforms - QRhiBuffer* m_sortPassUniformBuffer{}; // Histogram/scatter/prefix uniforms - 
QRhiBuffer* m_prefixSumUniformBuffer{}; // Prefix sum uniforms - - QRhiComputePipeline* m_depthKeyPipeline{}; - QRhiComputePipeline* m_histogramPipeline{}; - QRhiComputePipeline* m_prefixSumPipeline{}; - QRhiComputePipeline* m_sortPipeline{}; - - QRhiShaderResourceBindings* m_depthKeySrb{}; - QRhiShaderResourceBindings* m_histogramSrb{}; - QRhiShaderResourceBindings* m_histogramSrbAlt{}; // For odd passes - QRhiShaderResourceBindings* m_prefixSumSrb{}; - QRhiShaderResourceBindings* m_sortSrb{}; - QRhiShaderResourceBindings* m_sortSrbAlt{}; // For ping-pong - - ossia::small_vector m_samplers; - - int64_t m_lastSplatCount{0}; - bool m_preprocessResourcesCreated{false}; - bool m_sortResourcesCreated{false}; - - static constexpr int64_t MAX_SPLATS = 50000000; - static constexpr int SORT_WORKGROUP_SIZE = 256; - static constexpr int RADIX_BITS = 8; - static constexpr int NUM_BUCKETS = 256; // 2^RADIX_BITS -}; - -// Shader sources -namespace GaussianSplatShaders -{ - -//============================================================================= -// COMPUTE SHADER: SH PREPROCESSING (raw 256B → compact 64B per splat) -//============================================================================= - -/** - * Compute shader: Preprocess raw Gaussian Splat data - * - * Reads raw 256-byte PLY splats and writes compact 64-byte rendering splats: - * - Evaluates spherical harmonics for view-dependent color - * - Applies exp() to log-space scale - * - Applies sigmoid() to raw opacity - * - Normalizes quaternion - * - Reorders rotation from (w,x,y,z) to (x,y,z,w) for the vertex shader - */ -static constexpr auto preprocess_shader = R"_(#version 450 -layout(local_size_x = 256) in; - -// Raw splat: 64 floats = 256 bytes (matches PLY loader output) -// [0..2] position (x,y,z) -// [3..5] normal (nx,ny,nz) — unused -// [6..8] SH DC (f_dc_0, f_dc_1, f_dc_2) -// [9..53] SH rest (f_rest_0 .. 
f_rest_44) -// [54] opacity (pre-sigmoid) -// [55..57] scale (log-space) -// [58..61] rotation (w,x,y,z) -// [62..63] padding - -layout(std430, binding = 0) readonly buffer RawSplatBuffer { - float rawData[]; // 64 floats per splat -}; - -// Compact rendering splat: 16 floats = 64 bytes -// vec4 position (xyz, 0) -// vec4 scale (xyz, 0) — already exp'd -// vec4 rotation (x,y,z,w) — normalized -// vec4 color (r,g,b,a) — SH evaluated, alpha = sigmoid(opacity) - -struct RenderSplat { - vec4 position; - vec4 scale; - vec4 rotation; - vec4 color; -}; - -layout(std430, binding = 1) writeonly buffer RenderSplatBuffer { - RenderSplat renderSplats[]; -}; - -layout(std140, binding = 2) uniform Params { - mat4 view; - vec3 camPos; // Camera position in world space - uint splatCount; - uint shDegree; // 0, 1, 2, or 3 - float scaleMod; - uint _pad0; - uint _pad1; -}; - -// Spherical harmonics constants -const float SH_C0 = 0.28209479177387814; - -const float SH_C1 = 0.4886025119029199; - -const float SH_C2[5] = float[5]( - 1.0925484305920792, - -1.0925484305920792, - 0.31539156525252005, - -1.0925484305920792, - 0.5462742152960396 -); - -const float SH_C3[7] = float[7]( - -0.5900435899266435, - 2.890611442640554, - -0.4570457994644658, - 0.3731763325901154, - -0.4570457994644658, - 1.445305721320277, - -0.5900435899266435 -); - -vec3 evaluateSH(uint base, vec3 dir) { - // Degree 0 - vec3 result = SH_C0 * vec3( - rawData[base + 6], - rawData[base + 7], - rawData[base + 8] - ); - - if (shDegree < 1) { - return result + 0.5; - } - - // Degree 1 - float x = dir.x, y = dir.y, z = dir.z; - - // f_rest layout: [0..14] = R channel rest, [15..29] = G, [30..44] = B - // But the INRIA convention interleaves: [0..2] = degree1 for R,G,B etc. 
- // Actually the standard layout is: - // f_rest[0..14]: coeffs 1..15 for channel 0 (R) - // f_rest[15..29]: coeffs 1..15 for channel 1 (G) - // f_rest[30..44]: coeffs 1..15 for channel 2 (B) - - uint r = base + 9; // f_rest_0 start - // Degree 1: 3 coefficients per channel, interleaved as RGB triplets - // Coeff indices in f_rest: R=[0,1,2], G=[15,16,17], B=[30,31,32] - result += SH_C1 * ( - - y * vec3(rawData[r+0], rawData[r+15], rawData[r+30]) - + z * vec3(rawData[r+1], rawData[r+16], rawData[r+31]) - - x * vec3(rawData[r+2], rawData[r+17], rawData[r+32]) - ); - - if (shDegree < 2) { - return result + 0.5; - } - - // Degree 2: 5 coefficients per channel - // R=[3..7], G=[18..22], B=[33..37] - float xx = x*x, yy = y*y, zz = z*z, xy = x*y, yz = y*z, xz = x*z; - - result += SH_C2[0] * xy * vec3(rawData[r+3], rawData[r+18], rawData[r+33]); - result += SH_C2[1] * yz * vec3(rawData[r+4], rawData[r+19], rawData[r+34]); - result += SH_C2[2] * (2.*zz - xx - yy) - * vec3(rawData[r+5], rawData[r+20], rawData[r+35]); - result += SH_C2[3] * xz * vec3(rawData[r+6], rawData[r+21], rawData[r+36]); - result += SH_C2[4] * (xx - yy)* vec3(rawData[r+7], rawData[r+22], rawData[r+37]); - - if (shDegree < 3) { - return result + 0.5; - } - - // Degree 3: 7 coefficients per channel - // R=[8..14], G=[23..29], B=[38..44] - result += SH_C3[0] * y*(3.*xx - yy) - * vec3(rawData[r+8], rawData[r+23], rawData[r+38]); - result += SH_C3[1] * xy*z * vec3(rawData[r+9], rawData[r+24], rawData[r+39]); - result += SH_C3[2] * y*(4.*zz - xx - yy) - * vec3(rawData[r+10], rawData[r+25], rawData[r+40]); - result += SH_C3[3] * z*(2.*zz - 3.*xx - 3.*yy) - * vec3(rawData[r+11], rawData[r+26], rawData[r+41]); - result += SH_C3[4] * x*(4.*zz - xx - yy) - * vec3(rawData[r+12], rawData[r+27], rawData[r+42]); - result += SH_C3[5] * z*(xx - yy) - * vec3(rawData[r+13], rawData[r+28], rawData[r+43]); - result += SH_C3[6] * x*(xx - 3.*yy) - * vec3(rawData[r+14], rawData[r+29], rawData[r+44]); - - return result + 
0.5; -} - -void main() { - uint idx = gl_GlobalInvocationID.x; - if (idx >= splatCount) return; - - uint base = idx * 64; // 64 floats per raw splat - - // Position - vec3 pos = vec3(rawData[base], rawData[base+1], rawData[base+2]); - - // View direction for SH evaluation (world space, from camera towards splat) - // Must match the INRIA training convention: dir = pos - campos - vec3 dir = normalize(pos - camPos); - - // Evaluate SH for view-dependent color - vec3 color = evaluateSH(base, dir); - color = clamp(color, 0.0, 1.0); - - // Opacity: sigmoid(raw_opacity) - float rawOpacity = rawData[base + 54]; - float alpha = 1.0 / (1.0 + exp(-rawOpacity)); - - // Scale: exp(log_scale) * scaleMod - vec3 scale = vec3( - exp(rawData[base + 55]), - exp(rawData[base + 56]), - exp(rawData[base + 57]) - ) * scaleMod; - - // Rotation: PLY stores (w,x,y,z), shader expects (x,y,z,w) - // Normalize quaternion - vec4 rawRot = vec4( - rawData[base + 58], // w - rawData[base + 59], // x - rawData[base + 60], // y - rawData[base + 61] // z - ); - rawRot = normalize(rawRot); - vec4 rot = vec4(rawRot.y, rawRot.z, rawRot.w, rawRot.x); // xyzw - - // Write compact rendering splat - renderSplats[idx].position = vec4(pos, 0.0); - renderSplats[idx].scale = vec4(scale, 0.0); - renderSplats[idx].rotation = rot; - renderSplats[idx].color = vec4(color, alpha); -} -)_"; - -//============================================================================= -// COMPUTE SHADERS FOR DEPTH SORTING -//============================================================================= - -/** - * Compute shader: Generate depth keys from compact rendering splats - * Transforms view-space Z to a sortable unsigned integer key - */ -static constexpr auto depth_key_shader = R"_(#version 450 -layout(local_size_x = 256) in; - -struct RenderSplat { - vec4 position; - vec4 scale; - vec4 rotation; - vec4 color; -}; - -layout(std430, binding = 0) readonly buffer SplatBuffer { - RenderSplat splats[]; -}; - -layout(std430, 
binding = 1) writeonly buffer KeyBuffer { - uint keys[]; -}; - -layout(std430, binding = 2) writeonly buffer IndexBuffer { - uint indices[]; -}; - -layout(std140, binding = 3) uniform Params { - mat4 view; - uint splatCount; - float nearPlane; - float farPlane; - uint _pad; -}; - -void main() { - uint idx = gl_GlobalInvocationID.x; - if (idx >= splatCount) return; - - // Transform to view space - vec4 viewPos = view * vec4(splats[idx].position.xyz, 1.0); - float depth = -viewPos.z; // Negate because view space Z is negative - - // Front-to-back sort key: top 16 bits = depth, bottom 16 bits = splat index. - // The depth gives correct rendering order; the index provides stable - // tie-breaking for splats at similar depths (same buffer order every frame). - // This eliminates the "wave" artifact from coherent sort-order swaps. - // Combined with "under" blending for correct front-to-back compositing. - const uint keyMax = 0xFFFFFFFFu; - uint key; - if (depth <= nearPlane) { - // Behind camera: draw last, but keep stable index-based sub-order - key = (0xFFFFu << 16u) | (idx & 0xFFFFu); - } else { - float t = log2(depth / nearPlane) / log2(farPlane / nearPlane); - t = clamp(t, 0.0, 1.0); - uint depthKey = uint(t * 65535.0); - key = (depthKey << 16u) | (idx & 0xFFFFu); - } - - keys[idx] = key; - indices[idx] = idx; -} -)_"; - -/** - * Compute shader: Histogram counting for radix sort - * Counts occurrences of each digit value - */ -static constexpr auto histogram_shader = R"_(#version 450 -layout(local_size_x = 256) in; - -layout(std430, binding = 0) readonly buffer KeyBuffer { - uint keys[]; -}; - -layout(std430, binding = 1) buffer HistogramBuffer { - uint histogram[]; // 256 buckets * num_workgroups -}; - -layout(std140, binding = 2) uniform Params { - uint splatCount; - uint bitOffset; // Which 8 bits to sort (0, 8, 16, 24) - uint numWorkgroups; - uint _pad; -}; - -shared uint localHistogram[256]; - -void main() { - uint localId = gl_LocalInvocationID.x; - uint 
globalId = gl_GlobalInvocationID.x; - uint workgroupId = gl_WorkGroupID.x; - - // Clear local histogram - localHistogram[localId] = 0; - barrier(); - - // Count digits in this workgroup - if (globalId < splatCount) { - uint key = keys[globalId]; - uint digit = (key >> bitOffset) & 0xFFu; - atomicAdd(localHistogram[digit], 1); - } - barrier(); - - // Write local histogram to global memory - histogram[workgroupId * 256 + localId] = localHistogram[localId]; -} -)_"; - -/** - * Compute shader: Prefix sum and scatter for radix sort - * Computes exclusive prefix sum and scatters elements to sorted positions - */ -static constexpr auto sort_scatter_shader = R"_(#version 450 -layout(local_size_x = 256) in; - -layout(std430, binding = 0) readonly buffer KeyBufferIn { - uint keysIn[]; -}; - -layout(std430, binding = 1) readonly buffer IndexBufferIn { - uint indicesIn[]; -}; - -layout(std430, binding = 2) writeonly buffer KeyBufferOut { - uint keysOut[]; -}; - -layout(std430, binding = 3) writeonly buffer IndexBufferOut { - uint indicesOut[]; -}; - -layout(std430, binding = 4) buffer HistogramBuffer { - uint histogram[]; // Global prefix sums -}; - -layout(std140, binding = 5) uniform Params { - uint splatCount; - uint bitOffset; - uint numWorkgroups; - uint _pad; -}; - -shared uint localDigits[256]; -shared uint localOffset[256]; - -void main() { - uint localId = gl_LocalInvocationID.x; - uint globalId = gl_GlobalInvocationID.x; - uint workgroupId = gl_WorkGroupID.x; - - // Load global prefix sum for this workgroup's digit - localOffset[localId] = histogram[workgroupId * 256 + localId]; - - // Load this thread's element - uint key = 0u; - uint idx = 0u; - uint digit = 256u; // invalid sentinel (> any real digit) - bool valid = globalId < splatCount; - if (valid) { - key = keysIn[globalId]; - idx = indicesIn[globalId]; - digit = (key >> bitOffset) & 0xFFu; - } - localDigits[localId] = digit; - barrier(); - - if (valid) { - // Stable rank: count threads with LOWER ID that 
share the same digit. - // This is deterministic (no atomicAdd race), so the sort is stable - // and identical across frames — eliminates flickering. - uint rank = 0u; - for (uint i = 0u; i < localId; i++) { - if (localDigits[i] == digit) - rank++; - } - - uint globalPos = localOffset[digit] + rank; - if (globalPos < splatCount) { - keysOut[globalPos] = key; - indicesOut[globalPos] = idx; - } - } -} -)_"; - -/** - * Compute shader: Global prefix sum on histogram - * Converts per-workgroup histograms to global exclusive prefix sums. - * - * Histogram layout: histogram[workgroup * 256 + digit] - * - * The output for each (workgroup, digit) pair must be the global position - * where that workgroup should start placing elements with that digit. - * This requires accounting for: - * 1. All elements with smaller digits (across ALL workgroups) - * 2. Same-digit elements from earlier workgroups - * - * Dispatch: (1, 1, 1) — single workgroup of 256 threads, one per digit. - */ -static constexpr auto prefix_sum_shader = R"_(#version 450 -layout(local_size_x = 256) in; - -layout(std430, binding = 0) buffer HistogramBuffer { - uint histogram[]; // Layout: histogram[workgroup * 256 + digit] -}; - -layout(std140, binding = 1) uniform Params { - uint numWorkgroups; - uint _pad0; - uint _pad1; - uint _pad2; -}; - -shared uint digitTotal[256]; -shared uint digitPrefix[256]; - -void main() { - uint digit = gl_LocalInvocationID.x; // 0-255, one thread per digit - - // Step 1: Sum all workgroup counts for this digit - uint total = 0; - for (uint wg = 0; wg < numWorkgroups; wg++) { - total += histogram[wg * 256 + digit]; - } - digitTotal[digit] = total; - barrier(); - - // Step 2: Thread 0 computes exclusive prefix sum across all digits - // This determines the global starting offset for each digit bucket - if (digit == 0) { - digitPrefix[0] = 0; - for (uint d = 1; d < 256; d++) { - digitPrefix[d] = digitPrefix[d-1] + digitTotal[d-1]; - } - } - barrier(); - - // Step 3: Convert 
per-workgroup counts to global offsets - // For each workgroup: offset = digitPrefix[digit] + sum of same-digit counts in earlier workgroups - uint running = digitPrefix[digit]; - for (uint wg = 0; wg < numWorkgroups; wg++) { - uint idx = wg * 256 + digit; - uint val = histogram[idx]; - histogram[idx] = running; - running += val; - } -} -)_"; - -//============================================================================= -// RENDER SHADERS -//============================================================================= - -static constexpr auto vertex_shader = R"_(#version 450 - -// Quad vertex positions -const vec2 positions[6] = vec2[6]( - vec2(-1.0, -1.0), - vec2( 1.0, -1.0), - vec2( 1.0, 1.0), - vec2(-1.0, -1.0), - vec2( 1.0, 1.0), - vec2(-1.0, 1.0) -); - -// Compact rendering splat (output of preprocess compute shader) -struct RenderSplat { - vec4 position; // xyz = position - vec4 scale; // xyz = scale (already exp'd) - vec4 rotation; // quaternion xyzw (already normalized) - vec4 color; // RGBA (SH evaluated, sigmoid applied) -}; - -layout(std430, binding = 0) readonly buffer SplatBuffer { - RenderSplat splats[]; -}; - -// Sorted indices from depth sort pass -layout(std430, binding = 1) readonly buffer SortedIndices { - uint sortedIndices[]; -}; - -layout(std140, binding = 2) uniform Uniforms { - mat4 view; - mat4 projection; - mat4 clipSpaceCorr; - vec2 viewport; - float _pad0; - uint useSorting; // 0 = no sorting, 1 = use sorted indices -}; - -layout(location = 0) out vec2 f_center; // screen-space splat center (pixels) -layout(location = 1) out vec4 f_color; -layout(location = 2) out vec3 f_conic; - -mat3 quatToMat(vec4 q) { - float x = q.x, y = q.y, z = q.z, w = q.w; - // GLSL mat3 is column-major: mat3(col0, col1, col2) - return mat3( - 1.0 - 2.0*(y*y + z*z), 2.0*(x*y + w*z), 2.0*(x*z - w*y), // col 0 - 2.0*(x*y - w*z), 1.0 - 2.0*(x*x + z*z), 2.0*(y*z + w*x), // col 1 - 2.0*(x*z + w*y), 2.0*(y*z - w*x), 1.0 - 2.0*(x*x + y*y) // col 2 - ); -} - -void 
main() { - // Get splat index (sorted or unsorted) - uint splatIdx = useSorting != 0 ? sortedIndices[gl_InstanceIndex] : gl_InstanceIndex; - RenderSplat splat = splats[splatIdx]; - vec2 quadPos = positions[gl_VertexIndex]; - - // Early opacity cull: skip splats that are nearly invisible - if (splat.color.a < 1.0 / 255.0) { - gl_Position = vec4(0.0, 0.0, 2.0, 1.0); - return; - } - - // View space position - vec4 viewPos = view * vec4(splat.position.xyz, 1.0); - - // Focal lengths in pixels - float focal = projection[0][0] * viewport.x * 0.5; - float focal_y = projection[1][1] * viewport.y * 0.5; - float tanFovX = 0.5 * viewport.x / focal; - float tanFovY = 0.5 * viewport.y / focal_y; - - // Frustum culling: project to clip space and check NDC bounds - // (matches INRIA reference: cull behind camera + outside 1.3x viewport) - vec4 clipPos = projection * viewPos; - if (clipPos.w <= 0.2) { - gl_Position = vec4(0.0, 0.0, 2.0, 1.0); - return; - } - vec3 ndc = clipPos.xyz / clipPos.w; - if (abs(ndc.x) > 1.3 || abs(ndc.y) > 1.3) { - gl_Position = vec4(0.0, 0.0, 2.0, 1.0); - return; - } - - // Clamp view-space position to prevent numerical issues at screen edges - // (matches INRIA CUDA reference: 1.3x FOV tangent) - float limX = 1.3 * tanFovX; - float limY = 1.3 * tanFovY; - float txtz = viewPos.x / viewPos.z; - float tytz = viewPos.y / viewPos.z; - viewPos.x = clamp(txtz, -limX, limX) * viewPos.z; - viewPos.y = clamp(tytz, -limY, limY) * viewPos.z; - - // Build 3D covariance from scale and rotation (already preprocessed) - // INRIA convention: Sigma = R * S * S^T * R^T = R * S² * R^T - // The principal axes are the COLUMNS of R. 
- vec3 scale = splat.scale.xyz; - mat3 R = quatToMat(splat.rotation); - mat3 S = mat3(scale.x, 0, 0, 0, scale.y, 0, 0, 0, scale.z); - mat3 M = R * S; - mat3 Sigma = M * transpose(M); - - // 2D covariance via EWA projection - mat3 W = mat3(view); - float z2 = viewPos.z * viewPos.z; - - // Jacobian of projection (column-major: mat3(col0, col1, col2)) - mat3 J = mat3( - focal / viewPos.z, 0.0, 0.0, // col 0 - 0.0, focal_y / viewPos.z, 0.0, // col 1 - -focal * viewPos.x / z2, -focal_y * viewPos.y / z2, 0.0 // col 2 - ); - - mat3 T = J * W; - mat3 cov = T * Sigma * transpose(T); - - float cov_xx = cov[0][0], cov_xy = cov[0][1], cov_yy = cov[1][1]; - - // Mip-Splatting 2D filter (Yu et al. 2024): approximate the pixel box filter - // as a Gaussian and convolve with the projected 2D covariance. - // Opacity is compensated to preserve each splat's total contribution: - // alpha' = alpha * sqrt(det(Sigma) / det(Sigma + kernel_size * I)) - float kernel_size = 0.3; - float det_0 = max(1e-6, cov_xx * cov_yy - cov_xy * cov_xy); - cov_xx += kernel_size; - cov_yy += kernel_size; - float det_1 = max(1e-6, cov_xx * cov_yy - cov_xy * cov_xy); - float mipCoef = sqrt(det_0 / det_1); - - float det = cov_xx * cov_yy - cov_xy * cov_xy; - float mid = 0.5 * (cov_xx + cov_yy); - float disc = max(0.0, mid * mid - det); - float lambda1 = mid + sqrt(disc); - float lambda2 = mid - sqrt(disc); - - // Eigenvectors of 2D covariance for ellipse-aligned quad - vec2 eigVec1; - if (abs(cov_xy) > 1e-6) { - eigVec1 = normalize(vec2(cov_xy, lambda1 - cov_xx)); - } else { - eigVec1 = (cov_xx >= cov_yy) ? 
vec2(1.0, 0.0) : vec2(0.0, 1.0); - } - vec2 eigVec2 = vec2(-eigVec1.y, eigVec1.x); - - float maxExtent = 2048.0; - float r1 = min(ceil(3.0 * sqrt(max(lambda1, 0.0))), maxExtent); - float r2 = min(ceil(3.0 * sqrt(max(lambda2, 0.0))), maxExtent); - - // Cull degenerate or invisible splats - if (det < 1e-3 || max(r1, r2) < 0.1) { - gl_Position = vec4(0.0, 0.0, 2.0, 1.0); - return; - } - - // Inverse covariance (conic) for fragment Gaussian evaluation. - // The cross-term sign must match the screen-space convention of gl_FragCoord: - // Vulkan/Metal/D3D (clipSpaceCorr[1][1] < 0): both screen axes flip - // relative to J-space, preserving the cross-product sign. - // OpenGL (clipSpaceCorr[1][1] > 0): only X flips, requiring correction. - float inv_det = 1.0 / det; - float crossSign = sign(clipSpaceCorr[1][1]); - f_conic = vec3(cov_yy * inv_det, crossSign * cov_xy * inv_det, cov_xx * inv_det); - - // Oriented quad: major axis along eigVec1, minor along eigVec2 - vec2 pixelOffset = quadPos.x * r1 * eigVec1 + quadPos.y * r2 * eigVec2; - vec2 center = ndc.xy; - vec2 ndcOffset = pixelOffset * 2.0 / viewport; - - gl_Position = clipSpaceCorr * vec4(center + ndcOffset, ndc.z, 1.0); - - // Score's texture compositing pipeline flips Y when sampling for Vulkan/HLSL/Metal. - // To match this convention (same as ISF shaders), we undo clipSpaceCorr's Y-flip here - // so the compositing re-flip produces a correctly oriented final image. - gl_Position.y = -gl_Position.y; - - // Screen-space center in pixels (matches gl_FragCoord coordinate system) - vec4 centerClip = clipSpaceCorr * vec4(ndc.xy, ndc.z, 1.0); - centerClip.y = -centerClip.y; - f_center = (centerClip.xy / centerClip.w * 0.5 + 0.5) * viewport; - - // Fade out excessively large projected splats. 
- float alpha = splat.color.a * mipCoef; - float maxR = max(r1, r2); - float fadeRadius = 512.0; - if (maxR > fadeRadius) { - float fade = fadeRadius / maxR; - alpha *= fade; - if (alpha < 1.0 / 255.0) { - gl_Position = vec4(0.0, 0.0, 2.0, 1.0); - return; - } - } - f_color = vec4(splat.color.rgb, alpha); -} -)_"; - -static constexpr auto fragment_shader = R"_(#version 450 - -layout(location = 0) in vec2 f_center; // screen-space splat center (pixels) -layout(location = 1) in vec4 f_color; -layout(location = 2) in vec3 f_conic; - -layout(location = 0) out vec4 fragColor; - -void main() { - // Pixel offset from splat center, computed per-fragment for precision. - // Unlike interpolated UVs, this is exact regardless of quad orientation. - vec2 d = gl_FragCoord.xy - f_center; - - float power = -0.5 * (f_conic.x * d.x * d.x + - 2.0 * f_conic.y * d.x * d.y + - f_conic.z * d.y * d.y); - - if (power > 0.0) discard; - - float gaussian = exp(power); - float alpha = min(0.99, gaussian * f_color.a); - if (alpha < 1.0/255.0) discard; - - fragColor = vec4(f_color.rgb * alpha, alpha); -} -)_"; - -} // namespace GaussianSplatShaders - -} // namespace score::gfx diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Metadata.hpp b/src/plugins/score-plugin-threedim/Threedim/Splat/Metadata.hpp deleted file mode 100644 index 2433522d07..0000000000 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/Metadata.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -#include - -namespace Gfx::Splat -{ -class Model; -} - -PROCESS_METADATA( - , Gfx::Splat::Model, "cdc15a16-e856-4e02-9339-7d9e48da10ce", - "Splat", // Internal name - "Splat", // Pretty name - Process::ProcessCategory::Visual, // Category - "Visuals/Render", // Category - "Display gaussian splats", // Description - "ossia team", // Author - (QStringList{"gfx", "model", "3d"}), // Tags - {}, // Inputs - {}, // Outputs - QUrl{}, // Doc url - Process::ProcessFlags::SupportsAll | Process::ProcessFlags::ControlSurface // Flags 
-) diff --git a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.cpp b/src/plugins/score-plugin-threedim/Threedim/Splat/Process.cpp deleted file mode 100644 index 33d87fbeba..0000000000 --- a/src/plugins/score-plugin-threedim/Threedim/Splat/Process.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "Process.hpp" - -#include -#include - -#include -#include - -#include -#include - -#include - -W_OBJECT_IMPL(Gfx::Splat::Model) -namespace Gfx::Splat -{ - -Model::Model( - const TimeVal& duration, const Id& id, QObject* parent) - : Process::ProcessModel{duration, id, "gfxProcess", parent} -{ - metadata().setInstanceName(*this); - - init(); -} - -Model::~Model() = default; - -void Model::init() -{ - if(m_inlets.empty() && m_outlets.empty()) - { - m_outlets.push_back(new TextureOutlet{"Texture Out", Id(0), this}); - m_inlets.push_back(new TextureInlet{"Buffer In", Id(0), this}); - - m_inlets.push_back(new Process::XYZSpinboxes{ - ossia::vec3f{-10000., -10000., -10000.}, ossia::vec3f{10000., 10000., 10000.}, - ossia::vec3f{0., 0., 0.}, false, "Position", Id(1), this}); - m_inlets.push_back(new Process::XYZSpinboxes{ - ossia::vec3f{0., 0., 0.}, ossia::vec3f{359.9999999, 359.9999999, 359.9999999}, - ossia::vec3f{}, false, "Rotation", Id(2), this}); - m_inlets.push_back(new Process::XYZSpinboxes{ - ossia::vec3f{0.00001, 0.00001, 0.00001}, ossia::vec3f{1000., 1000., 1000.}, - ossia::vec3f{1., 1., 1.}, false, "Scale", Id(3), this}); - - m_inlets.push_back(new Process::XYZSpinboxes{ - ossia::vec3f{-10000., -10000., -10000.}, ossia::vec3f{10000., 10000., 10000.}, - ossia::vec3f{-1., -1., -1.}, false, "Camera position", Id(4), - this}); - m_inlets.push_back(new Process::XYZSpinboxes{ - ossia::vec3f{-10000., -10000., -10000.}, ossia::vec3f{10000., 10000., 10000.}, - ossia::vec3f{}, false, "Camera direction", Id(5), this}); - - m_inlets.push_back( - new Process::FloatSlider{0.01, 359.999, 90., "FOV", Id(6), this}); - m_inlets.push_back(new Process::FloatSlider{ - 0.001, 1000., 
0.001, "Near", Id(7), this}); - m_inlets.push_back(new Process::FloatSlider{ - 0.001, 10000., 100000., "Far", Id(8), this}); - } - - std::vector> projmodes{ - {"Perspective", 0}, - {"Fulldome (1-pass)", 1}, - }; - - m_inlets.push_back( - new Process::ComboBox{projmodes, 0, "Camera", Id(9), this}); -} - -QString Model::prettyName() const noexcept -{ - return tr("Model Display"); -} - -} -template <> -void DataStreamReader::read(const Gfx::Splat::Model& proc) -{ - readPorts(*this, proc.m_inlets, proc.m_outlets); - - insertDelimiter(); -} - -template <> -void DataStreamWriter::write(Gfx::Splat::Model& proc) -{ - writePorts( - *this, components.interfaces(), proc.m_inlets, - proc.m_outlets, &proc); - - checkDelimiter(); -} - -template <> -void JSONReader::read(const Gfx::Splat::Model& proc) -{ - readPorts(*this, proc.m_inlets, proc.m_outlets); -} - -template <> -void JSONWriter::write(Gfx::Splat::Model& proc) -{ - writePorts( - *this, components.interfaces(), proc.m_inlets, - proc.m_outlets, &proc); -} diff --git a/src/plugins/score-plugin-threedim/Threedim/TagAs.cpp b/src/plugins/score-plugin-threedim/Threedim/TagAs.cpp new file mode 100644 index 0000000000..ad9bcd15c8 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TagAs.cpp @@ -0,0 +1,52 @@ +#include "TagAs.hpp" + +#include "PrimitiveCloud/FormatOverride.hpp" + +namespace Threedim +{ + +void TagAs::rebuild() +{ + const auto& in = inputs.scene_in.scene; + const ossia::scene_state* in_state = in.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + const auto& cur_format = inputs.format_id.value; + + m_cached_in_state = in_state; + m_cached_in_version = in_version; + m_cached_format_id = cur_format; + + if(!in_state) + { + m_cached_out = in.state; + m_pending_dirty = 0xFF; + return; + } + + // applyFormatOverride is the same helper AssetLoader uses, with the + // same passthrough-when-empty contract. 
Returns the input verbatim + // when format_id is empty so wiring stays cheap during edits. + m_cached_out = Threedim::PrimitiveCloud::applyFormatOverride( + in.state, cur_format); + m_pending_dirty = 0xFF; +} + +void TagAs::operator()() +{ + // The upstream scene_state ptr / version can change without a + // port-update event (e.g. when a producer republishes the same + // shared_ptr after an internal mutation). Detect and rebuild. + const auto* in_state = inputs.scene_in.scene.state.get(); + const int64_t in_version = in_state ? in_state->version : -1; + const bool upstream_changed + = m_cached_in_state != in_state + || m_cached_in_version != in_version; + if(!m_cached_out || upstream_changed) + rebuild(); + + outputs.scene_out.scene.state = m_cached_out; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/TagAs.hpp b/src/plugins/score-plugin-threedim/Threedim/TagAs.hpp new file mode 100644 index 0000000000..483b3bb459 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TagAs.hpp @@ -0,0 +1,78 @@ +#pragma once +#include +#include + +#include + +#include +#include +#include + +namespace Threedim +{ + +// Mid-pipeline format-id stamp. Walks every primitive_cloud_component +// reachable from the upstream scene_state and shallow-clones it with +// `format_id = inputs.format_id.value`. Heavy fields (raw_data, +// extra_buffers, bounds) are shared via shared_ptr — no GPU upload +// duplicates. +// +// Wiring: +// ThirdPartyProducer → TagAs(format_id="my-custom-format") +// → ScenePreprocessor +// → FlattenedSceneFilter(mode=12, match="my-custom-format") +// → CustomDecode → CustomDraw → Window +// +// Use this when the upstream producer can't be modified (third-party +// node, legacy plugin) but the cloud needs to flow through a +// FlattenedSceneFilter in mode 12 (format_id == match_str). 
Empty +// `format_id` is passthrough — no rewrite, original scene_state +// forwarded as-is. +class TagAs +{ +public: + halp_meta(name, "Tag As Format") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(c_name, "tag_as_format") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/tag-as-format.html") + halp_meta(uuid, "8e3d7c2a-5f91-4b6c-a8e2-1d9f4c7b3e5a") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + struct : halp::lineedit<"Format ID", ""> + { void update(TagAs& n) { n.rebuild(); } } format_id; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // Cached output kept stable while inputs are unchanged — preserves + // ScenePreprocessor's fingerprint fast-path. + std::shared_ptr m_cached_out; + uint8_t m_pending_dirty{0xFF}; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + std::string m_cached_format_id; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/TangentUtils.hpp b/src/plugins/score-plugin-threedim/Threedim/TangentUtils.hpp new file mode 100644 index 0000000000..e584ec3a35 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TangentUtils.hpp @@ -0,0 +1,139 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace Threedim +{ + +// Generate glTF-compatible float4 tangents (xyz = unit tangent, w = +// handedness ±1) using mikktspace from a mesh's position / normal / +// texcoord_0 streams and an optional uint32 index buffer. Returns a +// shared buffer of `vertex_count * 4` floats, or nullptr on failure +// (missing streams, degenerate mesh, etc). 
+// +// For indexed meshes: mikktspace's contract is unindexed ("DO NOT use +// an already existing index list"), but we're constrained to keep +// indexed data. We call the mikktspace callbacks against the EXPANDED +// (unindexed) triangle list via the index buffer, and write the +// generated tangent back through the same index lookup. When two +// triangles share a vertex with the same tangent (smooth surface), +// successive writes produce the same value. At UV seams they disagree +// and the last write wins — a known small artifact compared to +// un-indexing the whole mesh. Vertex duplication on import is a +// future enhancement tracked in docs/3d-pipeline-tasks.md. +inline std::shared_ptr> generate_tangents_mikktspace( + const std::shared_ptr>& positions, + const std::shared_ptr>& normals, + const std::shared_ptr>& texcoords, + const std::shared_ptr>& indices, + uint32_t vertex_count) +{ + if(!positions || !normals || !texcoords || vertex_count == 0) + return {}; + if(positions->size() < vertex_count * 3 + || normals->size() < vertex_count * 3 + || texcoords->size() < vertex_count * 2) + return {}; + + // Triangle count: indexed → indices/3, non-indexed → vertex_count/3. + const uint32_t num_faces + = indices ? uint32_t(indices->size() / 3) + : uint32_t(vertex_count / 3); + if(num_faces == 0) + return {}; + + auto tangents = std::make_shared>(vertex_count * 4, 0.f); + + struct UserData + { + const float* positions; + const float* normals; + const float* texcoords; + const uint32_t* indices; // null when un-indexed + uint32_t num_faces; + std::vector* tangents; + }; + UserData ud{positions->data(), + normals->data(), + texcoords->data(), + indices ? indices->data() : nullptr, + num_faces, + tangents.get()}; + + auto vertexIndex + = [](const UserData& u, int iFace, int iVert) -> uint32_t { + const uint32_t flat = uint32_t(iFace) * 3u + uint32_t(iVert); + return u.indices ? 
u.indices[flat] : flat; + }; + + SMikkTSpaceInterface iface{}; + iface.m_getNumFaces = [](const SMikkTSpaceContext* ctx) { + return int(static_cast(ctx->m_pUserData)->num_faces); + }; + iface.m_getNumVerticesOfFace = [](const SMikkTSpaceContext*, int) { + return 3; + }; + iface.m_getPosition = [](const SMikkTSpaceContext* ctx, float out[], + int iFace, int iVert) { + auto& u = *static_cast(ctx->m_pUserData); + auto vi = uint32_t(iFace) * 3u + uint32_t(iVert); + auto v = u.indices ? u.indices[vi] : vi; + out[0] = u.positions[v * 3 + 0]; + out[1] = u.positions[v * 3 + 1]; + out[2] = u.positions[v * 3 + 2]; + }; + iface.m_getNormal = [](const SMikkTSpaceContext* ctx, float out[], + int iFace, int iVert) { + auto& u = *static_cast(ctx->m_pUserData); + auto vi = uint32_t(iFace) * 3u + uint32_t(iVert); + auto v = u.indices ? u.indices[vi] : vi; + out[0] = u.normals[v * 3 + 0]; + out[1] = u.normals[v * 3 + 1]; + out[2] = u.normals[v * 3 + 2]; + }; + iface.m_getTexCoord = [](const SMikkTSpaceContext* ctx, float out[], + int iFace, int iVert) { + auto& u = *static_cast(ctx->m_pUserData); + auto vi = uint32_t(iFace) * 3u + uint32_t(iVert); + auto v = u.indices ? u.indices[vi] : vi; + out[0] = u.texcoords[v * 2 + 0]; + out[1] = u.texcoords[v * 2 + 1]; + }; + iface.m_setTSpaceBasic = [](const SMikkTSpaceContext* ctx, + const float tangent[], float sign, + int iFace, int iVert) { + auto& u = *static_cast(ctx->m_pUserData); + auto vi = uint32_t(iFace) * 3u + uint32_t(iVert); + auto v = u.indices ? u.indices[vi] : vi; + auto& t = *u.tangents; + t[v * 4 + 0] = tangent[0]; + t[v * 4 + 1] = tangent[1]; + t[v * 4 + 2] = tangent[2]; + t[v * 4 + 3] = sign; + }; + (void)vertexIndex; + + SMikkTSpaceContext ctx{&iface, &ud}; + if(!genTangSpaceDefault(&ctx)) + return {}; + + // Fallback for vertices never touched (rare; mostly for non-manifold + // meshes): orient any zero tangent along X so shader doesn't divide + // by zero when reconstructing the TBN. 
+ for(uint32_t v = 0; v < vertex_count; ++v) + { + float* t = tangents->data() + v * 4; + const float len2 = t[0] * t[0] + t[1] * t[1] + t[2] * t[2]; + if(len2 < 1e-10f) + { + t[0] = 1.f; t[1] = 0.f; t[2] = 0.f; t[3] = 1.f; + } + } + return tangents; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/TextToMesh.cpp b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.cpp new file mode 100644 index 0000000000..41f6674dac --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.cpp @@ -0,0 +1,454 @@ +#include "TextToMesh.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace Threedim +{ + +namespace +{ + +// ─── Ear-clipping triangulator ──────────────────────────────────────── +// +// Handles simple (non-self-intersecting, no holes) polygons in CCW +// winding order. For each emitted triangle, the resulting indices +// reference positions in the input polygon's order. +// +// Complexity: O(n²). Glyphs flatten to dozens of verts at most, so +// acceptable. For large pts-per-glyph or paragraph text later, swap +// for earcut.hpp. + +struct Vec2 { float x, y; }; + +inline float triSign(Vec2 a, Vec2 b, Vec2 c) noexcept +{ + return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x); +} + +inline bool pointInTri(Vec2 p, Vec2 a, Vec2 b, Vec2 c) noexcept +{ + const float d1 = triSign(p, a, b); + const float d2 = triSign(p, b, c); + const float d3 = triSign(p, c, a); + const bool neg = (d1 < 0.f) || (d2 < 0.f) || (d3 < 0.f); + const bool pos = (d1 > 0.f) || (d2 > 0.f) || (d3 > 0.f); + return !(neg && pos); +} + +// Signed area × 2. Positive = CCW in a Y-up frame. 
+float polyArea(const std::vector& p) noexcept +{ + float s = 0.f; + const std::size_t n = p.size(); + for(std::size_t i = 0; i < n; ++i) + { + const auto& a = p[i]; + const auto& b = p[(i + 1) % n]; + s += a.x * b.y - b.x * a.y; + } + return s; +} + +// Ear-clip `poly` into triangles; append indices (into `base_offset + +// original polygon index`) to `out_indices`. +void earClip( + const std::vector& poly, uint32_t base_offset, + std::vector& out_indices) +{ + const std::size_t n0 = poly.size(); + if(n0 < 3) + return; + + // Make a working copy of the polygon with flipped winding if needed + // so the triangulator always sees CCW. + std::vector idx(n0); + if(polyArea(poly) < 0.f) + { + for(std::size_t i = 0; i < n0; ++i) + idx[i] = int(n0 - 1 - i); + } + else + { + for(std::size_t i = 0; i < n0; ++i) + idx[i] = int(i); + } + + int n = (int)idx.size(); + int guard = n * 3; // bail to avoid infinite loop on degenerate input + while(n > 3 && guard-- > 0) + { + bool ear_found = false; + for(int i = 0; i < n; ++i) + { + const int ip = (i + n - 1) % n; + const int in_ = (i + 1) % n; + const Vec2 a = poly[idx[ip]]; + const Vec2 b = poly[idx[i]]; + const Vec2 c = poly[idx[in_]]; + if(triSign(a, b, c) <= 0.f) + continue; // reflex or collinear — not an ear + bool blocked = false; + for(int j = 0; j < n; ++j) + { + if(j == ip || j == i || j == in_) + continue; + if(pointInTri(poly[idx[j]], a, b, c)) + { + blocked = true; + break; + } + } + if(blocked) + continue; + out_indices.push_back(base_offset + uint32_t(idx[ip])); + out_indices.push_back(base_offset + uint32_t(idx[i])); + out_indices.push_back(base_offset + uint32_t(idx[in_])); + idx.erase(idx.begin() + i); + --n; + ear_found = true; + break; + } + if(!ear_found) + break; // give up on degenerate polygons + } + if(n == 3) + { + out_indices.push_back(base_offset + uint32_t(idx[0])); + out_indices.push_back(base_offset + uint32_t(idx[1])); + out_indices.push_back(base_offset + uint32_t(idx[2])); + } +} + +// Convert a 
QPainterPath's filled polygons into (positions, indices), +// appending to out_positions / out_indices. Positions are emitted as +// (x, y_flipped, 0). `scale` maps Qt pixel coords to world units. +void tessellatePath( + const QPainterPath& path, float scale, float x_origin, + std::vector& out_positions, std::vector& out_indices) +{ + // toFillPolygons flattens curves and returns one or more polygons + // representing the filled region. Holes would appear as separate + // polygons with opposite winding — in this v1 we treat every polygon + // as a solid fill. + const QList polys = path.toFillPolygons(); + for(const auto& qpoly : polys) + { + if(qpoly.size() < 3) + continue; + std::vector poly; + poly.reserve(qpoly.size()); + // Skip the closing duplicate vertex that Qt tends to append. + int count = qpoly.size(); + if(count > 1 && qpoly[0] == qpoly[count - 1]) + count--; + for(int i = 0; i < count; ++i) + { + const auto& p = qpoly[i]; + // Y flip so the mesh uses a right-handed Y-up frame (Qt is Y-down). + poly.push_back({float(p.x() * scale + x_origin), + float(-p.y() * scale)}); + } + const uint32_t base = uint32_t(out_positions.size() / 3); + for(const auto& v : poly) + { + out_positions.push_back(v.x); + out_positions.push_back(v.y); + out_positions.push_back(0.f); + } + earClip(poly, base, out_indices); + } +} + +} // namespace + +void TextToMesh::rebuild() +{ + const bool text_inputs_changed + = m_cached_text != inputs.text.value + || m_cached_family != inputs.font_family.value + || m_cached_size != inputs.font_size.value + || m_cached_bold != inputs.bold.value + || m_cached_italic != inputs.italic.value + || m_cached_height != inputs.height.value + || m_cached_center != inputs.center_x.value; + + float scratch[16]; + CachedTRS xformCache = m_cachedTRS; + computeTRSMatrix(inputs, scratch, xformCache); + m_cachedTRS = xformCache; + + // Rebuild the mesh only when the text / font parameters changed. 
+ // Pure TRS edits keep the same mesh_component and just bump the + // enclosing scene_state version. + if(text_inputs_changed || !m_cached_mesh) + { + m_cached_text = inputs.text.value; + m_cached_family = inputs.font_family.value; + m_cached_size = inputs.font_size.value; + m_cached_bold = inputs.bold.value; + m_cached_italic = inputs.italic.value; + m_cached_height = inputs.height.value; + m_cached_center = inputs.center_x.value; + + // Build a QRawFont from the requested family. QRawFont::fromFont + // resolves aliases (e.g. "Sans" → the system default). + QFont qf(QString::fromStdString(inputs.font_family.value)); + qf.setPixelSize(inputs.font_size.value); + qf.setBold(inputs.bold.value); + qf.setItalic(inputs.italic.value); + QRawFont rf = QRawFont::fromFont(qf); + if(!rf.isValid()) + { + // Fallback: default system font at the requested size. + QFont def; + def.setPixelSize(inputs.font_size.value); + rf = QRawFont::fromFont(def); + } + + const QString str = QString::fromStdString(inputs.text.value); + const QVector glyphs = rf.glyphIndexesForString(str); + const QVector advances = rf.advancesForGlyphIndexes(glyphs); + + // Pixel → world scale: QRawFont::pixelSize() is the nominal pixel + // size. Height control sets the target cap height; we approximate + // cap height as pixelSize × 0.7 (typical for Latin fonts). 
+ const float cap_ratio = 0.7f; + const float pixel_to_world + = inputs.height.value + / (float(rf.pixelSize()) * cap_ratio + 1e-6f); + + std::vector positions; + std::vector indices; + positions.reserve(glyphs.size() * 32 * 3); + indices.reserve(glyphs.size() * 32); + + float cursor_x_px = 0.f; + for(int gi = 0; gi < glyphs.size(); ++gi) + { + QPainterPath gp = rf.pathForGlyph(glyphs[gi]); + if(!gp.isEmpty()) + tessellatePath( + gp, pixel_to_world, cursor_x_px * pixel_to_world, + positions, indices); + if(gi < advances.size()) + cursor_x_px += float(advances[gi].x()); + } + + // Optionally center the text on X — total advance is where we + // ended up at cursor_x_px; shift all vertices by -half. + if(inputs.center_x.value && !positions.empty()) + { + const float half = cursor_x_px * pixel_to_world * 0.5f; + for(std::size_t v = 0; v < positions.size(); v += 3) + positions[v] -= half; + } + + if(positions.empty() || indices.empty()) + { + // Empty string or unrenderable font — keep m_wrapped_state valid + // (reset mesh) but clear its content so republish emits empty. + m_cached_mesh.reset(); + if(!m_wrapped_state) + m_wrapped_state = std::make_shared(); + m_wrapped_state->roots.reset(); + m_wrapped_state->materials.reset(); + m_wrapped_state->version = ++m_version_counter; + m_wrapped_state->dirty_index = m_version_counter; + m_pending_dirty = 0xFF; + return; + } + + // Build position / normal / texcoord buffers. 
+ const std::size_t vcount = positions.size() / 3; + auto pos_buf = std::make_shared>(std::move(positions)); + auto nrm_buf = std::make_shared>(vcount * 3, 0.f); + for(std::size_t i = 0; i < vcount; ++i) + (*nrm_buf)[i * 3 + 2] = 1.f; // +Z normal + auto uv_buf = std::make_shared>(vcount * 2, 0.f); + auto idx_buf = std::make_shared>(std::move(indices)); + + auto make_res = [](std::shared_ptr> b, + ossia::buffer_data::usage u) { + auto r = std::make_shared(); + ossia::buffer_data bd; + bd.data = std::shared_ptr(b, b->data()); + bd.byte_size = int64_t(b->size() * sizeof(float)); + bd.usage_hint = u; + r->resource = std::move(bd); + r->dirty_index = 1; + return r; + }; + + ossia::mesh_primitive mp; + // Stable id keyed on the position-buffer pointer (changes when the + // text or font changes, stable while neither does). Required by + // the registry's mesh-slab allocator: a 0 id makes the slab + // uncacheable and the mesh disappears from rendering. + mp.stable_id = (uint64_t)((uintptr_t)pos_buf.get()); + mp.topology = ossia::primitive_topology::triangles; + mp.vertex_count = uint32_t(vcount); + mp.index_count = uint32_t(idx_buf->size()); + // Local-space AABB over the tessellated glyph positions. Enables GPU + // frustum / occlusion culling in downstream scene filters. + mp.bounds = ossia::compute_aabb_from_positions(pos_buf->data(), vcount); + // No material_component — consumer applies default factors. 
+ + uint32_t bi = 0; + auto push_attr = [&](std::shared_ptr> b, + int floats_per_vertex, + ossia::attribute_semantic sem, + ossia::vertex_format fmt) { + mp.vertex_buffers.push_back( + make_res(b, ossia::buffer_data::usage::vertex_buffer)); + ossia::vertex_attribute a; + a.semantic = sem; + a.format = fmt; + a.buffer_index = bi++; + a.byte_offset = 0; + a.byte_stride = uint32_t(floats_per_vertex) * sizeof(float); + a.rate = ossia::vertex_attribute::input_rate::per_vertex; + mp.attributes.push_back(a); + }; + push_attr(pos_buf, 3, ossia::attribute_semantic::position, ossia::vertex_format::float3); + push_attr(nrm_buf, 3, ossia::attribute_semantic::normal, ossia::vertex_format::float3); + push_attr(uv_buf, 2, ossia::attribute_semantic::texcoord0, ossia::vertex_format::float2); + + { + auto ib = std::make_shared(); + ossia::buffer_data bd; + bd.data = std::shared_ptr(idx_buf, idx_buf->data()); + bd.byte_size = int64_t(idx_buf->size() * sizeof(uint32_t)); + bd.usage_hint = ossia::buffer_data::usage::index_buffer; + ib->resource = std::move(bd); + ib->dirty_index = 1; + mp.index_buffer = std::move(ib); + mp.index_type = ossia::index_format::uint32; + } + + auto mc = std::make_shared(); + mc->primitives.push_back(std::move(mp)); + mc->dirty_index = 1; + m_cached_mesh = std::move(mc); + } + + // Build scene_node tree: root { scene_transform, mesh_component }. 
+ ossia::scene_transform xform; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + auto q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + xform.raw_slot = m_xform_ref; + + auto children = std::make_shared>(); + children->push_back(xform); + children->push_back(ossia::mesh_component_ptr(m_cached_mesh)); + + auto node = std::make_shared(); + node->name = "Text"; + node->children = std::move(children); + node->dirty_index = ++m_version_counter; + + auto roots = std::make_shared>(); + roots->push_back(std::move(node)); + + // One default material so downstream PBR has something to bind. + auto mat = std::make_shared(); + mat->base_color_factor[0] = 1.f; + mat->base_color_factor[1] = 1.f; + mat->base_color_factor[2] = 1.f; + mat->base_color_factor[3] = 1.f; + auto mats = std::make_shared>(); + mats->push_back(std::move(mat)); + + if(!m_wrapped_state) + m_wrapped_state = std::make_shared(); + m_wrapped_state->roots = std::move(roots); + m_wrapped_state->materials = std::move(mats); + m_wrapped_state->version = m_version_counter; + m_wrapped_state->dirty_index = m_version_counter; + m_pending_dirty = 0xFF; +} + +void TextToMesh::operator()() +{ + if(!m_wrapped_state) + rebuild(); + outputs.scene_out.scene.state = m_wrapped_state; + outputs.scene_out.dirty = m_pending_dirty; + m_pending_dirty = 0; +} + +void TextToMesh::init( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!raw_transform_slot.valid()) + { + raw_transform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + 
m_xform_ref = r.registry().toOssiaRef(raw_transform_slot); + } + if(raw_transform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, raw_transform_slot, &seed, sizeof(seed)); + } +} + +void TextToMesh::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!raw_transform_slot.valid()) + return; + + score::gfx::RawLocalTransform xform{}; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + QQuaternion q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + r.registry().updateSlot(res, raw_transform_slot, &xform, sizeof(xform)); +} + +void TextToMesh::release(score::gfx::RenderList& r) +{ + if(raw_transform_slot.valid()) + r.registry().free(raw_transform_slot); + m_xform_ref = {}; + // Producer-state-drift Option A — see Light::release. + m_wrapped_state.reset(); +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/TextToMesh.hpp b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.hpp new file mode 100644 index 0000000000..d3f874c6c7 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TextToMesh.hpp @@ -0,0 +1,125 @@ +#pragma once +#include "TransformHelper.hpp" + +#include + +#include + +#include +#include +#include + +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Rasterize text into 3D geometry. Each glyph is converted to a +// QPainterPath, flattened into polygons, and tessellated via simple +// ear-clipping. 
Output is a scene_spec containing one scene_node with +// one mesh_component whose vertices describe the text in the XY plane +// (normal = +Z) around the origin. +// +// Limitations (v1): +// - Holes are NOT handled. Glyphs with interior holes ("O", "D", "o", +// "P" counter, etc.) render as solid shapes. Fix planned by adding +// earcut.hpp or hole-bridging to the tessellator. +// - Extrusion = 0 (flat). A later revision will extrude along -Z +// with properly-oriented side walls. +// - Tangents are synthesized as (1, 0, 0, 1) by ScenePreprocessor's +// fallback — no per-vertex tangent computed here. +// +// Designed for VJ / title-card use rather than typography; single-line +// inputs only. For paragraph text, use TextToTexture on a quad. +class TextToMesh +{ +public: + halp_meta(name, "Text to Mesh") + halp_meta(category, "Visuals/3D/Text") + halp_meta(c_name, "text_to_mesh") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/text-to-mesh.html") + halp_meta(uuid, "c8f2a4d5-6e9b-4d3a-b7f1-5c4e2d8a9f6b") + + struct ins + { + // Port-driven rebuild: controls trigger TextToMesh::rebuild() via + // their update() callbacks; operator()() just republishes + // m_wrapped_state. + struct : halp::lineedit<"Text", "Hello"> + { void update(TextToMesh& n) { n.rebuild(); } } text; + struct : halp::lineedit<"Font family", "Sans"> + { void update(TextToMesh& n) { n.rebuild(); } } font_family; + struct : halp::spinbox_i32<"Font size", halp::irange{4, 512, 72}> + { void update(TextToMesh& n) { n.rebuild(); } } font_size; + struct : halp::toggle<"Bold"> + { void update(TextToMesh& n) { n.rebuild(); } } bold; + struct : halp::toggle<"Italic"> + { void update(TextToMesh& n) { n.rebuild(); } } italic; + // World-space height of a capital 'H'. Glyph paths come out in + // pixel units from Qt; we scale them to this target so the mesh + // lives at a sensible world scale regardless of font_size. 
+ struct : halp::hslider_f32<"Height", halp::range{0.01, 100., 1.}> + { void update(TextToMesh& n) { n.rebuild(); } } height; + // Centers the text around the origin on the X axis (vs. left-align + // at X=0). Useful for title cards. + struct : halp::toggle<"Center X"> + { void update(TextToMesh& n) { n.rebuild(); } } center_x; + + // Transform controls. They also go through rebuild(), which stamps the + // current TRS onto the emitted scene_transform; update() uploads the + // same TRS into the RawTransform slot. + struct : PositionControl + { void update(TextToMesh& n) { n.rebuild(); } } position; + struct : RotationControl + { void update(TextToMesh& n) { n.rebuild(); } } rotation; + struct : ScaleControl + { void update(TextToMesh& n) { n.rebuild(); } } scale; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void rebuild(); + void operator()(); + + // GPU hooks: init() claims and seeds the RawTransform slot, update() + // uploads the current control TRS into it, release() frees it and drops + // the cached scene state. + void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + // Scene state published by operator()(); filled by rebuild() and reset + // in release(). + std::shared_ptr m_wrapped_state; + CachedTRS m_cachedTRS{}; + // Mesh-rebuild cache — expensive tessellation only re-runs when text + // or font parameters actually change. 
+ std::string m_cached_text; + std::string m_cached_family; + int m_cached_size{-1}; + bool m_cached_bold{false}; + bool m_cached_italic{false}; + float m_cached_height{-1.f}; + bool m_cached_center{false}; + std::shared_ptr m_cached_mesh; + int64_t m_version_counter{0}; + uint8_t m_pending_dirty{0xFF}; + + score::gfx::GpuResourceRegistry::Slot raw_transform_slot; + ossia::gpu_slot_ref m_xform_ref{}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/TextToTexture.hpp b/src/plugins/score-plugin-threedim/Threedim/TextToTexture.hpp new file mode 100644 index 0000000000..4d396ef557 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TextToTexture.hpp @@ -0,0 +1,146 @@ +#pragma once +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace Threedim +{ + +// Rasterize a text string into an RGBA texture via QPainter. Pipes into +// any node that consumes halp::gpu_texture — most commonly +// MaterialOverride (to show text on a mesh's base-color slot) or +// Instancer / a billboard renderer (for text sprites). +// +// Re-renders only when a control (text / font / size / color / canvas +// dimensions / alignment) changes — the update() hooks on each port fire recreate(). 
+class TextToTexture +{ +public: + halp_meta(name, "Text to Texture") + halp_meta(category, "Visuals/3D/Text") + halp_meta(c_name, "text_to_texture") + halp_meta(authors, "ossia team") + halp_meta( + manual_url, + "https://ossia.io/score-docs/processes/text-to-texture.html") + halp_meta(uuid, "5d3a9b2f-7e6c-4a8d-b1f4-9c2e3d5a7b8f") + + struct ins + { + struct : halp::lineedit<"Text", "Hello, world"> + { + void update(TextToTexture& self) { self.recreate(); } + } text; + struct : halp::lineedit<"Font family", "Sans"> + { + void update(TextToTexture& self) { self.recreate(); } + } font_family; + struct : halp::spinbox_i32<"Font size", halp::irange{4, 512, 64}> + { + void update(TextToTexture& self) { self.recreate(); } + } font_size; + struct : halp::toggle<"Bold"> + { + void update(TextToTexture& self) { self.recreate(); } + } bold; + struct : halp::toggle<"Italic"> + { + void update(TextToTexture& self) { self.recreate(); } + } italic; + + struct : halp::spinbox_i32<"Canvas width", halp::irange{16, 4096, 1024}> + { + void update(TextToTexture& self) { self.recreate(); } + } canvas_w; + struct : halp::spinbox_i32<"Canvas height", halp::irange{16, 4096, 256}> + { + void update(TextToTexture& self) { self.recreate(); } + } canvas_h; + + // Colors are four separate sliders (r, g, b, a), each in [0, 1]. A transparent background + // is the useful default — drop on any mesh and you see only the + // glyphs. 
+ struct : halp::hslider_f32<"Text R", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_r; + struct : halp::hslider_f32<"Text G", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_g; + struct : halp::hslider_f32<"Text B", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_b; + struct : halp::hslider_f32<"Text A", halp::range{0., 1., 1.}> { void update(TextToTexture& s) { s.recreate(); } } fg_a; + struct : halp::hslider_f32<"BG R", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_r; + struct : halp::hslider_f32<"BG G", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_g; + struct : halp::hslider_f32<"BG B", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_b; + struct : halp::hslider_f32<"BG A", halp::range{0., 1., 0.}> { void update(TextToTexture& s) { s.recreate(); } } bg_a; + + // Text alignment inside the canvas: 0=left, 1=center, 2=right for h; + // 0=top, 1=center, 2=bottom for v. + struct : halp::spinbox_i32<"H align", halp::irange{0, 2, 1}> + { void update(TextToTexture& s) { s.recreate(); } } h_align; + struct : halp::spinbox_i32<"V align", halp::irange{0, 2, 1}> + { void update(TextToTexture& s) { s.recreate(); } } v_align; + } inputs; + + struct + { + halp::texture_output<"Output", halp::rgba_texture> main; + } outputs; + + // Renders the current text into a canvas_w x canvas_h RGBA8 image and + // uploads it to the output texture. Returns without touching the output + // when either canvas dimension is not positive. + void recreate() + { + const int w = inputs.canvas_w.value; + const int h = inputs.canvas_h.value; + if(w <= 0 || h <= 0) + return; + + // Qt renders with premultiplied alpha; we output straight RGBA8. + // QImage::Format_RGBA8888 is non-premultiplied and matches what + // gpu_texture expects when upload-bound as RGBA8. + // At 4 bytes per pixel every scanline is exactly w * 4 bytes (QImage only + // pads rows up to a 32-bit boundary), which is what lets the single memcpy + // of w * h * 4 bytes below copy the whole image. 
+ QImage img(w, h, QImage::Format_RGBA8888); + img.fill(QColor::fromRgbF( + inputs.bg_r.value, inputs.bg_g.value, inputs.bg_b.value, + inputs.bg_a.value)); + + QPainter p(&img); + p.setRenderHint(QPainter::Antialiasing, true); + p.setRenderHint(QPainter::TextAntialiasing, true); + + QFont f(QString::fromStdString(inputs.font_family.value)); + f.setPixelSize(inputs.font_size.value); + f.setBold(inputs.bold.value); + f.setItalic(inputs.italic.value); + p.setFont(f); + p.setPen(QColor::fromRgbF( + inputs.fg_r.value, inputs.fg_g.value, inputs.fg_b.value, + inputs.fg_a.value)); + + int flags = 0; + switch(inputs.h_align.value) + { + case 0: flags |= Qt::AlignLeft; break; + case 2: flags |= Qt::AlignRight; break; + default: flags |= Qt::AlignHCenter; + } + switch(inputs.v_align.value) + { + case 0: flags |= Qt::AlignTop; break; + case 2: flags |= Qt::AlignBottom; break; + default: flags |= Qt::AlignVCenter; + } + flags |= Qt::TextWordWrap; + + p.drawText( + QRect(0, 0, w, h), flags, + QString::fromStdString(inputs.text.value)); + p.end(); + + outputs.main.create(w, h); + std::memcpy(outputs.main.texture.bytes, img.constBits(), std::size_t(w) * h * 4); + outputs.main.upload(); + } +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/TextureInfo.hpp b/src/plugins/score-plugin-threedim/Threedim/TextureInfo.hpp new file mode 100644 index 0000000000..3a72043b03 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TextureInfo.hpp @@ -0,0 +1,96 @@ +#pragma once +#include +#include +#include +#include + +#include +#include +#include + +namespace Threedim +{ +// Tiny inspector node: takes a halp::gpu_texture_input -- a zero-copy +// reference to the upstream's GPU texture -- and exposes its metadata +// (width, height, format, native handle) on regular value-output ports +// plus a single human-readable summary string. 
+// +// Wiring: when an Image-typed edge is connected to our Texture port, +// score's CpuAnalysisNode (the GfxRenderer specialization for nodes +// with no texture/buffer/geometry outputs) allocates a render target +// at init time via texture_inputs_storage::init(), points the upstream +// at it through renderTargetForInput(), and -- thanks to the +// gpu_texture_port branch in that storage -- writes the resulting +// QRhiTexture pointer plus its pixel size into our gpu_texture struct +// (handle / width / height). The format enum is mapped from the +// negotiated QRhiTexture::Format via gpp::qrhi::toTextureFormat. None of +// the per-frame readback machinery used for halp::texture_input fires +// for us, so this is essentially free. +class TextureInfo +{ +public: + halp_meta(name, "Texture Info") + halp_meta(category, "Visuals/Utilities") + halp_meta(c_name, "texture_info") + halp_meta(manual_url, "https://ossia.io/score-docs/processes/texture-info.html") + halp_meta(uuid, "5bd9c8e2-7f1a-4e3b-9c0d-2a4b6f8e1d72") + + struct + { + halp::gpu_texture_input<"Texture"> texture; + } inputs; + + // Outputs are rewritten from the input texture's fields on every + // operator()() call. + struct + { + halp::val_port<"Width", int> width; + halp::val_port<"Height", int> height; + halp::val_port<"Format", std::string> format; + // Raw native handle as an opaque integer (a QRhiTexture* on every + // backend score supports today). Useful only for visual identity + // ("did the upstream rebuild this texture?"). 
+ halp::val_port<"Handle", int64_t> handle; + halp::val_port<"Readable", std::string> readable; + } outputs; + + static std::string_view format_name(halp::gpu_texture::format_t f) noexcept + { + using F = halp::gpu_texture; + switch(f) + { + case F::RGBA8: + return "RGBA8"; + case F::RGBA16F: + return "RGBA16F"; + case F::RGBA32F: + return "RGBA32F"; + case F::R8: + return "R8"; + case F::R16: + return "R16"; + case F::R16F: + return "R16F"; + case F::R32F: + return "R32F"; + default: + return "unknown"; + } + } + + void operator()() + { + const auto& t = inputs.texture.texture; + const auto fmt_name = format_name(t.format); + + outputs.width.value = t.width; + outputs.height.value = t.height; + outputs.format.value = std::string{fmt_name}; + outputs.handle.value = reinterpret_cast(t.handle); + + auto& ret = outputs.readable.value; + ret.clear(); + fmt::format_to( + std::back_inserter(ret), "{}x{} {} (handle=0x{:x})", t.width, t.height, + fmt_name, reinterpret_cast(t.handle)); + } +}; +} diff --git a/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp b/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp index 69fc71eeed..86810fd861 100644 --- a/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/TinyObj.hpp @@ -59,16 +59,38 @@ static void toGL(auto& from, float (&to)[N]) inline void rebuild_transform(auto& inputs, auto& outputs) { QMatrix4x4 model{}; - auto& pos = inputs.position; - auto& rot = inputs.rotation; - auto& sc = inputs.scale; - model.translate(pos.value.x, pos.value.y, pos.value.z); - model.rotate(QQuaternion::fromEulerAngles(rot.value.x, rot.value.y, rot.value.z)); - model.scale(sc.value.x, sc.value.y, sc.value.z); + if constexpr(requires { inputs.position; }) + { + auto& pos = inputs.position; + model.translate(pos.value.x, pos.value.y, pos.value.z); + } + + if constexpr(requires { inputs.rotation; }) + { + auto& rot = inputs.rotation; + model.rotate(QQuaternion::fromEulerAngles(rot.value.x, 
rot.value.y, rot.value.z)); + } - toGL(model, outputs.geometry.transform); - outputs.geometry.dirty_transform = true; + if constexpr(requires { inputs.scale; }) + { + auto& sc = inputs.scale; + model.scale(sc.value.x, sc.value.y, sc.value.z); + } + + // Legacy path: writes into the halp::mesh-style `geometry` output. + // Scene-only loaders (GltfParser/FbxParser after the legacy outlet was + // removed) don't have `outputs.geometry`; we leave the Position/Rotation/ + // Scale controls as a no-op for now. They'll be re-wired to a scene-level + // root transform when we add that feature to scene_spec. + if constexpr(requires { + outputs.geometry.transform; + outputs.geometry.dirty_transform; + }) + { + toGL(model, outputs.geometry.transform); + outputs.geometry.dirty_transform = true; + } } struct PositionControl : halp::xyz_spinboxes_f32<"Position", halp::free_range_min<>> { diff --git a/src/plugins/score-plugin-threedim/Threedim/Transform3D.cpp b/src/plugins/score-plugin-threedim/Threedim/Transform3D.cpp new file mode 100644 index 0000000000..ea517a0b9a --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Transform3D.cpp @@ -0,0 +1,113 @@ +#include "Transform3D.hpp" + +#include +#include + +#include + +namespace Threedim +{ + +// Wraps the incoming scene under a TRS parent and publishes it. Three cases: +// a missing or empty input clears the output and every cache; an unchanged +// input with unchanged controls republishes the cached wrapper with +// dirty = 0; anything else rebuilds the wrapper and reports dirty = 0xFF. +void Transform3D::operator()() +{ + const auto& in = inputs.scene_in.scene; + const auto* in_state = in.state.get(); + + if(!in_state || in_state->empty()) + { + outputs.scene_out.scene = {}; + outputs.scene_out.dirty = 0; + m_state.reset(); + m_cached_in_state = nullptr; + m_cached_in_version = -1; + m_cachedTRS.valid = false; + return; + } + + // Cache check: republish the prior wrapped state when neither upstream + // (state pointer / version) nor TRS controls changed. Stops downstream + // identity-keyed caches from rebuilding every frame on a stable input — + // see diagnostic 027. 
+ const int64_t in_version = in_state->version; + const bool upstream_changed + = (m_cached_in_state != in_state) || (m_cached_in_version != in_version); + const bool trs_changed = transformChanged(inputs, m_cachedTRS); + + if(m_state && !upstream_changed && !trs_changed) + { + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = 0; + return; + } + + // Rebuild via the canonical helper: it now propagates skeletons and + // collections too (diagnostic 026), updates m_cachedTRS in place, and + // bumps m_version_counter so downstream version-keyed caches see a + // monotonic bump exactly when something actually changed. + m_state = wrapSceneWithTransform( + in.state, inputs, m_cachedTRS, m_version_counter, m_xform_ref); + m_cached_in_state = in_state; + m_cached_in_version = in_version; + + outputs.scene_out.scene.state = m_state; + outputs.scene_out.dirty = 0xFF; +} + +// Order invariant: called by GfxRenderer::initState BEFORE the first +// operator()() and BEFORE processControlIn fires any control callback. +// m_xform_ref populated here is therefore safe to read in operator()() +// (its only consumer in this node, which has no rebuild()) without a +// guard. Adding prepare() to this node breaks the invariant — +// see CpuFilterNode.hpp for details. 
+void Transform3D::init( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res) +{ + if(!xform_slot.valid()) + { + xform_slot = r.registry().allocate( + score::gfx::GpuResourceRegistry::Arena::RawTransform, + sizeof(score::gfx::RawLocalTransform)); + m_xform_ref = r.registry().toOssiaRef(xform_slot); + } + if(xform_slot.valid()) + { + score::gfx::RawLocalTransform seed{}; + r.registry().updateSlot(res, xform_slot, &seed, sizeof(seed)); + } +} + +void Transform3D::update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, score::gfx::Edge*) +{ + if(!xform_slot.valid()) + return; + + score::gfx::RawLocalTransform raw{}; + raw.translation[0] = inputs.position.value.x; + raw.translation[1] = inputs.position.value.y; + raw.translation[2] = inputs.position.value.z; + auto q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + raw.rotation[0] = q.x(); + raw.rotation[1] = q.y(); + raw.rotation[2] = q.z(); + raw.rotation[3] = q.scalar(); + raw.scale[0] = inputs.scale.value.x; + raw.scale[1] = inputs.scale.value.y; + raw.scale[2] = inputs.scale.value.z; + r.registry().updateSlot(res, xform_slot, &raw, sizeof(raw)); +} + +void Transform3D::release(score::gfx::RenderList& r) +{ + if(xform_slot.valid()) + r.registry().free(xform_slot); + m_xform_ref = {}; + // Clear cached scene_state so the next operator()() rebuilds against + // the post-release registry. Producer-state-drift Option A — see + // matching comment in Light::release. 
+ m_state.reset(); + m_cached_in_state = nullptr; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/Transform3D.hpp b/src/plugins/score-plugin-threedim/Threedim/Transform3D.hpp new file mode 100644 index 0000000000..ac81943976 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/Transform3D.hpp @@ -0,0 +1,100 @@ +#pragma once +#include "TransformHelper.hpp" + +#include +#include + +#include + +#include + +#include +#include +#include + +class QRhiResourceUpdateBatch; + +namespace score::gfx +{ +class RenderList; +struct Edge; +} + +namespace Threedim +{ + +// Scene-in → scene-out transform: wraps the incoming scene's roots under a +// single parent node carrying a `scene_transform` payload (TRS). Materials, +// animations, cameras, skeletons, collections and the environment are passed +// through from the input (see wrapSceneWithTransform) so downstream +// identity-based caches stay hot. +class Transform3D +{ +public: + halp_meta(name, "Transform 3D") + halp_meta(c_name, "transform3d_avnd") + halp_meta(category, "Visuals/3D/Scene") + halp_meta(authors, "ossia team") + halp_meta(uuid, "7a9f2b41-4d58-4e93-b7c2-0f5d3e8a6b1c") + + struct ins + { + struct + { + halp_meta(name, "Scene In"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_in; + + halp::xyz_spinboxes_f32< + "Position", halp::range{-10000., 10000., 0.}> + position; + halp::xyz_spinboxes_f32<"Rotation", halp::range{0., 359.9999999, 0.}> + rotation; + halp::xyz_spinboxes_f32< + "Scale", halp::range{0.00001, 1000., 1.}> + scale; + } inputs; + + struct outs + { + struct + { + halp_meta(name, "Scene Out"); + ossia::scene_spec scene; + uint8_t dirty{0}; + } scene_out; + } outputs; + + void operator()(); + + // Render-thread hooks. init claims one RawTransform slot for the + // emitted scene_transform; update packs the current control TRS + // into a RawLocalTransform and uploads; release returns the slot. + // The preprocessor composes the world-space matrix for this slot + // from the scene-node parent chain CPU-side. 
+ void init(score::gfx::RenderList& r, QRhiResourceUpdateBatch& res); + void update( + score::gfx::RenderList& r, QRhiResourceUpdateBatch& res, + score::gfx::Edge* e); + void release(score::gfx::RenderList& r); + + score::gfx::GpuResourceRegistry::Slot xform_slot; + + // Ossia-facing snapshot of xform_slot. Written in init() when the slot + // is allocated, cleared in release(), and copied onto the emitted + // scene_transform's raw_slot whenever operator()() rebuilds the wrapper. + ossia::gpu_slot_ref m_xform_ref{}; + + // Cache: republish the same emitted scene_state when neither upstream + // (input scene_state pointer / version) nor controls (TRS) changed. + // Prevents downstream SceneSelector / SceneGraphFilter / SceneDuplicator / + // CreateCollection from rebuilding every frame, which they did when we + // emitted a fresh shared_ptr each tick — diagnostic 027. + std::shared_ptr m_state; + const ossia::scene_state* m_cached_in_state{}; + int64_t m_cached_in_version{-1}; + CachedTRS m_cachedTRS{}; + int64_t m_version_counter{0}; +}; + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/TransformHelper.hpp b/src/plugins/score-plugin-threedim/Threedim/TransformHelper.hpp new file mode 100644 index 0000000000..11babfa4a6 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/TransformHelper.hpp @@ -0,0 +1,176 @@ +#pragma once +#include + +#include +#include + +#include +#include +#include + +namespace Threedim +{ + +// Shared TRS-matrix computation for halp nodes that output a +// `halp::mesh`-style geometry with a `transform[16]` slot plus a +// `dirty_transform` flag (BuffersToGeometry, BuffersToGeometry2, +// VoxelLoader, ...). Call from operator() every frame: computes a +// column-major 4x4 TRS matrix from the XYZ controls, writes it into +// `out_transform16`, and returns true iff the matrix changed since +// the last call (so the caller can set `dirty_transform` accordingly). 
+// +// Cached prev values live on the caller via the CachedTRS struct — +// identical layout across the three call sites so each node just +// declares one member `CachedTRS m_cachedTRS{}` and passes it in. +struct CachedTRS +{ + float pos[3]{0, 0, 0}; + float rot[3]{0, 0, 0}; + float scale[3]{1, 1, 1}; + // Stays false until a helper has stored a snapshot, which forces the + // first comparison to report a change. + bool valid{false}; +}; + +// `Inputs` is duck-typed: must expose `.position.value.{x,y,z}`, +// `.rotation.value.{x,y,z}` and `.scale.value.{x,y,z}`. +// Returns false, leaving `out_transform16` untouched, when `cache` is valid +// and every control equals its cached value (exact float comparison); +// otherwise writes the matrix, refreshes `cache` and returns true. +template +inline bool +computeTRSMatrix(const Inputs& inputs, float out_transform16[16], CachedTRS& cache) +{ + const float px = inputs.position.value.x; + const float py = inputs.position.value.y; + const float pz = inputs.position.value.z; + const float rx = inputs.rotation.value.x; + const float ry = inputs.rotation.value.y; + const float rz = inputs.rotation.value.z; + const float sx = inputs.scale.value.x; + const float sy = inputs.scale.value.y; + const float sz = inputs.scale.value.z; + + const bool changed + = !cache.valid + || cache.pos[0] != px || cache.pos[1] != py || cache.pos[2] != pz + || cache.rot[0] != rx || cache.rot[1] != ry || cache.rot[2] != rz + || cache.scale[0] != sx || cache.scale[1] != sy || cache.scale[2] != sz; + + if(!changed) + return false; + + // Build column-major 4x4: translate * rotate * scale, matching the + // convention used across the 3D plugin (QMatrix4x4's constData() + // returns column-major). + QMatrix4x4 m; + m.translate(px, py, pz); + m.rotate(QQuaternion::fromEulerAngles(rx, ry, rz)); + m.scale(sx, sy, sz); + std::memcpy(out_transform16, m.constData(), sizeof(float) * 16); + + cache.pos[0] = px; cache.pos[1] = py; cache.pos[2] = pz; + cache.rot[0] = rx; cache.rot[1] = ry; cache.rot[2] = rz; + cache.scale[0] = sx; cache.scale[1] = sy; cache.scale[2] = sz; + cache.valid = true; + return true; +} + +// Wrap a raw scene_state under a single parent scene_node whose first child +// is a scene_transform carrying this node's position / rotation / scale +// controls. 
FlattenVisitor processes payloads in order and transforms apply +// to subsequent siblings, so the wrap applies the TRS to every descendant. +// +// Used by asset-loader-style nodes (FbxParser, GltfParser, AssetLoader) to +// compose the control-knob transform on top of the as-loaded scene without +// touching the raw state (kept stable so downstream identity caches stay +// warm). Shared to avoid re-duplicating the same 40 lines per loader. +// +// `Inputs` is duck-typed: must expose `.position.value.{x,y,z}`, +// `.rotation.value.{x,y,z}`, `.scale.value.{x,y,z}`. +// Returns nullptr when `raw` is null. +template +inline std::shared_ptr wrapSceneWithTransform( + const std::shared_ptr& raw, + const Inputs& inputs, CachedTRS& cache, int64_t& version_counter, + const ossia::gpu_slot_ref& xform_ref = {}) +{ + if(!raw) + return nullptr; + + // No cache check happens here: every call builds a fresh wrapper, bumps + // `version_counter` and refreshes `cache`. Callers that want to skip the + // rebuild on unchanged controls gate the call with transformChanged(). + ossia::scene_transform xform; + xform.translation[0] = inputs.position.value.x; + xform.translation[1] = inputs.position.value.y; + xform.translation[2] = inputs.position.value.z; + auto q = QQuaternion::fromEulerAngles( + inputs.rotation.value.x, inputs.rotation.value.y, + inputs.rotation.value.z); + xform.rotation[0] = q.x(); + xform.rotation[1] = q.y(); + xform.rotation[2] = q.z(); + xform.rotation[3] = q.scalar(); + xform.scale[0] = inputs.scale.value.x; + xform.scale[1] = inputs.scale.value.y; + xform.scale[2] = inputs.scale.value.z; + // Stamp the producer's RawTransform slot ref (if any) so the + // preprocessor composes a world matrix at the matching offset. 
+ xform.raw_slot = xform_ref; + + auto children = std::make_shared>(); + children->push_back(xform); + if(raw->roots) + for(const auto& root : *raw->roots) + children->push_back(root); + + auto parent = std::make_shared(); + parent->children = std::move(children); + + auto new_roots = std::make_shared>(); + new_roots->push_back(std::move(parent)); + + auto wrapped = std::make_shared(); + wrapped->roots = std::move(new_roots); + // Identity-preserving passthrough of every scene_state shared field so + // downstream caches stay warm. `collections` was missed in the initial + // landing (CreateCollection writes them onto scene_state::collections, + // and dropping them here silently loses the named-collection list on + // every TRS pass) — diagnostic 026. + wrapped->materials = raw->materials; + wrapped->animations = raw->animations; + wrapped->cameras = raw->cameras; + wrapped->skeletons = raw->skeletons; + wrapped->collections = raw->collections; + wrapped->environment = raw->environment; + wrapped->active_camera_id = raw->active_camera_id; + wrapped->version = ++version_counter; + // NOTE(review): dirty_index is a fixed 1 here while `version` increases on + // every call — confirm that consumers key on `version`. + wrapped->dirty_index = 1; + + // Snapshot the controls so transformChanged() compares against what was + // just emitted. + cache.pos[0] = inputs.position.value.x; + cache.pos[1] = inputs.position.value.y; + cache.pos[2] = inputs.position.value.z; + cache.rot[0] = inputs.rotation.value.x; + cache.rot[1] = inputs.rotation.value.y; + cache.rot[2] = inputs.rotation.value.z; + cache.scale[0] = inputs.scale.value.x; + cache.scale[1] = inputs.scale.value.y; + cache.scale[2] = inputs.scale.value.z; + cache.valid = true; + + return wrapped; +} + +// Test whether the controls differ from a prior cached snapshot, without +// applying them. Use this to gate a wrapSceneWithTransform() rebuild when +// you want to only allocate a new wrapped state when the user moved a knob. +// Also reports true while `cache.valid` is false, i.e. before any snapshot +// has been stored. 
+template +inline bool transformChanged(const Inputs& inputs, const CachedTRS& cache) +{ + return !cache.valid + || cache.pos[0] != inputs.position.value.x + || cache.pos[1] != inputs.position.value.y + || cache.pos[2] != inputs.position.value.z + || cache.rot[0] != inputs.rotation.value.x + || cache.rot[1] != inputs.rotation.value.y + || cache.rot[2] != inputs.rotation.value.z + || cache.scale[0] != inputs.scale.value.x + || cache.scale[1] != inputs.scale.value.y + || cache.scale[2] != inputs.scale.value.z; +} + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/VcgImporters.cpp b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.cpp new file mode 100644 index 0000000000..744245e962 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.cpp @@ -0,0 +1,215 @@ +#include "VcgImporters.hpp" + +// vcglib pulls Qt / GL through its utility headers; we only need the +// header-only trimesh + io_trimesh subset. Isolate these includes here so +// the rest of the plugin isn't exposed to vcglib's macro soup. +#include +#include +#include + +#include + +namespace Threedim +{ + +namespace +{ + +// Minimal vcglib mesh type for STL / OFF import: per-vertex position + +// normal + colour + bit flags, per-face vertex refs + normal + colour. +// STL contributes position + per-face normal; OFF can contribute per-vertex +// and per-face colours. We always request normals + colours; vcglib zero- +// inits any it doesn't fill. 
+class ImpVertex; +class ImpFace; +struct ImpTypes : public vcg::UsedTypes< + vcg::Use::AsVertexType, + vcg::Use::AsFaceType> +{}; +class ImpVertex : public vcg::Vertex< + ImpTypes, vcg::vertex::Coord3f, vcg::vertex::Normal3f, + vcg::vertex::Color4b, vcg::vertex::BitFlags> +{}; +class ImpFace : public vcg::Face< + ImpTypes, vcg::face::VertexRef, vcg::face::Normal3f, + vcg::face::Color4b, vcg::face::BitFlags> +{}; +class ImpMesh : public vcg::tri::TriMesh< + std::vector, std::vector> +{}; + +// Expand the loaded vcglib mesh into the flat, non-interleaved float_vec +// layout Threedim::mesh expects: all positions, then all normals, then +// all colours. De-indexed (one output vertex per triangle corner) because +// STL doesn't carry per-vertex normals shared across triangles, and OFF +// often has smooth normals but STL's "one normal per face" forces the +// per-corner expansion anyway. +// +// Appends the attribute data to `out` (existing contents are kept; the stored +// offsets count floats from the start of `out`) and returns a single mesh +// record, or an empty vector when the mesh has neither faces nor vertices. +// A mesh without faces is emitted as a point cloud, one output vertex per +// input vertex. +static std::vector +convertVcgToMeshes(const ImpMesh& vm, Threedim::float_vec& out, int loadmask) +{ + std::vector result; + if(vm.face.empty() && vm.vert.empty()) + return result; + + // Decide which attributes to emit from the importer's load mask. Output + // vertices are counted per branch below (one per triangle corner when + // faces exist, one per input vertex otherwise). + const bool has_faces = !vm.face.empty(); + const bool has_normal = (loadmask & vcg::tri::io::Mask::IOM_VERTNORMAL) + || (loadmask & vcg::tri::io::Mask::IOM_FACENORMAL); + const bool has_color = (loadmask & vcg::tri::io::Mask::IOM_VERTCOLOR) + || (loadmask & vcg::tri::io::Mask::IOM_FACECOLOR); + + Threedim::mesh m{}; + m.texcoord = false; + m.normals = has_normal; + m.colors = has_color; + m.tangents = false; + m.points = !has_faces; + m.extras.clear(); + + if(has_faces) + { + const size_t corners = vm.face.size() * 3; + m.vertices = (int64_t)corners; + + // Allocate contiguous attribute blocks. Layout matches Threedim::mesh's + // convention: offsets stored in elements (floats), not bytes. + const int64_t pos_count = 3 * corners; + const int64_t nor_count = has_normal ? 3 * corners : 0; + const int64_t col_count = has_color ? 
4 * corners : 0; + const int64_t total_floats = pos_count + nor_count + col_count; + + const int64_t pos_offset = (int64_t)out.size(); + const int64_t nor_offset = pos_offset + pos_count; + const int64_t col_offset = nor_offset + nor_count; + + out.resize(pos_offset + total_floats); + + m.pos_offset = pos_offset; + m.normal_offset = has_normal ? nor_offset : 0; + m.color_offset = has_color ? col_offset : 0; + + // Fill buffer by walking faces. + for(size_t fi = 0; fi < vm.face.size(); ++fi) + { + const auto& f = vm.face[fi]; + + // The face normal is used for every corner whenever the load mask has + // IOM_FACENORMAL (STL case), even if per-vertex normals are flagged too; + // per-vertex normals are only the fallback. + const bool have_face_normal + = loadmask & vcg::tri::io::Mask::IOM_FACENORMAL; + + for(int c = 0; c < 3; ++c) + { + const auto* v = f.cV(c); + const int64_t base_p = pos_offset + (fi * 3 + c) * 3; + out[base_p + 0] = (float)v->cP()[0]; + out[base_p + 1] = (float)v->cP()[1]; + out[base_p + 2] = (float)v->cP()[2]; + + if(has_normal) + { + const int64_t base_n = nor_offset + (fi * 3 + c) * 3; + const auto& n = have_face_normal ? f.cN() : v->cN(); + out[base_n + 0] = (float)n[0]; + out[base_n + 1] = (float)n[1]; + out[base_n + 2] = (float)n[2]; + } + + if(has_color) + { + const int64_t base_c = col_offset + (fi * 3 + c) * 4; + // Same precedence for colours: per-face wins over per-vertex. + // Channels are rescaled from 0..255 to 0..1. + const bool have_face_color + = loadmask & vcg::tri::io::Mask::IOM_FACECOLOR; + const auto& cc = have_face_color ? f.cC() : v->cC(); + out[base_c + 0] = cc[0] / 255.0f; + out[base_c + 1] = cc[1] / 255.0f; + out[base_c + 2] = cc[2] / 255.0f; + out[base_c + 3] = cc[3] / 255.0f; + } + } + } + } + else + { + // Point cloud (no faces). Emit one vertex per input vertex. + const size_t nv = vm.vert.size(); + m.vertices = (int64_t)nv; + const int64_t pos_count = 3 * nv; + const int64_t nor_count = has_normal ? 3 * nv : 0; + const int64_t col_count = has_color ? 
4 * nv : 0; + const int64_t total_floats = pos_count + nor_count + col_count; + + const int64_t pos_offset = (int64_t)out.size(); + const int64_t nor_offset = pos_offset + pos_count; + const int64_t col_offset = nor_offset + nor_count; + out.resize(pos_offset + total_floats); + m.pos_offset = pos_offset; + m.normal_offset = has_normal ? nor_offset : 0; + m.color_offset = has_color ? col_offset : 0; + + for(size_t i = 0; i < nv; ++i) + { + const auto& v = vm.vert[i]; + out[pos_offset + i * 3 + 0] = (float)v.cP()[0]; + out[pos_offset + i * 3 + 1] = (float)v.cP()[1]; + out[pos_offset + i * 3 + 2] = (float)v.cP()[2]; + + if(has_normal) + { + out[nor_offset + i * 3 + 0] = (float)v.cN()[0]; + out[nor_offset + i * 3 + 1] = (float)v.cN()[1]; + out[nor_offset + i * 3 + 2] = (float)v.cN()[2]; + } + + if(has_color) + { + out[col_offset + i * 4 + 0] = v.cC()[0] / 255.0f; + out[col_offset + i * 4 + 1] = v.cC()[1] / 255.0f; + out[col_offset + i * 4 + 2] = v.cC()[2] / 255.0f; + out[col_offset + i * 4 + 3] = v.cC()[3] / 255.0f; + } + } + } + + result.push_back(std::move(m)); + return result; +} + +template +std::vector +importVcgGeneric(std::string_view filename, Threedim::float_vec& out) +{ + ImpMesh vm; + int loadmask = 0; + const std::string path{filename}; + const int err = OpenFn(vm, path.c_str(), loadmask, nullptr); + if(err != 0) + return {}; + return convertVcgToMeshes(vm, out, loadmask); +} + +// Wrappers to pin the importer function pointer signature. +// They are handed to importVcgGeneric as its OpenFn template argument. 
+static int openStl(ImpMesh& m, const char* p, int& mask, vcg::CallBackPos* cb) +{ + return vcg::tri::io::ImporterSTL::Open(m, p, mask, cb); +} +static int openOff(ImpMesh& m, const char* p, int& mask, vcg::CallBackPos* cb) +{ + return vcg::tri::io::ImporterOFF::Open(m, p, mask, cb); +} + +} // namespace + +std::vector +StlFromFile(std::string_view filename, Threedim::float_vec& buffer) +{ + return importVcgGeneric<&openStl>(filename, buffer); +} + +std::vector +OffFromFile(std::string_view filename, Threedim::float_vec& buffer) +{ + return importVcgGeneric<&openOff>(filename, buffer); +} + +} // namespace Threedim diff --git a/src/plugins/score-plugin-threedim/Threedim/VcgImporters.hpp b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.hpp new file mode 100644 index 0000000000..0306dfa972 --- /dev/null +++ b/src/plugins/score-plugin-threedim/Threedim/VcgImporters.hpp @@ -0,0 +1,27 @@ +#pragma once +#include + +#include + +namespace Threedim +{ + +// vcglib bridges. Load a 3D file via vcg::tri::io::Importer::Open +// and convert the loaded mesh into the same flat float_vec + mesh record +// format that TinyObjFromFile / PlyFromFile produce, so downstream +// `sceneStateFromMeshes` (or GeometryLoader's `rebuild_geometry`) +// consumes them uniformly. +// +// Adds STL and OFF support (the two remaining generally-useful formats +// vcglib offers that we weren't already covering via tinyobj / miniply). +// COLLADA (DAE) is a candidate for a follow-up — it carries scene +// hierarchy + materials + skinning, and deserves a richer conversion +// path than "dump meshes into one flat buffer". 
+ +std::vector StlFromFile( + std::string_view filename, Threedim::float_vec& buffer); + +std::vector OffFromFile( + std::string_view filename, Threedim::float_vec& buffer); + +} diff --git a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp index ab5d6a58bf..7c4297b591 100644 --- a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp +++ b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.cpp @@ -162,4 +162,14 @@ std::function VoxelLoader::ins::vox_t::process(file_type tv) }; } +void VoxelLoader::operator()() +{ + // Compute TRS matrix from position/rotation/scale controls and write + // into halp::mesh::transform[16]. dirty_transform is set iff the + // matrix actually changed vs last frame, so downstream doesn't + // rebuild its transform binding every frame when the knobs are idle. + outputs.geometry.dirty_transform + = computeTRSMatrix(inputs, outputs.geometry.transform, m_cachedTRS); +} + } diff --git a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp index 250c087d2f..8e76a7e89f 100644 --- a/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp +++ b/src/plugins/score-plugin-threedim/Threedim/VoxelLoader.hpp @@ -1,4 +1,6 @@ #pragma once +#include "TransformHelper.hpp" + #include #include #include @@ -60,6 +62,7 @@ class VoxelLoader void reload(); void rebuild_geometry(); + void operator()(); std::vector meshinfo{}; float_vec complete; @@ -67,6 +70,9 @@ class VoxelLoader // Cache the file data so mode changes can re-process std::string cached_filename; + + // Per-frame TRS matrix cache (see TransformHelper.hpp). 
+ CachedTRS m_cachedTRS{}; }; } diff --git a/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp b/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp index 44eb0b0cb7..a16237563e 100644 --- a/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp +++ b/src/plugins/score-plugin-threedim/score_plugin_threedim.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -21,17 +22,57 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include #include -#include -#include #include +#include +#include #include #include #include @@ -184,27 +225,37 @@ class SSynthDropHandler final : public Process::ProcessDropHandler } }; -class OBJLibraryHandler final +class AssetLibraryHandler final : public QObject , public Library::LibraryInterface { SCORE_CONCRETE("da4af155-3cb6-41df-8c10-5a002b9d97ca") - QSet acceptedFiles() const noexcept override { return {"obj", "ply"}; } + QSet acceptedFiles() const noexcept override + { + // Extension list must stay aligned with AssetDropHandler::fileExtensions + // below — the Library panel surfaces files by acceptedFiles, the canvas + // drag-drop accepts them via fileExtensions, and AssetLoader::process + // routes the underlying parser by extension. .splat and .spz arrive + // through the splat_binary / spz parsers (see PrimitiveCloud/SplatBinary + // and SpzCodec); they were missing from the Library list even though + // the runtime fully handles them. 
+ return {"fbx", "gltf", "glb", "obj", "ply", "stl", "off", + "usd", "usda", "usdc", "usdz", "splat", "spz"}; + } Library::Subcategories categories; - using proc = oscr::ProcessModel; + using proc = oscr::ProcessModel; void setup(Library::ProcessesItemModel& model, const score::GUIApplicationContext& ctx) override { - // TODO relaunch whenever library path changes... const auto& key = Metadata::get(); QModelIndex node = model.find(key); - if (node == QModelIndex{}) + if(node == QModelIndex{}) return; - categories.init("Object Loader", node, ctx); + categories.init("Asset Loader", node, ctx); } std::function asyncAddPath(std::string_view path) override @@ -224,13 +275,18 @@ class OBJLibraryHandler final } }; -class OBJDropHandler final : public Process::ProcessDropHandler +class AssetDropHandler final : public Process::ProcessDropHandler { SCORE_CONCRETE("1d6cac56-2059-4fb8-9cef-19301a1fba3d") - QSet fileExtensions() const noexcept override { return {"obj", "ply"}; } + QSet fileExtensions() const noexcept override + { + return {"fbx", "gltf", "glb", "obj", "ply", "stl", "off", + "splat", "spz", + "usd", "usda", "usdc", "usdz"}; + } - using proc = oscr::ProcessModel; + using proc = oscr::ProcessModel; void dropData( std::vector& vec, const DroppedFile& data, @@ -325,7 +381,9 @@ class VoxDropHandler final : public Process::ProcessDropHandler /** * This file instantiates the classes that are provided by this plug-in. 
*/ -score_plugin_threedim::score_plugin_threedim() = default; +score_plugin_threedim::score_plugin_threedim() +{ +} score_plugin_threedim::~score_plugin_threedim() = default; std::vector score_plugin_threedim::factories( @@ -338,7 +396,38 @@ std::vector score_plugin_threedim::factories( oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); - oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); @@ -350,6 +439,9 @@ std::vector score_plugin_threedim::factories( oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, ctx, key); + oscr::instantiate_fx(fx, 
ctx, key); + oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); @@ -357,20 +449,34 @@ std::vector score_plugin_threedim::factories( oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); oscr::instantiate_fx(fx, ctx, key); + // Splat (legacy GaussianSplatNode) factories removed: AssetLoader now + // routes .splat / .spz / 3DGS .ply files through primitive_cloud_component + // and the new ScenePreprocessor / 3dgs.tile rendering pipeline. The legacy + // Splat process kept its own GaussianSplatNode renderer; superseded. + // Existing projects that referenced the legacy Splat UUID + // ("cdc15a16-e856-4e02-9339-7d9e48da10ce") get a UUID-rewrite alias to + // AssetLoader at load time (see C-22d). auto add = instantiate_factories< score::ApplicationContext, FW, + Gfx::RenderPipeline::ProcessFactory, + Gfx::ScenePreprocessor::ProcessFactory, + Gfx::SceneFilter::ProcessFactory, + Gfx::FlattenedSceneFilter::ProcessFactory, + Gfx::MergeGeometries::ProcessFactory>, FW, FW, FW, + Threedim::AssetDropHandler, Threedim::VoxDropHandler>, FW>(ctx, key); + Gfx::ScenePreprocessor::ProcessExecutorComponentFactory, + Gfx::SceneFilter::ProcessExecutorComponentFactory, + Gfx::FlattenedSceneFilter::ProcessExecutorComponentFactory, + Gfx::MergeGeometries::ProcessExecutorComponentFactory>>(ctx, key); fx.insert( fx.end(), std::make_move_iterator(add.begin()),