diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index f92ffa7545..44c9808ff0 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -31,7 +31,7 @@ jobs: matrix: # vendor: [msvc, clangcl] # TODO: Yas please fix ClangCL, we have a few new compile errors - # if we build MSVC then build "run-compiler-explorer" target, for ClangCL build just "nsc" + # build full Nabla preset, run-compiler-explorer is pulled in via ALL when Docker integration is enabled vendor: [msvc] config: [Release, Debug, RelWithDebInfo] tag: ['17.13.6'] @@ -50,9 +50,35 @@ jobs: Set-MpPreference -DisableArchiveScanning $true Set-MpPreference -DisableScanningMappedNetworkDrivesForFullScan $true - if (-not (docker network ls --format '{{.Name}}' | Where-Object { $_ -eq 'docker_default' })) { - docker network create --driver nat docker_default - if ($LASTEXITCODE -ne 0) { exit 1 } + $maxAttempts = 12 + $delaySeconds = 5 + $dockerReady = $false + + for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) { + $networkNames = docker network ls --format '{{.Name}}' + if ($LASTEXITCODE -eq 0) { + if (-not ($networkNames | Where-Object { $_ -eq 'docker_default' })) { + docker network create --driver nat docker_default + if ($LASTEXITCODE -eq 0) { + $dockerReady = $true + break + } + } + else { + $dockerReady = $true + break + } + } + + if ($attempt -lt $maxAttempts) { + Write-Host "Docker not ready yet (attempt $attempt/$maxAttempts), retry in ${delaySeconds}s..." 
+ Start-Sleep -Seconds $delaySeconds + } + } + + if (-not $dockerReady) { + Write-Error "Docker was not ready after $($maxAttempts*$delaySeconds)s total wait" + exit 1 } - name: Set prefix @@ -122,22 +148,17 @@ jobs: docker exec orphan ` ${{ env.entry }} ${{ env.cmd }} -Command cmake ` --preset ci-configure-dynamic-${{ matrix.vendor }} ` + -DCMAKE_INSTALL_PREFIX:PATH=C:/mount/nabla/build-ct/install ` --profiling-output=profiling/cmake-profiling.json ` --profiling-format=google-trace - - name: Container – Build NSC + - name: Container – Build & Install Nabla run: | docker exec orphan ` ${{ env.entry }} ${{ env.cmd }} -Command cmake --build ` --preset ci-build-dynamic-${{ matrix.vendor }} ` - -t run-compiler-explorer --config ${{ matrix.config }} - - - name: Container – Install Nabla - run: | - docker exec orphan ` - ${{ env.entry }} ${{ env.cmd }} -Command cmake --install ` - ${{ env.binary }} --config ${{ matrix.config }} ` - --prefix ${{ env.install }} + --target install ` + --config ${{ matrix.config }} - name: API / Examples / Check Run (Create) id: check-run-create @@ -390,11 +411,11 @@ jobs: if (-not (Test-Path "smoke/build-ct/install")) { throw "smoke/build-ct/install not found" } tree.com smoke /F - - name: Configure Smoke - run: cmake -S smoke -B smoke/out + - name: Smoke Flow MINIMALISTIC + run: cmake -D FLOW=MINIMALISTIC -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake - - name: Build Smoke - run: cmake --build smoke/out --config ${{ matrix.config }} + - name: Smoke Flow CONFIGURE_ONLY + run: cmake -D FLOW=CONFIGURE_ONLY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake - - name: CTest Smoke - run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} \ No newline at end of file + - name: Smoke Flow BUILD_ONLY + run: cmake -D FLOW=BUILD_ONLY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake diff --git a/.github/workflows/run-nsc.yml 
b/.github/workflows/run-nsc.yml index d5f9f74c2b..ce050581dc 100644 --- a/.github/workflows/run-nsc.yml +++ b/.github/workflows/run-nsc.yml @@ -51,9 +51,35 @@ jobs: Set-MpPreference -DisableArchiveScanning $true Set-MpPreference -DisableScanningMappedNetworkDrivesForFullScan $true - if (-not (docker network ls --format '{{.Name}}' | Where-Object { $_ -eq 'docker_default' })) { - docker network create --driver nat docker_default - if ($LASTEXITCODE -ne 0) { exit 1 } + $maxAttempts = 12 + $delaySeconds = 5 + $dockerReady = $false + + for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) { + $networkNames = docker network ls --format '{{.Name}}' + if ($LASTEXITCODE -eq 0) { + if (-not ($networkNames | Where-Object { $_ -eq 'docker_default' })) { + docker network create --driver nat docker_default + if ($LASTEXITCODE -eq 0) { + $dockerReady = $true + break + } + } + else { + $dockerReady = $true + break + } + } + + if ($attempt -lt $maxAttempts) { + Write-Host "Docker not ready yet (attempt $attempt/$maxAttempts), retry in ${delaySeconds}s..." 
+ Start-Sleep -Seconds $delaySeconds + } + } + + if (-not $dockerReady) { + Write-Error "Docker was not ready after $($maxAttempts*$delaySeconds)s total wait" + exit 1 } $sendDiscord = "${{ inputs.withDiscordMSG }}" -eq "true" diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 705158b15a..68e821dfdf 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -530,9 +530,6 @@ nbl_install_dir(imath/src/Imath) nbl_install_file(blake/c/blake3.h) -nbl_install_file_spec(nlohmann_json/include/nlohmann/json_fwd.hpp nlohmann) -nbl_install_file_spec(nlohmann_json/include/nlohmann/detail/abi_macros.hpp nlohmann/detail) - nbl_install_dir(boost/superproject/libs/preprocessor/include/boost) nbl_install_file_spec(renderdoc/renderdoc_app.h renderdoc) diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 33d7f51258..3dda5a1a87 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 33d7f512583b8de44d1b6384aa1cf482f92e53e9 +Subproject commit 3dda5a1a87b62fdf3baf4680edc41c00e85a7a22 diff --git a/3rdparty/Vulkan-Tools b/3rdparty/Vulkan-Tools index 761e7bf273..4b6f7101c1 160000 --- a/3rdparty/Vulkan-Tools +++ b/3rdparty/Vulkan-Tools @@ -1 +1 @@ -Subproject commit 761e7bf2736f3ad326fdfc1b3c1543f4e669fd5c +Subproject commit 4b6f7101c15e09a8931f2f81c97146d0dfe68bc5 diff --git a/CMakePresets.json b/CMakePresets.json index 3117e607ac..88f093ac8f 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -295,7 +295,32 @@ "CMAKE_BUILD_TYPE": "Debug", "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" } - } + }, + { + "name": "n4ce-configure-windows-msvc", + "hidden": false, + "binaryDir": "build/production/n4ce", + "cacheVariables": { + "NBL_STATIC_BUILD": "OFF", + "NBL_SKIP_BUILD_OPTIONS_VALIDATION": "ON", + "CMAKE_SUPPRESS_REGENERATION": "OFF", + "NBL_COMPILER_DYNAMIC_RUNTIME": "ON", + "NBL_EMBED_BUILTIN_RESOURCES": "ON", + "NBL_UPDATE_GIT_SUBMODULE": "OFF", + "NBL_COMPILE_WITH_CUDA": "OFF", + "NBL_BUILD_OPTIX": "OFF", 
+ "NBL_BUILD_MITSUBA_LOADER": "OFF", + "NBL_BUILD_RADEON_RAYS": "OFF", + "_NBL_COMPILE_WITH_OPEN_EXR_": "ON", + "NBL_EXPLICIT_MODULE_LOAD_LOG": "ON", + "NBL_CPACK_NO_BUILD_DIRECTORY_MODULES": "ON", + "GIT_FAIL_IF_NONZERO_EXIT": "OFF" + }, + "displayName": "[N4CE]: Dynamic library target, Visual Studio 17 2022 generator, MSVC v143 toolset", + "description": "Configure as dynamic library with Visual Studio 17 2022 generator and MSVC v143 toolset", + "generator": "Visual Studio 17 2022", + "toolset": "v143" + } ], "buildPresets": [ { diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index 44b2a1abcb..c91f6dfaec 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -15,6 +15,700 @@ endif() include("${CMAKE_CURRENT_LIST_DIR}/NablaExportTargets.cmake") check_required_components(Nabla) +# +# nabla_sync_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] +# [MODE ] +# [RUNTIME_MODULES_SUBDIR ] +# [BUILD_TRIGGER_TARGETS ] +# ) +# +# nabla_apply_runtime_lookup( +# TARGETS +# [RUNTIME_MODULES_SUBDIR ] +# ) +# +# nabla_setup_runtime_install_modules( +# [RUNTIME_MODULES_SUBDIR ] +# ) +# +# nabla_setup_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] +# [APPLY_LOOKUP_TO_TARGETS ] +# [RUNTIME_MODULES_SUBDIR ] +# [MODE ] +# [INSTALL_RULES ] +# [BUILD_TRIGGER_TARGETS ] +# ) +# +# Wrapper around sync + lookup + install helpers. +# +# Config mapping: +# - Runtime source path is resolved from mapped imported config of Nabla::Nabla. +# - MAP_IMPORTED_CONFIG_* and CMAKE_MAP_IMPORTED_CONFIG_* are applied automatically. +# - MODE=CONFIGURE_TIME and MODE=BOTH resolve mapped imported config during configure/generate. +# - For MODE=CONFIGURE_TIME and MODE=BOTH, finalize mapping before calling helpers. +# - If using CMAKE_MAP_IMPORTED_CONFIG_, set it before find_package(Nabla). 
+# + +function(_nbl_runtime_modules_apply_lookup_definitions _TARGET _RUNTIME_MODULES_SUBDIR) + target_compile_definitions("${_TARGET}" PRIVATE + NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_RUNTIME_MODULES_SUBDIR}" + NBL_CPACK_PACKAGE_DXC_DLL_DIR="./${_RUNTIME_MODULES_SUBDIR}" + ) +endfunction() + +function(_nbl_runtime_modules_apply_lookup_definitions_to_targets _TARGETS _RUNTIME_MODULES_SUBDIR) + set(_targets ${_TARGETS}) + list(REMOVE_DUPLICATES _targets) + + foreach(_target IN LISTS _targets) + if(NOT TARGET "${_target}") + message(FATAL_ERROR "Nabla: target \"${_target}\" does not exist") + endif() + _nbl_runtime_modules_apply_lookup_definitions("${_target}" "${_RUNTIME_MODULES_SUBDIR}") + endforeach() +endfunction() + +function(_nbl_runtime_modules_add_install_rules _RUNTIME_MODULES_SUBDIR) + if(NOT DEFINED CMAKE_INSTALL_BINDIR) + include(GNUInstallDirs) + endif() + + set(_nbl_install_modules_dest "${CMAKE_INSTALL_BINDIR}/${_RUNTIME_MODULES_SUBDIR}") + string(MD5 _nbl_install_modules_dest_key "${_nbl_install_modules_dest}") + get_property(_nbl_install_rules_added GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}") + if(NOT _nbl_install_rules_added) + install(FILES "$" + DESTINATION "${_nbl_install_modules_dest}" + ) + install(DIRECTORY "$,3rdparty,dxc>/" + DESTINATION "${_nbl_install_modules_dest}" + ) + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}" TRUE) + endif() +endfunction() + +function(_nbl_runtime_modules_collect_consumer_configs _OUT_CONFIGS) + if(CMAKE_CONFIGURATION_TYPES) + set(_consumer_configs ${CMAKE_CONFIGURATION_TYPES}) + elseif(CMAKE_BUILD_TYPE) + set(_consumer_configs "${CMAKE_BUILD_TYPE}") + else() + set(_consumer_configs Debug) + endif() + + list(REMOVE_DUPLICATES _consumer_configs) + set(${_OUT_CONFIGS} ${_consumer_configs} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_expand_destination_pairs _DESTINATION_DEFAULT _DESTINATION_DEBUG 
_DESTINATION_RELEASE _DESTINATION_RELWITHDEBINFO _OUT_CFG_DST_PAIRS) + _nbl_runtime_modules_collect_consumer_configs(_consumer_configs) + set(_cfg_dst_pairs "") + + foreach(_consumer_config IN LISTS _consumer_configs) + string(TOUPPER "${_consumer_config}" _cfg_upper) + if(_cfg_upper STREQUAL "DEBUG" AND NOT _DESTINATION_DEBUG STREQUAL "") + set(_resolved_destination "${_DESTINATION_DEBUG}") + elseif(_cfg_upper STREQUAL "RELEASE" AND NOT _DESTINATION_RELEASE STREQUAL "") + set(_resolved_destination "${_DESTINATION_RELEASE}") + elseif(_cfg_upper STREQUAL "RELWITHDEBINFO" AND NOT _DESTINATION_RELWITHDEBINFO STREQUAL "") + set(_resolved_destination "${_DESTINATION_RELWITHDEBINFO}") + else() + set(_resolved_destination "${_DESTINATION_DEFAULT}") + endif() + + if(_resolved_destination STREQUAL "") + message(FATAL_ERROR "Nabla: missing destination for consumer config \"${_consumer_config}\". Provide DESTINATION or one of DESTINATION_DEBUG/DESTINATION_RELEASE/DESTINATION_RELWITHDEBINFO.") + endif() + + if(_resolved_destination MATCHES "\\$<") + message(FATAL_ERROR "Nabla: DESTINATION for MODE CONFIGURE_TIME must be a plain path without generator expressions.") + endif() + + cmake_path(IS_ABSOLUTE _resolved_destination _is_abs) + if(NOT _is_abs) + cmake_path(ABSOLUTE_PATH _resolved_destination BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" OUTPUT_VARIABLE _resolved_destination) + endif() + + list(APPEND _cfg_dst_pairs "${_consumer_config}::${_resolved_destination}") + endforeach() + + set(${_OUT_CFG_DST_PAIRS} ${_cfg_dst_pairs} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_resolve_imported_nabla_file _CONSUMER_CONFIG _OUT_IMPORTED_FILE) + string(TOUPPER "${_CONSUMER_CONFIG}" _cfg_upper) + + # Resolve runtime source from mapped imported config for given consumer config. 
+ set(_mapped_candidates "") + get_target_property(_target_map "Nabla::Nabla" "MAP_IMPORTED_CONFIG_${_cfg_upper}") + if(_target_map AND NOT _target_map STREQUAL "NOTFOUND") + list(APPEND _mapped_candidates ${_target_map}) + endif() + + set(_global_map_var "CMAKE_MAP_IMPORTED_CONFIG_${_cfg_upper}") + if(DEFINED ${_global_map_var} AND NOT "${${_global_map_var}}" STREQUAL "") + list(APPEND _mapped_candidates ${${_global_map_var}}) + endif() + + list(APPEND _mapped_candidates "${_cfg_upper}") + + foreach(_mapped_config IN LISTS _mapped_candidates) + if(_mapped_config STREQUAL "") + get_target_property(_candidate "Nabla::Nabla" IMPORTED_LOCATION) + else() + string(TOUPPER "${_mapped_config}" _mapped_upper) + get_target_property(_candidate "Nabla::Nabla" "IMPORTED_LOCATION_${_mapped_upper}") + endif() + + if(_candidate AND NOT _candidate STREQUAL "NOTFOUND" AND EXISTS "${_candidate}") + set(${_OUT_IMPORTED_FILE} "${_candidate}" PARENT_SCOPE) + return() + endif() + endforeach() + + get_target_property(_imported_configs "Nabla::Nabla" IMPORTED_CONFIGURATIONS) + foreach(_imported_config IN LISTS _imported_configs) + get_target_property(_candidate "Nabla::Nabla" "IMPORTED_LOCATION_${_imported_config}") + if(_candidate AND NOT _candidate STREQUAL "NOTFOUND" AND EXISTS "${_candidate}") + set(${_OUT_IMPORTED_FILE} "${_candidate}" PARENT_SCOPE) + return() + endif() + endforeach() + + get_target_property(_candidate "Nabla::Nabla" IMPORTED_LOCATION) + if(_candidate AND NOT _candidate STREQUAL "NOTFOUND" AND EXISTS "${_candidate}") + set(${_OUT_IMPORTED_FILE} "${_candidate}" PARENT_SCOPE) + return() + endif() + + message(FATAL_ERROR "Nabla: cannot resolve imported runtime location for consumer config \"${_CONSUMER_CONFIG}\"") +endfunction() + +function(_nbl_runtime_modules_resolve_dxc_runtime_file _NABLA_IMPORTED_FILE _OUT_DXC_IMPORTED_FILE) + cmake_path(GET _NABLA_IMPORTED_FILE PARENT_PATH _nabla_runtime_dir) + set(_dxc_runtime_file 
"${_nabla_runtime_dir}/3rdparty/dxc/${CMAKE_SHARED_LIBRARY_PREFIX}dxcompiler${CMAKE_SHARED_LIBRARY_SUFFIX}") + + if(NOT EXISTS "${_dxc_runtime_file}") + message(FATAL_ERROR "Nabla: DXC runtime module not found at \"${_dxc_runtime_file}\"") + endif() + + set(${_OUT_DXC_IMPORTED_FILE} "${_dxc_runtime_file}" PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_expand_target_configure_sync_pairs _TARGET _RUNTIME_MODULES_SUBDIR _OUT_CFG_DST_PAIRS) + if(NOT TARGET "${_TARGET}") + message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") + endif() + + get_target_property(_runtime_output_dir "${_TARGET}" RUNTIME_OUTPUT_DIRECTORY) + if(_runtime_output_dir) + set(_runtime_output_base "${_runtime_output_dir}") + elseif(DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY AND NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY STREQUAL "") + set(_runtime_output_base "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + else() + if(CMAKE_CONFIGURATION_TYPES) + set(_runtime_output_base "${CMAKE_CURRENT_BINARY_DIR}/$") + else() + set(_runtime_output_base "${CMAKE_CURRENT_BINARY_DIR}") + endif() + endif() + + _nbl_runtime_modules_collect_consumer_configs(_consumer_configs) + set(_cfg_dst_pairs "") + + if(_runtime_output_base MATCHES "\\$") + set(_runtime_output_without_config "${_runtime_output_base}") + string(REPLACE "$" "" _runtime_output_without_config "${_runtime_output_without_config}") + if(_runtime_output_without_config MATCHES "\\$<") + message(FATAL_ERROR "Nabla: MODE CONFIGURE_TIME supports only $ generator expression in runtime output directory") + endif() + + foreach(_consumer_config IN LISTS _consumer_configs) + set(_runtime_output_resolved "${_runtime_output_base}") + string(REPLACE "$" "${_consumer_config}" _runtime_output_resolved "${_runtime_output_resolved}") + cmake_path(IS_ABSOLUTE _runtime_output_resolved _is_abs) + if(NOT _is_abs) + cmake_path(ABSOLUTE_PATH _runtime_output_resolved BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" OUTPUT_VARIABLE _runtime_output_resolved) + endif() + 
set(_runtime_modules_dst "${_runtime_output_resolved}/${_RUNTIME_MODULES_SUBDIR}") + list(APPEND _cfg_dst_pairs "${_consumer_config}::${_runtime_modules_dst}") + endforeach() + else() + if(_runtime_output_base MATCHES "\\$<") + message(FATAL_ERROR "Nabla: MODE CONFIGURE_TIME supports only plain paths or paths with $ in runtime output directory") + endif() + + list(LENGTH _consumer_configs _consumer_configs_count) + if(_consumer_configs_count GREATER 1) + message(FATAL_ERROR "Nabla: MODE CONFIGURE_TIME with multi-config generators requires $ in runtime output directory") + endif() + + list(GET _consumer_configs 0 _consumer_config) + cmake_path(IS_ABSOLUTE _runtime_output_base _is_abs) + if(NOT _is_abs) + cmake_path(ABSOLUTE_PATH _runtime_output_base BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" OUTPUT_VARIABLE _runtime_output_base) + endif() + set(_runtime_modules_dst "${_runtime_output_base}/${_RUNTIME_MODULES_SUBDIR}") + list(APPEND _cfg_dst_pairs "${_consumer_config}::${_runtime_modules_dst}") + endif() + + set(${_OUT_CFG_DST_PAIRS} ${_cfg_dst_pairs} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_add_configure_sync_rule_for_pairs _CFG_DST_PAIRS _ENABLE_CONFIGURE_DEPENDS) + set(_cfg_dst_pairs ${_CFG_DST_PAIRS}) + + foreach(_cfg_dst_pair IN LISTS _cfg_dst_pairs) + string(REPLACE "::" ";" _cfg_dst_parts "${_cfg_dst_pair}") + list(GET _cfg_dst_parts 0 _consumer_config) + list(GET _cfg_dst_parts 1 _runtime_modules_dst) + + string(MD5 _runtime_modules_dst_key "${_runtime_modules_dst}") + get_property(_runtime_modules_config_synced GLOBAL PROPERTY "NBL_RUNTIME_MODULES_CONFIG_SYNC_${_runtime_modules_dst_key}") + if(_runtime_modules_config_synced) + continue() + endif() + + _nbl_runtime_modules_resolve_imported_nabla_file("${_consumer_config}" _nabla_runtime_file) + _nbl_runtime_modules_resolve_dxc_runtime_file("${_nabla_runtime_file}" _dxc_runtime_file) + + file(MAKE_DIRECTORY "${_runtime_modules_dst}") + + cmake_path(GET _nabla_runtime_file FILENAME 
_nabla_runtime_name) + cmake_path(GET _dxc_runtime_file FILENAME _dxc_runtime_name) + set(_nabla_runtime_dst "${_runtime_modules_dst}/${_nabla_runtime_name}") + set(_dxc_runtime_dst "${_runtime_modules_dst}/${_dxc_runtime_name}") + + file(COPY_FILE "${_nabla_runtime_file}" "${_nabla_runtime_dst}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) + file(COPY_FILE "${_dxc_runtime_file}" "${_dxc_runtime_dst}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) + + if(_ENABLE_CONFIGURE_DEPENDS) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS + "${_nabla_runtime_file}" + "${_dxc_runtime_file}" + "${_nabla_runtime_dst}" + "${_dxc_runtime_dst}" + ) + endif() + + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_CONFIG_SYNC_${_runtime_modules_dst_key}" TRUE) + endforeach() +endfunction() + +function(_nbl_runtime_modules_add_configure_sync_rule_for_targets _TARGETS _RUNTIME_MODULES_SUBDIR _ENABLE_CONFIGURE_DEPENDS) + set(_targets ${_TARGETS}) + list(REMOVE_DUPLICATES _targets) + + set(_cfg_dst_pairs "") + foreach(_target IN LISTS _targets) + _nbl_runtime_modules_expand_target_configure_sync_pairs("${_target}" "${_RUNTIME_MODULES_SUBDIR}" _target_cfg_dst_pairs) + list(APPEND _cfg_dst_pairs ${_target_cfg_dst_pairs}) + endforeach() + + _nbl_runtime_modules_add_configure_sync_rule_for_pairs("${_cfg_dst_pairs}" "${_ENABLE_CONFIGURE_DEPENDS}") +endfunction() + +function(_nbl_runtime_modules_add_build_sync_rule_for_target _TARGET _RUNTIME_MODULES_SUBDIR) + if(NOT TARGET "${_TARGET}") + message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") + endif() + + get_target_property(_nbl_runtime_output_dir "${_TARGET}" RUNTIME_OUTPUT_DIRECTORY) + if(_nbl_runtime_output_dir) + set(_nbl_runtime_modules_dest "$") + elseif(DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY AND NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY STREQUAL "") + set(_nbl_runtime_modules_dest "$") + else() + set(_nbl_runtime_modules_dest "$/${_RUNTIME_MODULES_SUBDIR}") + endif() + + string(MD5 _nbl_runtime_modules_dest_key 
"${_nbl_runtime_modules_dest}") + + get_property(_nbl_runtime_modules_stamp GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_${_nbl_runtime_modules_dest_key}") + if(NOT _nbl_runtime_modules_stamp) + set(_nbl_runtime_modules_stamp "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/nabla_runtime_modules_${_nbl_runtime_modules_dest_key}.stamp") + + add_custom_command( + OUTPUT "${_nbl_runtime_modules_stamp}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_nbl_runtime_modules_dest}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${_nbl_runtime_modules_dest}/" + COMMAND ${CMAKE_COMMAND} -E copy_directory "$,3rdparty,dxc>" "${_nbl_runtime_modules_dest}" + COMMAND ${CMAKE_COMMAND} -E touch "${_nbl_runtime_modules_stamp}" + DEPENDS + "$" + "$,3rdparty,dxc,${CMAKE_SHARED_LIBRARY_PREFIX}dxcompiler${CMAKE_SHARED_LIBRARY_SUFFIX}>" + "$>" + COMMAND_EXPAND_LISTS + VERBATIM + ) + + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_${_nbl_runtime_modules_dest_key}" "${_nbl_runtime_modules_stamp}") + endif() + + set_source_files_properties("${_nbl_runtime_modules_stamp}" PROPERTIES GENERATED TRUE) + target_sources("${_TARGET}" PRIVATE "${_nbl_runtime_modules_stamp}") +endfunction() + +function(_nbl_runtime_modules_add_build_sync_rule_for_targets _TARGETS _RUNTIME_MODULES_SUBDIR) + set(_targets ${_TARGETS}) + list(REMOVE_DUPLICATES _targets) + + foreach(_target IN LISTS _targets) + _nbl_runtime_modules_add_build_sync_rule_for_target("${_target}" "${_RUNTIME_MODULES_SUBDIR}") + endforeach() +endfunction() + +function(_nbl_runtime_modules_add_build_sync_rule_for_destination_pairs _BUILD_TRIGGER_TARGETS _CFG_DST_PAIRS) + set(_build_trigger_targets ${_BUILD_TRIGGER_TARGETS}) + list(REMOVE_DUPLICATES _build_trigger_targets) + + foreach(_target IN LISTS _build_trigger_targets) + if(NOT TARGET "${_target}") + message(FATAL_ERROR "Nabla: BUILD_TRIGGER_TARGETS contains unknown target \"${_target}\"") + endif() + endforeach() + + set(_cfg_dst_pairs ${_CFG_DST_PAIRS}) + + 
foreach(_cfg_dst_pair IN LISTS _cfg_dst_pairs) + string(REPLACE "::" ";" _cfg_dst_parts "${_cfg_dst_pair}") + list(GET _cfg_dst_parts 0 _consumer_config) + list(GET _cfg_dst_parts 1 _runtime_modules_dst) + + _nbl_runtime_modules_resolve_imported_nabla_file("${_consumer_config}" _nabla_runtime_file) + _nbl_runtime_modules_resolve_dxc_runtime_file("${_nabla_runtime_file}" _dxc_runtime_file) + + cmake_path(GET _nabla_runtime_file FILENAME _nabla_runtime_name) + cmake_path(GET _dxc_runtime_file FILENAME _dxc_runtime_name) + set(_nabla_runtime_dst "${_runtime_modules_dst}/${_nabla_runtime_name}") + set(_dxc_runtime_dst "${_runtime_modules_dst}/${_dxc_runtime_name}") + + string(MD5 _runtime_modules_dst_key "${_consumer_config}::${_runtime_modules_dst}") + + get_property(_sync_target GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_DEST_TARGET_${_runtime_modules_dst_key}") + if(NOT _sync_target) + set(_sync_target "nabla_runtime_modules_dest_sync_${_runtime_modules_dst_key}") + set(_sync_stamp "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_sync_target}.stamp") + + add_custom_command( + OUTPUT "${_sync_stamp}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_runtime_modules_dst}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_nabla_runtime_file}" "${_runtime_modules_dst}/" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_dxc_runtime_file}" "${_runtime_modules_dst}/" + COMMAND ${CMAKE_COMMAND} -E touch "${_sync_stamp}" + DEPENDS + "${_nabla_runtime_file}" + "${_dxc_runtime_file}" + "${_nabla_runtime_dst}" + "${_dxc_runtime_dst}" + VERBATIM + ) + + add_custom_target("${_sync_target}" DEPENDS "${_sync_stamp}") + set_target_properties("${_sync_target}" PROPERTIES + FOLDER "CMakePredefinedTargets" + EXCLUDE_FROM_ALL TRUE + ) + + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_DEST_TARGET_${_runtime_modules_dst_key}" "${_sync_target}") + endif() + + foreach(_target IN LISTS _build_trigger_targets) + add_dependencies("${_target}" "${_sync_target}") + endforeach() 
+ endforeach() +endfunction() + +# +# nabla_apply_runtime_lookup( +# TARGETS +# [RUNTIME_MODULES_SUBDIR ] +# ) +# +# Applies runtime lookup compile definitions to executable targets. +# The lookup is always relative to executable directory and does not expose +# absolute paths. +# +# Notes: +# - TARGETS is required. +# - RUNTIME_MODULES_SUBDIR defaults to "Libraries". +# +function(nabla_apply_runtime_lookup) + set(_nbl_runtime_modules_subdir "Libraries") + + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR" "TARGETS" ${ARGV}) + + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_apply_runtime_lookup: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") + endif() + + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + + if(NOT _NBL_CUSTOM_TARGETS) + message(FATAL_ERROR "Nabla: nabla_apply_runtime_lookup requires TARGETS ") + endif() + + _nbl_runtime_modules_apply_lookup_definitions_to_targets("${_NBL_CUSTOM_TARGETS}" "${_nbl_runtime_modules_subdir}") +endfunction() + +# +# nabla_setup_runtime_install_modules( +# [RUNTIME_MODULES_SUBDIR ] +# ) +# +# Adds install() rules that copy Nabla and DXC runtime modules into: +# ${CMAKE_INSTALL_BINDIR}/ +# +# Notes: +# - RUNTIME_MODULES_SUBDIR defaults to "Libraries". +# - This helper only adds install rules. 
+# +function(nabla_setup_runtime_install_modules) + set(_nbl_runtime_modules_subdir "Libraries") + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR" "" ${ARGV}) + + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_setup_runtime_install_modules: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") + endif() + + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + + _nbl_runtime_modules_add_install_rules("${_nbl_runtime_modules_subdir}") +endfunction() + +# +# nabla_sync_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] +# [MODE BUILD_TIME|CONFIGURE_TIME|BOTH] +# [RUNTIME_MODULES_SUBDIR ] +# [BUILD_TRIGGER_TARGETS ] +# ) +# +# Synchronizes runtime modules from Nabla package into consumer runtime layout. +# +# Input modes (mutually exclusive): +# - TARGETS mode +# Copies beside each target runtime dir under RUNTIME_MODULES_SUBDIR. +# - DESTINATION mode +# Copies to explicit DESTINATION or DESTINATION_DEBUG/RELEASE/RELWITHDEBINFO paths. +# +# MODE: +# - BUILD_TIME +# Copy during build. +# - CONFIGURE_TIME +# Copy during configure/generate and set configure depends. +# - BOTH +# Run configure-time copy and build-time copy. 
+# +# Rules: +# - exactly one input mode must be used +# - BUILD_TRIGGER_TARGETS is valid only in DESTINATION mode for BUILD_TIME/BOTH +# +function(nabla_sync_runtime_modules) + set(_nbl_runtime_modules_subdir "Libraries") + set(_nbl_mode BUILD_TIME) + + cmake_parse_arguments(_NBL_CUSTOM "" "MODE;DESTINATION;DESTINATION_DEBUG;DESTINATION_RELEASE;DESTINATION_RELWITHDEBINFO;RUNTIME_MODULES_SUBDIR" "TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) + + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_sync_runtime_modules: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") + endif() + + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + if(DEFINED _NBL_CUSTOM_MODE) + set(_nbl_mode "${_NBL_CUSTOM_MODE}") + endif() + + string(TOUPPER "${_nbl_mode}" _nbl_mode) + if(NOT _nbl_mode MATCHES "^(BUILD_TIME|CONFIGURE_TIME|BOTH)$") + message(FATAL_ERROR "Nabla: invalid MODE='${_nbl_mode}', expected BUILD_TIME, CONFIGURE_TIME or BOTH") + endif() + + set(_has_targets OFF) + if(_NBL_CUSTOM_TARGETS) + set(_has_targets ON) + endif() + + set(_has_destination OFF) + if(DEFINED _NBL_CUSTOM_DESTINATION AND NOT _NBL_CUSTOM_DESTINATION STREQUAL "") + set(_has_destination ON) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_DEBUG AND NOT _NBL_CUSTOM_DESTINATION_DEBUG STREQUAL "") + set(_has_destination ON) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELEASE AND NOT _NBL_CUSTOM_DESTINATION_RELEASE STREQUAL "") + set(_has_destination ON) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO AND NOT _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO STREQUAL "") + set(_has_destination ON) + endif() + + if(_has_targets AND _has_destination) + message(FATAL_ERROR "Nabla: use either TARGETS mode or DESTINATION mode, not both") + endif() + + if(NOT _has_targets AND NOT _has_destination) + message(FATAL_ERROR "Nabla: nabla_sync_runtime_modules requires TARGETS or 
DESTINATION/DESTINATION_DEBUG/DESTINATION_RELEASE/DESTINATION_RELWITHDEBINFO") + endif() + + if(_has_targets) + if(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + message(FATAL_ERROR "Nabla: BUILD_TRIGGER_TARGETS is valid only in DESTINATION mode") + endif() + + if(_nbl_mode STREQUAL "CONFIGURE_TIME" OR _nbl_mode STREQUAL "BOTH") + set(_enable_configure_depends OFF) + if(_nbl_mode STREQUAL "CONFIGURE_TIME") + set(_enable_configure_depends ON) + endif() + _nbl_runtime_modules_add_configure_sync_rule_for_targets("${_NBL_CUSTOM_TARGETS}" "${_nbl_runtime_modules_subdir}" "${_enable_configure_depends}") + endif() + + if(_nbl_mode STREQUAL "BUILD_TIME" OR _nbl_mode STREQUAL "BOTH") + _nbl_runtime_modules_add_build_sync_rule_for_targets("${_NBL_CUSTOM_TARGETS}" "${_nbl_runtime_modules_subdir}") + endif() + + return() + endif() + + _nbl_runtime_modules_expand_destination_pairs( + "${_NBL_CUSTOM_DESTINATION}" + "${_NBL_CUSTOM_DESTINATION_DEBUG}" + "${_NBL_CUSTOM_DESTINATION_RELEASE}" + "${_NBL_CUSTOM_DESTINATION_RELWITHDEBINFO}" + _cfg_dst_pairs + ) + + if(_nbl_mode STREQUAL "CONFIGURE_TIME" OR _nbl_mode STREQUAL "BOTH") + set(_enable_configure_depends OFF) + if(_nbl_mode STREQUAL "CONFIGURE_TIME") + set(_enable_configure_depends ON) + endif() + _nbl_runtime_modules_add_configure_sync_rule_for_pairs("${_cfg_dst_pairs}" "${_enable_configure_depends}") + endif() + + if(_nbl_mode STREQUAL "BUILD_TIME" OR _nbl_mode STREQUAL "BOTH") + if(NOT _NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + message(FATAL_ERROR "Nabla: DESTINATION mode with MODE ${_nbl_mode} requires BUILD_TRIGGER_TARGETS") + endif() + _nbl_runtime_modules_add_build_sync_rule_for_destination_pairs("${_NBL_CUSTOM_BUILD_TRIGGER_TARGETS}" "${_cfg_dst_pairs}") + elseif(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + message(FATAL_ERROR "Nabla: BUILD_TRIGGER_TARGETS is valid only for MODE BUILD_TIME or MODE BOTH") + endif() +endfunction() + +# +# nabla_setup_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_DEBUG ] +# 
[DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] +# [MODE BUILD_TIME|CONFIGURE_TIME|BOTH] +# [RUNTIME_MODULES_SUBDIR ] +# [INSTALL_RULES ON|OFF] +# [APPLY_LOOKUP_TO_TARGETS ] +# [BUILD_TRIGGER_TARGETS ] +# ) +# +# Convenience wrapper that composes: +# - nabla_sync_runtime_modules(...) +# - nabla_apply_runtime_lookup(...) +# - nabla_setup_runtime_install_modules(...) when INSTALL_RULES is enabled +# +# Lookup behavior: +# - if APPLY_LOOKUP_TO_TARGETS is set, lookup is applied to that list +# - else if TARGETS mode is used, lookup is applied to TARGETS +# - else no lookup changes are applied +# +function(nabla_setup_runtime_modules) + set(_nbl_runtime_modules_subdir "Libraries") + set(_nbl_install_rules OFF) + set(_nbl_mode BUILD_TIME) + + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR;INSTALL_RULES;MODE;DESTINATION;DESTINATION_DEBUG;DESTINATION_RELEASE;DESTINATION_RELWITHDEBINFO" "TARGETS;APPLY_LOOKUP_TO_TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) + + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_setup_runtime_modules: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") + endif() + + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + if(DEFINED _NBL_CUSTOM_INSTALL_RULES) + set(_nbl_install_rules "${_NBL_CUSTOM_INSTALL_RULES}") + endif() + if(DEFINED _NBL_CUSTOM_MODE) + set(_nbl_mode "${_NBL_CUSTOM_MODE}") + endif() + + set(_sync_args + MODE "${_nbl_mode}" + RUNTIME_MODULES_SUBDIR "${_nbl_runtime_modules_subdir}" + ) + + if(_NBL_CUSTOM_TARGETS) + list(APPEND _sync_args TARGETS ${_NBL_CUSTOM_TARGETS}) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION AND NOT _NBL_CUSTOM_DESTINATION STREQUAL "") + list(APPEND _sync_args DESTINATION "${_NBL_CUSTOM_DESTINATION}") + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_DEBUG AND NOT _NBL_CUSTOM_DESTINATION_DEBUG STREQUAL "") + list(APPEND _sync_args DESTINATION_DEBUG "${_NBL_CUSTOM_DESTINATION_DEBUG}") + 
endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELEASE AND NOT _NBL_CUSTOM_DESTINATION_RELEASE STREQUAL "") + list(APPEND _sync_args DESTINATION_RELEASE "${_NBL_CUSTOM_DESTINATION_RELEASE}") + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO AND NOT _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO STREQUAL "") + list(APPEND _sync_args DESTINATION_RELWITHDEBINFO "${_NBL_CUSTOM_DESTINATION_RELWITHDEBINFO}") + endif() + if(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + list(APPEND _sync_args BUILD_TRIGGER_TARGETS ${_NBL_CUSTOM_BUILD_TRIGGER_TARGETS}) + endif() + + nabla_sync_runtime_modules(${_sync_args}) + + set(_lookup_targets "") + if(_NBL_CUSTOM_APPLY_LOOKUP_TO_TARGETS) + set(_lookup_targets ${_NBL_CUSTOM_APPLY_LOOKUP_TO_TARGETS}) + elseif(_NBL_CUSTOM_TARGETS) + set(_lookup_targets ${_NBL_CUSTOM_TARGETS}) + endif() + + if(_lookup_targets) + nabla_apply_runtime_lookup( + TARGETS ${_lookup_targets} + RUNTIME_MODULES_SUBDIR "${_nbl_runtime_modules_subdir}" + ) + endif() + + if(_nbl_install_rules) + nabla_setup_runtime_install_modules( + RUNTIME_MODULES_SUBDIR "${_nbl_runtime_modules_subdir}" + ) + endif() +endfunction() + if(NABLA_FIND_PACKAGE_VERBOSE) message(STATUS "\n-- Nabla_ROOT = ${Nabla_ROOT}" @@ -29,4 +723,4 @@ if(NABLA_FIND_PACKAGE_VERBOSE) "-- Nabla's DXC module git info:" "\n${_nabla_dxc_git_info_raw}" ) -endif() \ No newline at end of file +endif() diff --git a/cmake/common.cmake b/cmake/common.cmake index 6a3b5ec990..48a4098d97 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1656,7 +1656,11 @@ function(NBL_CREATE_RESOURCE_ARCHIVE) return() endif() - set(IMPL_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${IMPL_TARGET}") + if(DEFINED NBL_ROOT_PATH_BINARY AND NBL_ROOT_PATH_BINARY) + set(IMPL_OUTPUT_DIRECTORY "${NBL_ROOT_PATH_BINARY}/${IMPL_TARGET}") + else() + set(IMPL_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${IMPL_TARGET}") + endif() set(_BUNDLE_ARCHIVE_ABSOLUTE_PATH_ "") get_filename_component(_BUNDLE_SEARCH_DIRECTORY_ "${IMPL_BIND}" ABSOLUTE) 
diff --git a/docs/consume/README.md b/docs/consume/README.md new file mode 100644 index 0000000000..e73a69c310 --- /dev/null +++ b/docs/consume/README.md @@ -0,0 +1,199 @@ +# Consuming Nabla Package + +This document describes how to consume an installed Nabla package from another CMake project. + +## 1. Package API + +After `find_package(Nabla CONFIG REQUIRED)`, the package provides: + +- imported target `Nabla::Nabla` +- helper `nabla_sync_runtime_modules(...)` +- helper `nabla_apply_runtime_lookup(...)` +- helper `nabla_setup_runtime_install_modules(...)` +- wrapper `nabla_setup_runtime_modules(...)` + +On shared builds, runtime modules include Nabla and DXC. + +Implementation and argument docs: + +- package API implementation: `${Nabla_ROOT}/cmake/NablaConfig.cmake` +- source template in Nabla repo: `cmake/NablaConfig.cmake.in` +- each public helper has usage notes in comments directly above its definition + +## 2. Minimal baseline + +```cmake +cmake_minimum_required(VERSION 3.30) +project(MyApp CXX) + +find_package(Nabla REQUIRED CONFIG) + +add_executable(my_app main.cpp) +target_link_libraries(my_app PRIVATE Nabla::Nabla) +``` + +Behavior in this minimal setup: + +- executable loads Nabla/DXC directly from package-provided lookup paths +- this works in consumer build interface without extra copy helpers +- install layout is not configured by this baseline + +If you also need your own install layout, add install rules and relative lookup defines. +Helpers from sections below can do this for you. + +## 3. 
Runtime setup primitives + +### 3.1 Copy runtime modules + +```cmake +nabla_sync_runtime_modules( + TARGETS my_app + MODE BUILD_TIME + RUNTIME_MODULES_SUBDIR "Libraries" +) +``` + +or with explicit destination(s): + +```cmake +nabla_sync_runtime_modules( + DESTINATION_DEBUG "${CMAKE_BINARY_DIR}/Debug/Libraries" + DESTINATION_RELEASE "${CMAKE_BINARY_DIR}/Release/Libraries" + DESTINATION_RELWITHDEBINFO "${CMAKE_BINARY_DIR}/RelWithDebInfo/Libraries" + MODE CONFIGURE_TIME +) +``` + +Rules: + +- use either `TARGETS` mode or `DESTINATION` / `DESTINATION_DEBUG` / `DESTINATION_RELEASE` / `DESTINATION_RELWITHDEBINFO` mode +- `MODE CONFIGURE_TIME` does copy during configure/generate +- `MODE BUILD_TIME` and `MODE BOTH` in destination mode require `BUILD_TRIGGER_TARGETS` + +### 3.2 Apply runtime lookup defines + +```cmake +nabla_apply_runtime_lookup( + TARGETS my_app + RUNTIME_MODULES_SUBDIR "Libraries" +) +``` + +This sets: + +- `NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./Libraries"` +- `NBL_CPACK_PACKAGE_DXC_DLL_DIR="./Libraries"` + +### 3.3 Install runtime modules + +```cmake +include(GNUInstallDirs) + +nabla_setup_runtime_install_modules( + RUNTIME_MODULES_SUBDIR "Libraries" +) + +install(TARGETS my_app + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) +``` + +## 4. Wrapper helper + +`nabla_setup_runtime_modules(...)` composes: + +- `nabla_sync_runtime_modules(...)` +- `nabla_apply_runtime_lookup(...)` +- optional `nabla_setup_runtime_install_modules(...)` + +Example: + +```cmake +nabla_setup_runtime_modules( + TARGETS my_app + MODE CONFIGURE_TIME + RUNTIME_MODULES_SUBDIR "Libraries" + INSTALL_RULES ON +) +``` + +## 5. 
Split flow global copy and per-exe lookup + +This is the split pattern used by consumers that want one global copy setup and per-exe lookup: + +```cmake +# one global copy setup +nabla_sync_runtime_modules( + DESTINATION_DEBUG "${CMAKE_BINARY_DIR}/3rdparty/shared/Debug/Libraries" + DESTINATION_RELEASE "${CMAKE_BINARY_DIR}/3rdparty/shared/Release/Libraries" + DESTINATION_RELWITHDEBINFO "${CMAKE_BINARY_DIR}/3rdparty/shared/RelWithDebInfo/Libraries" + MODE CONFIGURE_TIME +) + +# per executable target +nabla_apply_runtime_lookup( + TARGETS my_app + RUNTIME_MODULES_SUBDIR "Libraries" +) +``` + +## 6. Config mapping + +Runtime source paths are resolved from mapped imported config of `Nabla::Nabla`. + +Imported-config mapping applies automatically. This includes cross-config usage when one consumer config maps to a different imported config. + +If you override mapping: + +- do it in the same configure run +- if using `CMAKE_MAP_IMPORTED_CONFIG_`, set it before `find_package(Nabla)` +- for `MODE CONFIGURE_TIME` and `MODE BOTH`, set mapping before helper call + +## 7. Troubleshooting + +### `Could not load dxcompiler module` or `Could not load Nabla API` + +Check: + +- lookup defines are applied to executable target(s) +- lookup subdir matches actual runtime layout +- runtime modules exist in build/install runtime directory + +### Build works but installed app fails + +Install rules are usually missing. + +Use either: + +- `nabla_setup_runtime_install_modules(...)` +- `nabla_setup_runtime_modules(... INSTALL_RULES ON)` + +## 8. 
Design guidance + +For relocatable consumers: + +- keep lookup relative to executable +- never expose absolute paths in public compile definitions +- keep copy setup and lookup setup explicit in CMake + +Note: + +- current Nabla build interface still compiles some runtime lookup data with absolute paths +- this is a known issue on Nabla side and will be refactored +- do not propagate that pattern to package consumers +- consumer-facing package helpers are designed to avoid exposing absolute paths in consumer compile definitions + +## 9. Smoke reference + +`smoke/` is a reference consumer for Nabla package consumption. + +It contains multiple usage flows: + +- `MINIMALISTIC` link-only consumption without helper calls +- `CONFIGURE_ONLY` helper-based configure-time runtime sync +- `BUILD_ONLY` helper-based build-time runtime sync + +Flow selection is done with `NBL_SMOKE_FLOW` in `smoke/CMakeLists.txt` and `FLOW` in `smoke/RunSmokeFlow.cmake`. + +Smoke is also used as CI coverage for package consumption flows. +The `smoke-tests` job in `.github/workflows/build-nabla.yml` runs those flows as end-to-end checks. 
diff --git a/examples_tests b/examples_tests index 18fe5eb6a3..d8fe0efeba 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 18fe5eb6a39d7a09f8e928ca040d06d430e205bb +Subproject commit d8fe0efeba12dcc4a27488b81a5111d2c452a360 diff --git a/include/nbl/application_templates/MonoDeviceApplication.hpp b/include/nbl/application_templates/MonoDeviceApplication.hpp index a3a169d7b7..c7a94fe332 100644 --- a/include/nbl/application_templates/MonoDeviceApplication.hpp +++ b/include/nbl/application_templates/MonoDeviceApplication.hpp @@ -24,7 +24,8 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication virtual bool onAppTerminated() override { // break the circular references from queues tracking submit resources - m_device->waitIdle(); + if (m_device) + m_device->waitIdle(); m_device = nullptr; m_api = nullptr; return base_t::onAppTerminated(); diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index 6d64a2b769..a1d325a2a3 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -76,7 +76,8 @@ class IBottomLevelAccelerationStructure : public IAccelerationStructure // Provided by VK_NV_displacement_micromap ALLOW_DISPLACEMENT_MICROMAP_UPDATE_BIT = 0x1u<<9u, // Provided by VK_KHR_ray_tracing_position_fetch - ALLOW_DATA_ACCESS = 0x1u<<11u, + ALLOW_DATA_ACCESS_BIT = 0x1u<<11u, + ALLOW_DATA_ACCESS = ALLOW_DATA_ACCESS_BIT // deprecated }; diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index b7142713bf..c6589cf043 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -169,9 +169,11 @@ class IAsset : virtual public core::IReferenceCounted inline void visitDependents(std::function visit) { assert(isMutable()); - visitDependents([&](const IAsset* dependent) -> bool + visitDependents_impl([&](const IAsset* dep) -> bool { - return visit(const_cast(dependent)); + if (dep) + return 
visit(const_cast(dep)); + return true; }); } diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index d9995526bc..45b32b7c61 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -122,7 +122,7 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted explicit IAssetManager(core::smart_refctd_ptr&& system, core::smart_refctd_ptr&& compilerSet = nullptr) : m_system(std::move(system)), m_compilerSet(std::move(compilerSet)), - m_defaultLoaderOverride(this) + m_defaultLoaderOverride({.manager=this}) { assert(IPreHashed::INVALID_HASH == static_cast(core::blake3_hasher{})); initializeMeshTools(); diff --git a/include/nbl/asset/ICPUAccelerationStructure.h b/include/nbl/asset/ICPUAccelerationStructure.h index 4973d7c433..04f160459a 100644 --- a/include/nbl/asset/ICPUAccelerationStructure.h +++ b/include/nbl/asset/ICPUAccelerationStructure.h @@ -247,13 +247,16 @@ class ICPUBottomLevelAccelerationStructure final : public IPreHashed, public IBo geometryCount = m_triangleGeoms->size(); } - // https://registry.khronos.org/vulkan/specs/latest/man/html/vkGetAccelerationStructureBuildSizesKHR.html#VUID-vkGetAccelerationStructureBuildSizesKHR-pBuildInfo-03619 - if (geometryCount == 0) { - if (m_geometryPrimitiveCount && m_geometryPrimitiveCount->size() > 0) return false; + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkGetAccelerationStructureBuildSizesKHR.html#VUID-vkGetAccelerationStructureBuildSizesKHR-pBuildInfo-03619 + if (geometryCount == 0) + { + if (m_geometryPrimitiveCount && m_geometryPrimitiveCount->size() > 0) + return false; } - else + else { - if (!m_geometryPrimitiveCount || m_geometryPrimitiveCount->size() != geometryCount) return false; + if (!m_geometryPrimitiveCount || m_geometryPrimitiveCount->size() != geometryCount) + return false; } return true; } @@ -275,7 +278,7 @@ class ICPUBottomLevelAccelerationStructure final : public IPreHashed, public IBo 
core::smart_refctd_dynamic_array m_geometryPrimitiveCount = nullptr; core::bitflag m_buildFlags = BUILD_FLAGS::PREFER_FAST_TRACE_BIT; - inline void visitDependents_impl(std::function visit) const override {} + inline void visitDependents_impl(std::function visit) const override {} }; class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelAccelerationStructure @@ -287,14 +290,6 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA // ICPUTopLevelAccelerationStructure() = default; - - // - inline auto& getBuildRangeInfo() - { - assert(isMutable()); - return m_buildRangeInfo; - } - inline auto& getBuildRangeInfo() const {return m_buildRangeInfo;} // inline core::bitflag getBuildFlags() const {return m_buildFlags;} @@ -365,7 +360,6 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA auto cp = core::make_smart_refctd_ptr(); cp->m_instances = core::make_refctd_dynamic_array>(*m_instances); - cp->m_buildRangeInfo = m_buildRangeInfo; cp->m_buildFlags = m_buildFlags; if (_depth--) @@ -385,9 +379,6 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA if (!m_instances) return false; for (const auto& instance : *m_instances) if (!instance.getBase().blas->valid()) return false; - if (m_buildRangeInfo.instanceCount != m_instances->size()) return false; - // https://registry.khronos.org/vulkan/specs/latest/man/html/VkAccelerationStructureBuildRangeInfoKHR.html#VUID-VkAccelerationStructureBuildRangeInfoKHR-primitiveOffset-03660 - if (m_buildRangeInfo.instanceByteOffset % 16 != 0) return false; return true; } @@ -396,15 +387,16 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA private: core::smart_refctd_dynamic_array m_instances = nullptr; - hlsl::acceleration_structures::top_level::BuildRangeInfo m_buildRangeInfo; core::bitflag m_buildFlags = BUILD_FLAGS::PREFER_FAST_BUILD_BIT; - inline void visitDependents_impl(std::function 
visit) const override - { - if (!m_instances) return; - for (const auto& instance : *m_instances) - if (!visit(instance.getBase().blas.get())) return; - } + inline void visitDependents_impl(std::function visit) const override + { + if (!m_instances) + return; + for (const auto& instance : *m_instances) + if (!visit(instance.getBase().blas.get())) + return; + } }; } diff --git a/include/nbl/asset/ICPUGeometryCollection.h b/include/nbl/asset/ICPUGeometryCollection.h index d231f1df00..9e84de833e 100644 --- a/include/nbl/asset/ICPUGeometryCollection.h +++ b/include/nbl/asset/ICPUGeometryCollection.h @@ -84,17 +84,17 @@ class NBL_API2 ICPUGeometryCollection : public IAsset, public IGeometryCollectio } // - template// requires std::is_same_v()),decltype(ICPUBottomLevelAccelerationStructure::Triangles&)> - inline Iterator exportForBLAS(Iterator out, uint32_t* pWrittenOrdinals=nullptr) const + class CBLASExporter final : public IBLASExporter { - return exportForBLAS(std::forward(out),[](const hlsl::float32_t3x4& lhs, const hlsl::float32_t3x4& rhs)->void + protected: + inline void setTransform(BLASTriangles& out, const uint32_t geomIndex) override { - lhs = rhs; - if (pWrittenOrdinals) - *(pWrittenOrdinals++) = (ptrdiff_t(&rhs)-offsetof(SGeometryReference,transform)-ptrdiff_t(base_t::m_geometries.data()))/sizeof(SGeometryReference); + out.transform = m_geoms[geomIndex].transform; } - ); - } + + public: + inline CBLASExporter(const core::vector& _geoms) : IBLASExporter(_geoms) {} + }; protected: // diff --git a/include/nbl/asset/ICPUMorphTargets.h b/include/nbl/asset/ICPUMorphTargets.h index 29924f9727..e5d775e1af 100644 --- a/include/nbl/asset/ICPUMorphTargets.h +++ b/include/nbl/asset/ICPUMorphTargets.h @@ -46,6 +46,7 @@ class NBL_API2 ICPUMorphTargets : public IAsset, public IMorphTargets* getTargets() { if (isMutable()) diff --git a/include/nbl/asset/ICPUScene.h b/include/nbl/asset/ICPUScene.h index 56a069c469..9d363317de 100644 --- a/include/nbl/asset/ICPUScene.h +++ 
b/include/nbl/asset/ICPUScene.h @@ -110,11 +110,11 @@ class ICPUScene final : public IAsset, public IScene struct SInstanceStorage final { public: - inline SInstanceStorage(const size_t size=1) : morphTargets(size), materials(size), initialTransforms(size) {} + inline SInstanceStorage(const size_t size=0) : morphTargets(size), materials(size), initialTransforms(size) {} inline void clearInitialTransforms() {initialTransforms.clear();} - inline operator bool() const + explicit inline operator bool() const { if (morphTargets.size()!=materials.size()) return false; @@ -131,11 +131,11 @@ class ICPUScene final : public IAsset, public IScene initialTransforms.reserve(newSize); } - inline void resize(const size_t newSize) + inline void resize(const size_t newSize, const bool forceTransformStorage=false) { morphTargets.resize(newSize); materials.resize(newSize,InvalidMaterialTable); - if (!initialTransforms.empty()) + if (forceTransformStorage || !initialTransforms.empty()) initialTransforms.resize(newSize,ICPUGeometryCollection::SGeometryReference{}.transform); } @@ -143,11 +143,12 @@ class ICPUScene final : public IAsset, public IScene { morphTargets.erase(morphTargets.begin()+first,morphTargets.begin()+last); materials.erase(materials.begin()+first, materials.begin()+last); - initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last); + if (!initialTransforms.empty()) + initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last); } inline void erase(const size_t ix) {return erase(ix,ix+1);} - inline size_t size() const {return morphTargets.size();} + inline uint64_t size() const {return morphTargets.size();} inline std::span> getMorphTargets() {return morphTargets;} inline std::span> getMorphTargets() const {return morphTargets;} @@ -169,6 +170,165 @@ class ICPUScene final : public IAsset, public IScene // TODO: animations (keyframed transforms, skeleton instance) }; + // utility + class ITLASExporter + { + 
protected: + using instance_flags_t = asset::ICPUTopLevelAccelerationStructure::INSTANCE_FLAGS; + + inline ITLASExporter(const SInstanceStorage& _storage) : m_storage(_storage) {} + + const SInstanceStorage& m_storage; + + public: + virtual inline ICPUMorphTargets::index_t getTargetIndex(const uint32_t instanceIx) {return ICPUMorphTargets::index_t{0u};} + + virtual inline instance_flags_t getInstanceFlags(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) + { + // TODO: could derive from the material table if we want FORCE_OPAQUE_BIT or FORCE_NO_OPAQUE_BIT but its a whole instance thing + return instance_flags_t::TRIANGLE_FACING_CULL_DISABLE_BIT; + } + + virtual inline uint32_t getInstanceIndex(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) {return instanceIx;} + + // default + virtual inline uint32_t getSBTOffset(const material_table_offset_t materialsBeginIndex) + { + return 0; + } + + virtual inline uint8_t getMask(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) + { + return 0xFF; + } + + virtual inline hlsl::float32_t3x4 getTransform(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) + { + if (m_storage.initialTransforms.empty()) + return hlsl::math::linalg::diagonal(1.f); + else + return m_storage.initialTransforms[instanceIx]; + } + + // TODO: when we allow non-polygon geometries in the collection, we need to return a named pair, one BLAS for tris and one for AABBs + virtual core::smart_refctd_ptr getBLAS(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) = 0; + + struct SResult + { + explicit inline operator bool() const {return instances && !instances->empty();} + + core::smart_refctd_dynamic_array instances = nullptr; + bool allInstancesValid = false; + }; + // TODO: SBT stuff + inline SResult operator()() + { + // this is because most GPUs report 16M as max instance count, and there's only 24 bits in `instanceCustomIndex` + constexpr uint64_t MaxInstanceCount 
= 0x1u<<24; + const uint64_t instanceCount = m_storage.size(); + if (instanceCount>MaxInstanceCount) + return {}; + + std::vector instances; + instances.reserve(instanceCount*2); + bool allInstancesValid = true; + for (auto i=0u; ivalid()) + { + allInstancesValid = false; + continue; + } + const auto* const collection = targets->getTargets()[targetIx.value].geoCollection.get(); + ICPUTopLevelAccelerationStructure::StaticInstance inst; + inst.base.blas = getBLAS(i,targetIx); + if (!inst.base.blas) + { + allInstancesValid = false; + continue; + } + inst.transform = getTransform(i,targetIx); + const uint32_t customIndex = getInstanceIndex(i,targetIx); + if (customIndex>=MaxInstanceCount) + { + allInstancesValid = false; + continue; + } + inst.base.instanceCustomIndex = customIndex; + inst.base.mask = getMask(i,targetIx); + const auto targetTableOffset = m_storage.materials[i]+targets->getGeometryExclusiveCount(targetIx); + const auto sbtOffset = getSBTOffset(targetTableOffset); + if (sbtOffset>MaxInstanceCount+collection->getGeometries().size()) + { + allInstancesValid = false; + continue; + } + inst.base.instanceShaderBindingTableRecordOffset = sbtOffset; + inst.base.flags = static_cast(getInstanceFlags(i,targetIx)); + instances.emplace_back().instance = std::move(inst); + } + // TODO: adjust BLAS geometry flags according to materials set opaqueness and NO_DUPLICATE_ANY_HIT_INVOCATION_BIT + SResult retval = {.instances=core::make_refctd_dynamic_array(instanceCount),.allInstancesValid=allInstancesValid}; + std::move(instances.begin(),instances.end(),retval.instances->begin()); + return retval; + } + }; + class CDefaultTLASExporter final : public ITLASExporter + { + using triangles_t = ICPUBottomLevelAccelerationStructure::Triangles; + core::vector triangleScratch; + core::vector primitiveCountScratch; + + public: + inline CDefaultTLASExporter(const SInstanceStorage& _storage) : ITLASExporter(_storage) {} + + inline core::smart_refctd_ptr getBLAS(const uint32_t 
instanceIx, const ICPUMorphTargets::index_t targetIx) override + { + const auto* const targets = m_storage.morphTargets[instanceIx].get(); + const auto* const collection = targets->getTargets()[targetIx.value].geoCollection.get(); + // TODO: use emplace so erase can be faster + auto& entry = m_blasCache[collection]; + if (!entry) + { + entry = core::make_smart_refctd_ptr(); + // + const auto& geometries = collection->getGeometries(); + // deal with triangles + { + triangleScratch.resize(geometries.size()); + primitiveCountScratch.resize(geometries.size()); + const auto usedScratchEnd = ICPUGeometryCollection::CBLASExporter(geometries)(triangleScratch.begin(),primitiveCountScratch.data()); + // TODO: report some error that there was an unsupported geometry + //triangleScratch.end()!=usedScratchEnd + const auto actualGeoCount = std::distance(triangleScratch.begin(),usedScratchEnd); + if (actualGeoCount==0) + { + m_blasCache.erase(m_blasCache.find(collection)); + return nullptr; + } + auto triGeos = core::make_refctd_dynamic_array>(actualGeoCount); + std::move(triangleScratch.begin(),usedScratchEnd,triGeos->begin()); + auto primCounts = core::make_refctd_dynamic_array>(actualGeoCount); + std::copy_n(primitiveCountScratch.data(),actualGeoCount,primCounts->data()); + entry->setGeometries(std::move(triGeos),std::move(primCounts)); + } + using build_f = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS; + // no virtual callbacks because it's easy to tell what geometry collection the BLAS came from by looking at the cache after the export + // TODO: Allow Update when we figure out morph targets/skinning + // TODO: GEOMETRY_TYPE_IS_AABB_BIT for non-polygon geometry collections + entry->setBuildFlags(build_f::PREFER_FAST_TRACE_BIT|build_f::ALLOW_COMPACTION_BIT); + entry->setContentHash(entry->computeContentHash()); + } + return entry; + } + + // when doing animations, it's good to copy and reuse this with dummy BLASes but where content hashes are already the same + 
core::unordered_map> m_blasCache; + }; + // inline SInstanceStorage& getInstances() {return m_instances;} inline const SInstanceStorage& getInstances() const {return m_instances;} diff --git a/include/nbl/asset/IGeometry.h b/include/nbl/asset/IGeometry.h index 2e76e4bde7..319c98d99e 100644 --- a/include/nbl/asset/IGeometry.h +++ b/include/nbl/asset/IGeometry.h @@ -176,7 +176,7 @@ class IGeometryBase : public virtual core::IReferenceCounted struct SDataViewBase { // mostly checking validity of the format - inline operator bool() const {return format==EF_UNKNOWN || !isBlockCompressionFormat(format) && !isDepthOrStencilFormat(format);} + explicit inline operator bool() const {return format==EF_UNKNOWN || !isBlockCompressionFormat(format) && !isDepthOrStencilFormat(format);} // inline bool isFormatted() const {return format!=EF_UNKNOWN && bool(*this);} @@ -294,7 +294,7 @@ class IGeometry : public std::conditional_t() const diff --git a/include/nbl/asset/IGeometryCollection.h b/include/nbl/asset/IGeometryCollection.h index 1fac15ec17..ffebf81a22 100644 --- a/include/nbl/asset/IGeometryCollection.h +++ b/include/nbl/asset/IGeometryCollection.h @@ -34,8 +34,7 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted if (jointRedirectView.getElementCount()getJointCount()) return false; } - else - return true; + return true; } inline bool hasTransform() const {return !core::isnan(transform[0][0]);} @@ -57,7 +56,45 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted inline bool isSkinned() const {return getJointCount()>0;} // View of matrices being the inverse bind pose inline const SDataView& getInverseBindPoseView() const {return m_inverseBindPoseView;} + + + // + class IBLASExporter + { + protected: + using BLASTriangles = IBottomLevelAccelerationStructure::Triangles>; + inline IBLASExporter(const core::vector& _geoms) : m_geoms(_geoms) {} + virtual void setTransform(BLASTriangles& out, const uint32_t geomIndex) = 0; + + 
const core::vector& m_geoms; + + public: + template // requires (std::is_same_v()),decltype(BLASTriangles&)> && PrimCountIter is integral && OrdinalIter is also) + inline TriIter operator()(TriIter outIt, PrimCountIter outPrimCount, uint32_t* pWrittenOrdinals=nullptr) + { + for (const auto& ref : m_geoms) + { + // not a polygon geometry + const auto* geo = ref.geometry.get(); + if (geo->getPrimitiveType()!=IGeometryBase::EPrimitiveType::Polygon) + continue; + const auto ordinal = std::distance(m_geoms.data(),&ref); + const auto* polyGeo = static_cast*>(geo); + *outIt = polyGeo->exportForBLAS(); + if (outIt->vertexData[0]) + { + if (pWrittenOrdinals) + *(pWrittenOrdinals++) = ordinal; + *(outPrimCount++) = polyGeo->getPrimitiveCount(); + if (ref.hasTransform()) + setTransform(*outIt,ordinal); + outIt++; + } + } + return outIt; + } + }; protected: virtual ~IGeometryCollection() = default; @@ -94,25 +131,6 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted m_jointAABBView = std::move(jointAABBView); return true; } - - // need to be protected because of the mess around `transform` requires us to provide diffferent signatures for ICPUGeometryCollection and IGPUGeometryCollection - using BLASTriangles = IBottomLevelAccelerationStructure::Triangles>; - template// requires std::is_same_v()),decltype(BLASTriangles&)> - inline Iterator exportForBLAS(Iterator out, Callback& setTransform) const - { - for (const auto& ref : m_geometries) - { - // not a polygon geometry - const auto* geo = ref.geometry.get(); - if (geo->getPrimitiveType()==IGeometryBase::EPrimitiveType::Polygon) - continue; - const auto* polyGeo = static_cast*>(geo); - *out = polyGeo->exportForBLAS(); - if (out->vertexData[0]) - out++; - } - return out; - } // For the entire collection, as always it should NOT include any geometry which is affected by a joint. 
diff --git a/include/nbl/asset/IMorphTargets.h b/include/nbl/asset/IMorphTargets.h index 6f208c6f73..27c1bba5c5 100644 --- a/include/nbl/asset/IMorphTargets.h +++ b/include/nbl/asset/IMorphTargets.h @@ -22,7 +22,7 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted inline index_t() = default; explicit inline index_t(uint32_t _value) : value(_value) {} - inline operator bool() const {return value!=(~0u);} + explicit inline operator bool() const {return value!=(~0u);} uint32_t value = ~0u; }; @@ -68,7 +68,7 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted struct STarget { - inline operator bool() const + explicit inline operator bool() const { if (!geoCollection) return false; diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h index d23d503de4..6597f19810 100644 --- a/include/nbl/asset/IPolygonGeometry.h +++ b/include/nbl/asset/IPolygonGeometry.h @@ -10,6 +10,7 @@ #include "nbl/asset/IAccelerationStructure.h" #include +#include namespace nbl::asset { @@ -39,41 +40,53 @@ class IPolygonGeometryBase : public virtual core::IReferenceCounted template requires (sizeof(OutT)<8 && hlsl::concepts::UnsignedIntegralScalar) struct SContext final { - // `indexOfIndex` is somewhat of a baseIndex - template - inline void streamOut(const uint32_t indexOfIndex, const Range& permutation) - { - auto& typedOut = reinterpret_cast(out); - if (indexBuffer) - switch (indexSize) + private: + // `indexOfIndex` is somewhat of a baseIndex + template + inline void impl_streamOut(const uint32_t indexOfIndex, const Range permutation) { - case 1: - for (const auto relIx : permutation) - *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; - break; - case 2: - for (const auto relIx : permutation) - *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; - break; - case 4: - for (const auto relIx : permutation) - *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; - break; - 
default: - assert(false); - break; + auto& typedOut = reinterpret_cast(out); + if (indexBuffer) + switch (indexSize) + { + case 1: + for (const auto relIx : permutation) + *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; + break; + case 2: + for (const auto relIx : permutation) + *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; + break; + case 4: + for (const auto relIx : permutation) + *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; + break; + default: + assert(false); + break; + } + else + for (const auto relIx : permutation) + *(typedOut++) = indexOfIndex+relIx; } - else - for (const auto relIx : permutation) - *(typedOut++) = indexOfIndex+relIx; - } - - // always the base pointer, doesn't get advanced - const void* const indexBuffer; - const uint64_t indexSize : 3; - const uint64_t beginPrimitive : 30; - const uint64_t endPrimitive : 31; - void* out; + + public: + template requires std::ranges::bidirectional_range + inline void streamOut(const uint32_t indexOfIndex, const V& permutation) + { + if (reversePrims) + impl_streamOut(indexOfIndex,permutation|std::views::reverse); + else + impl_streamOut(indexOfIndex,permutation); + } + + // always the base pointer, doesn't get advanced + const void* const indexBuffer; + const uint64_t indexSize : 3; + const uint64_t beginPrimitive : 30; + const uint64_t endPrimitive : 30; + const uint64_t reversePrims : 1 = false; + void* out; }; // could have been a static if not virtual virtual void operator()(SContext& ctx) const = 0; @@ -195,7 +208,7 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG struct SJointWeight { // one thing this doesn't check is whether every vertex has a weight and index - inline operator bool() const {return indices && isIntegerFormat(indices.composed.format) && weights && weights.composed.isFormatted() && indices.getElementCount()==weights.getElementCount();} + explicit inline operator bool() const {return indices && 
isIntegerFormat(indices.composed.format) && weights && weights.composed.isFormatted() && indices.getElementCount()==weights.getElementCount();} SDataView indices; // Assumption is that only non-zero weights are present, which is why the joints are indexed (sparseness) diff --git a/include/nbl/asset/IPreHashed.h b/include/nbl/asset/IPreHashed.h index f7252211e1..50d9e71b21 100644 --- a/include/nbl/asset/IPreHashed.h +++ b/include/nbl/asset/IPreHashed.h @@ -39,61 +39,61 @@ class IPreHashed : public IAsset discardContent_impl(); } - static inline void discardDependantsContents(const std::span roots) - { - core::vector stack; - core::unordered_set alreadyVisited; // whether we have push the node to the stack - auto push = [&stack,&alreadyVisited](IAsset* node) -> bool - { - const auto [dummy,inserted] = alreadyVisited.insert(node); - if (inserted) - stack.push_back(node); - return true; - }; - for (const auto& root : roots) - push(root); - while (!stack.empty()) - { - auto* entry = stack.back(); - stack.pop_back(); - entry->visitDependents(push); - // pre order traversal does discard - auto* isPrehashed = dynamic_cast(entry); - if (isPrehashed) - isPrehashed->discardContent(); - } - } - static inline bool anyDependantDiscardedContents(const IAsset* root) - { - core::vector stack; - core::unordered_set alreadyVisited; // whether we have push the node to the stack - bool result = false; - auto push = [&stack,&alreadyVisited,&result](const IAsset* node) -> bool - { - const auto [dummy,inserted] = alreadyVisited.insert(node); - if (inserted) + static inline void discardDependantsContents(const std::span roots) { - auto* isPrehashed = dynamic_cast(node); - if (isPrehashed && isPrehashed->missingContent()) - { - stack.clear(); - result = true; - return false; - } - stack.push_back(node); + core::vector stack; + core::unordered_set alreadyVisited; // whether we have push the node to the stack + auto push = [&stack,&alreadyVisited](IAsset* node) -> bool + { + const auto 
[dummy,inserted] = alreadyVisited.insert(node); + if (inserted) + stack.push_back(node); + return true; + }; + for (const auto& root : roots) + push(root); + while (!stack.empty()) + { + auto* entry = stack.back(); + stack.pop_back(); + entry->visitDependents(push); + // pre order traversal does discard + auto* isPrehashed = dynamic_cast(entry); + if (isPrehashed) + isPrehashed->discardContent(); + } + } + static inline bool anyDependantDiscardedContents(const IAsset* root) + { + core::vector stack; + core::unordered_set alreadyVisited; // whether we have push the node to the stack + bool result = false; + auto push = [&stack,&alreadyVisited,&result](const IAsset* node) -> bool + { + const auto [dummy,inserted] = alreadyVisited.insert(node); + if (inserted) + { + auto* isPrehashed = dynamic_cast(node); + if (isPrehashed && isPrehashed->missingContent()) + { + stack.clear(); + result = true; + return false; + } + stack.push_back(node); + } + return true; + }; + if (!push(root)) + return true; + while (!stack.empty()) + { + auto* entry = stack.back(); + stack.pop_back(); + entry->visitDependents(push); + } + return result; } - return true; - }; - if (!push(root)) - return true; - while (!stack.empty()) - { - auto* entry = stack.back(); - stack.pop_back(); - entry->visitDependents(push); - } - return result; - } protected: inline IPreHashed() = default; diff --git a/include/nbl/asset/IRenderpass.h b/include/nbl/asset/IRenderpass.h index ce41e35573..a46210f73e 100644 --- a/include/nbl/asset/IRenderpass.h +++ b/include/nbl/asset/IRenderpass.h @@ -302,7 +302,7 @@ class NBL_API2 IRenderpass uint32_t dependencyCount = 0u; int8_t viewMaskMSB = -1; - inline operator bool() const {return subpassCount;} + explicit inline operator bool() const {return subpassCount;} inline bool hasViewMasks() const {return viewMaskMSB>=0;} }; diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index a194f0e13e..5354228278 100644 --- 
a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -11,6 +11,7 @@ #include "nbl/system/ILogger.h" #include "nbl/asset/interchange/SAssetBundle.h" +#include "nbl/asset/utils/CGeometryCreator.h" namespace nbl::asset @@ -171,14 +172,35 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted protected: constexpr static inline bool ASSET_MUTABILITY_ON_CACHE_INSERT = true; - IAssetManager* m_manager; system::ISystem* m_system; public: - NBL_API2 IAssetLoaderOverride(IAssetManager* _manager); + struct SCreationParams + { + IAssetManager* manager = nullptr; + core::smart_refctd_ptr geoCreator = nullptr; + //core::smart_refctd_ptr polyGeoManip = nullptr; + }; + NBL_API2 IAssetLoaderOverride(SCreationParams&& params); + + // + inline IAssetManager* getManager() const {return m_creationParams.manager;} // - inline IAssetManager* getManager() const {return m_manager;} + inline CGeometryCreator* getGeometryCreator() + { + if (!m_creationParams.geoCreator) + m_creationParams.geoCreator = core::make_smart_refctd_ptr(); + return m_creationParams.geoCreator.get(); + } + + /* + inline CPolygonGeometryManipulator* getPolygonGeometryManipulator() + { + if (!m_creationParams.geoCreator) + m_creationParams.geoCreator = core::make_smart_refctd_ptr(); + return m_creationParams.polyGeoManip.get(); + }*/ //! template @@ -274,6 +296,9 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! 
After a successful load of an asset or sub-asset //TODO change name virtual void insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadParams& _params, const uint32_t hierarchyLevel); + + private: + SCreationParams m_creationParams; }; public: @@ -302,6 +327,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted protected: // accessors for loaders + // TODO: deprecate SAssetBundle interm_getAssetInHierarchy(system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); SAssetBundle interm_getAssetInHierarchy(const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); // only the overload we use for now diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 4a31bd6a95..c3bed0e49e 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -128,27 +128,28 @@ class NBL_API2 CPolygonGeometryManipulator const_cast(geo->getAABBStorage()) = computeAABB(geo); } - static inline core::smart_refctd_ptr createTriangleListIndexing(const ICPUPolygonGeometry* geo) + static inline core::smart_refctd_ptr createTriangleListIndexing(const ICPUPolygonGeometry* geo, const bool reverse=false, const bool recomputeHash=true) { const auto* indexing = geo->getIndexingCallback(); - if (!indexing) return nullptr; - if (indexing->degree() != 3) return nullptr; + if (!indexing || indexing->degree()!=3) // TODO: why just triangle? + return nullptr; const auto originalView = geo->getIndexView(); - const auto originalIndexSize = originalView ? originalView.composed.stride : 0; + const auto originalIndexSize = originalView ? 
originalView.composed.stride:0; const auto primCount = geo->getPrimitiveCount(); const auto maxIndex = geo->getPositionView().getElementCount() - 1; const uint8_t indexSize = maxIndex <= std::numeric_limits::max() ? sizeof(uint16_t) : sizeof(uint32_t); const auto outGeometry = core::move_and_static_cast(geo->clone(0u)); - if (indexing && indexing->knownTopology() == EPT_TRIANGLE_LIST) + if (indexing->knownTopology()==EPT_TRIANGLE_LIST && !reverse) return outGeometry; auto* outGeo = outGeometry.get(); const auto indexBufferUsages = [&] { - if (originalView) return originalView.src.buffer->getUsageFlags(); + if (originalView) + return originalView.src.buffer->getUsageFlags(); return core::bitflag(IBuffer::EUF_INDEX_BUFFER_BIT); }(); auto indexBuffer = ICPUBuffer::create({ primCount * indexing->degree() * indexSize, indexBufferUsages }); @@ -173,7 +174,8 @@ class NBL_API2 CPolygonGeometryManipulator .indexSize = originalIndexSize, .beginPrimitive = 0, .endPrimitive = primCount, - .out = indexBufferPtr, + .reversePrims = reverse, + .out = indexBufferPtr }; indexing->operator()(context); @@ -190,7 +192,8 @@ class NBL_API2 CPolygonGeometryManipulator .indexSize = originalIndexSize, .beginPrimitive = 0, .endPrimitive = primCount, - .out = indexBufferPtr, + .reversePrims = reverse, + .out = indexBufferPtr }; indexing->operator()(context); @@ -209,11 +212,35 @@ class NBL_API2 CPolygonGeometryManipulator outGeo->setIndexing(IPolygonGeometryBase::TriangleList()); outGeo->setIndexView(std::move(indexView)); - CGeometryManipulator::recomputeContentHash(outGeo->getIndexView()); + + if (recomputeHash) + CGeometryManipulator::recomputeContentHash(outGeo->getIndexView()); return outGeometry; } + template + requires (std::same_as, hlsl::float32_t3>) + static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn, float epsilon = 1.525e-5f) + { + return COBBGenerator::compute(vertexCount, std::forward(fetchFn), epsilon); + } + + static 
core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo, const bool reverse=false, const bool recomputeHash=true); + + using SSNGVertexData = CSmoothNormalGenerator::VertexData; + using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; + // NOTE: Requires unwelded mesh on input, TODO make it resillient against that (only unweld normals temporarily, maybe even avoid position unweld) + static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, const bool enableWelding=false, float epsilon=1.525e-5f, + SSNGVxCmpFunction vxcmp=[](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + { + constexpr float cosOf45Deg = 0.70710678118f; + return hlsl::dot(v0.weightedNormal,v1.weightedNormal)*hlsl::rsqrt(hlsl::dot(v0.weightedNormal,v0.weightedNormal)*hlsl::dot(v1.weightedNormal,v1.weightedNormal)) > cosOf45Deg; + }, + const bool recomputeHash = true + ); + + //! Comparison methods enum E_ERROR_METRIC { @@ -232,26 +259,6 @@ class NBL_API2 CPolygonGeometryManipulator EEM_QUATERNION, EEM_COUNT }; - - template - requires (std::same_as, hlsl::float32_t3>) - static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn, float epsilon = 1.525e-5f) - { - return COBBGenerator::compute(vertexCount, std::forward(fetchFn), epsilon); - } - - static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); - - using SSNGVertexData = CSmoothNormalGenerator::VertexData; - using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; - - static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, - SSNGVxCmpFunction vxcmp = [](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) - { - constexpr float cosOf45Deg = 0.70710678118f; - return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > 
cosOf45Deg; - }); - #if 0 // TODO: REDO //! Struct used to pass chosen comparison method and epsilon to functions performing error metrics. /** @@ -385,24 +392,6 @@ class NBL_API2 CPolygonGeometryManipulator */ static core::smart_refctd_ptr idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - //! static inline std::array getTriangleIndices(const ICPUMeshBuffer* mb, uint32_t triangleIx) { @@ -606,40 +595,6 @@ class NBL_API2 CPolygonGeometryManipulator return aabb; } - //! Recalculates the cached bounding box of the meshbuffer - static inline void recalculateBoundingBox(ICPUMeshBuffer* meshbuffer) - { - meshbuffer->setBoundingBox(calculateBoundingBox(meshbuffer,meshbuffer->getJointAABBs())); - } - - //! Flips the direction of surfaces. - /** Changes backfacing triangles to frontfacing - triangles and vice versa. 
- \param mesh Mesh on which the operation is performed. */ - static void flipSurfaces(ICPUMeshBuffer* inbuffer); - - //! Creates a copy of a mesh with all vertices unwelded - /** \param mesh Input mesh - \return Mesh consisting only of unique faces. All vertices - which were previously shared are now duplicated. */ - static core::smart_refctd_ptr createMeshBufferUniquePrimitives(ICPUMeshBuffer* inbuffer, bool _makeIndexBuf = false); - - // - static core::smart_refctd_ptr calculateSmoothNormals(ICPUMeshBuffer* inbuffer, bool makeNewMesh = false, float epsilon = 1.525e-5f, - uint32_t normalAttrID = 3u, - VxCmpFunction vxcmp = [](const IMeshManipulator::SSNGVertexData& v0, const IMeshManipulator::SSNGVertexData& v1, ICPUMeshBuffer* buffer) - { - static constexpr float cosOf45Deg = 0.70710678118f; - return dot(v0.parentTriangleFaceNormal,v1.parentTriangleFaceNormal)[0] > cosOf45Deg; - }); - - - //! Creates a copy of a mesh with vertices welded - /** \param mesh Input mesh - \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type). - \param tolerance The threshold for vertex comparisons. - \return Mesh without redundant vertices. */ - static core::smart_refctd_ptr createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* errMetrics, const bool& optimIndexType = true, const bool& makeNewMesh = false); //! Throws meshbuffer into full optimizing pipeline consisting of: vertices welding, z-buffer optimization, vertex cache optimization (Forsyth's algorithm), fetch optimization and attributes requantization. A new meshbuffer is created unless given meshbuffer doesn't own (getMeshDataAndFormat()==NULL) a data format descriptor. /**@return A new meshbuffer or NULL if an error occured. 
*/ @@ -864,45 +819,6 @@ class CMeshManipulator : public IMeshManipulator return output; } - template - static inline core::smart_refctd_ptr triangleStripsToTriangles(const void* _input, uint32_t& _idxCount) - { - const auto outputSize = _idxCount = (_idxCount - 2) * 3; - - auto output = ICPUBuffer::create({ sizeof(OutType)*outputSize }); - const auto* iptr = reinterpret_cast(_input); - auto* optr = reinterpret_cast(output->getPointer()); - for (uint32_t i = 0, j = 0; i < outputSize; j += 2) - { - optr[i++] = iptr[j + 0]; - optr[i++] = iptr[j + 1]; - optr[i++] = iptr[j + 2]; - if (i == outputSize) - break; - optr[i++] = iptr[j + 2]; - optr[i++] = iptr[j + 1]; - optr[i++] = iptr[j + 3]; - } - return output; - } - - template - static inline core::smart_refctd_ptr trianglesFanToTriangles(const void* _input, uint32_t& _idxCount) - { - const auto outputSize = _idxCount = (_idxCount - 2) * 3; - - auto output = ICPUBuffer::create({ sizeof(OutType)*outputSize }); - const auto* iptr = reinterpret_cast(_input); - auto* optr = reinterpret_cast(output->getPointer()); - for (uint32_t i = 0, j = 1; i < outputSize;) - { - optr[i++] = iptr[0]; - optr[i++] = iptr[j++]; - optr[i++] = iptr[j]; - } - return output; - } - private: CQuantNormalCache quantNormalCache; CQuantQuaternionCache quantQuaternionCache; diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 7f6065e2a9..21d4236438 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -6,350 +6,355 @@ #include "nbl/asset/utils/CPolygonGeometryManipulator.h" -namespace nbl::asset { +namespace nbl::asset +{ template concept VertexWelderAccelerationStructure = requires(T const cobj, hlsl::float32_t3 position, std::function fn) { - typename T::vertex_data_t; - { std::same_as }; - { cobj.forEachBroadphaseNeighborCandidates(position, fn) } -> std::same_as; + typename T::vertex_data_t; + { std::same_as }; + { 
cobj.forEachBroadphaseNeighborCandidates(position, fn) } -> std::same_as; }; -class CVertexWelder { - - public: - - class WeldPredicate - { - public: - virtual bool init(const ICPUPolygonGeometry* geom) = 0; - virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; - virtual ~WeldPredicate() = default; - }; - - class DefaultWeldPredicate : public WeldPredicate - { - private: - - struct SDataViewContext +class CVertexWelder +{ + public: + class WeldPredicate { - uint32_t channelCount : 3; - uint32_t byteSize: 29; + public: + virtual bool init(const ICPUPolygonGeometry* geom) = 0; + virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; + virtual ~WeldPredicate() = default; }; - static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t byteSize) + class DefaultWeldPredicate final : public WeldPredicate { - const auto* basePtr = reinterpret_cast(view.getPointer()); - const auto stride = view.composed.stride; - return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); - } - - static inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - { - const auto diff = abs(val1[channel_i] - val2[channel_i]); - if (diff > epsilon) return false; - } - return true; - } - - static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) - { - if (context.byteSize == 0) return true; - - assert(view); - assert(view.composed.isFormatted()); - assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); - 
switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - return isIntegralElementEqual(view, index1, index2, context.byteSize); - } - default: - { - return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); - } - } - return true; - } - - static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) - { - if (context.byteSize == 0) return true; - - assert(view); - assert(view.composed.isFormatted()); - assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - return isIntegralElementEqual(view, index1, index2, context.byteSize); - } - default: - { - if (context.channelCount != 3) - return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); - - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - } - } - - float m_epsilon; - - SDataViewContext m_positionViewContext; - SDataViewContext m_normalViewContext; - - struct SJointViewContext - { - SDataViewContext indices; - SDataViewContext weights; + struct SDataViewContext + { + uint32_t channelCount : 3; + uint32_t byteSize: 29; + }; + + static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t byteSize) + { + const auto* basePtr = reinterpret_cast(view.getPointer()); + const auto stride = view.composed.stride; + return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); + } + + static 
inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + { + const auto diff = abs(val1[channel_i] - val2[channel_i]); + if (diff > epsilon) return false; + } + return true; + } + + static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) + { + if (context.byteSize == 0) return true; + + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2, context.byteSize); + } + default: + { + return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); + } + } + return true; + } + + static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) + { + if (context.byteSize == 0) return true; + + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2, context.byteSize); + } + default: + { + if (context.channelCount != 3) + return isRealElementEqual(view, index1, index2, context.channelCount, 
epsilon); + + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + } + } + + float m_epsilon; + + SDataViewContext m_positionViewContext; + SDataViewContext m_normalViewContext; + + struct SJointViewContext + { + SDataViewContext indices; + SDataViewContext weights; + }; + core::vector m_jointViewContexts; + + core::vector m_auxAttributeViewContexts; + + public: + inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} + + inline bool init(const ICPUPolygonGeometry* polygon) override + { + auto isViewFormatValid = [](const ICPUPolygonGeometry::SDataView& view) + { + return view.composed.isFormatted() && IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat; + }; + auto getViewContext = [](const ICPUPolygonGeometry::SDataView& view) -> SDataViewContext + { + if (!view) + { + return { + .channelCount = 0, + .byteSize = 0 + }; + } + return { + .channelCount = getFormatChannelCount(view.composed.format), + .byteSize = getTexelOrBlockBytesize(view.composed.format) + }; + }; + + if (!polygon->valid()) + return false; + + const auto& positionView = polygon->getPositionView(); + if (IGeometryBase::getMatchingAABBFormat(positionView.composed.format) != positionView.composed.rangeFormat) + return false; + m_positionViewContext = { + .channelCount = getFormatChannelCount(positionView.composed.format), + .byteSize = getTexelOrBlockBytesize(positionView.composed.format), + }; + + const auto& normalView = polygon->getNormalView(); + if (normalView && !isViewFormatValid(normalView)) + return false; + m_normalViewContext = getViewContext(normalView); + + m_jointViewContexts.reserve(polygon->getJointWeightViews().size()); + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (jointWeightView.indices && !isViewFormatValid(jointWeightView.indices)) + return false; + if (jointWeightView.weights && 
!isViewFormatValid(jointWeightView.weights)) + return false; + m_jointViewContexts.push_back({ + .indices = getViewContext(jointWeightView.indices), + .weights = getViewContext(jointWeightView.weights), + }); + } + + m_auxAttributeViewContexts.reserve(polygon->getAuxAttributeViews().size()); + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + { + if (auxAttributeView && !isViewFormatValid(auxAttributeView)) + return false; + m_auxAttributeViewContexts.push_back(getViewContext(auxAttributeView)); + } + + return true; + } + + inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override + { + if (!isAttributeValEqual(polygon->getPositionView(), m_positionViewContext, index1, index2, m_epsilon)) + return false; + + const auto& normalView = polygon->getNormalView(); + if (!isAttributeDirEqual(normalView, m_normalViewContext, index1, index2, m_epsilon)) + return false; + + for (uint64_t joint_i = 0; joint_i < polygon->getJointWeightViews().size(); joint_i++) + { + const auto& jointWeightView = polygon->getJointWeightViews()[joint_i]; + if (!isAttributeValEqual(jointWeightView.indices, m_jointViewContexts[joint_i].indices, index1, index2, m_epsilon)) + return false; + if (!isAttributeValEqual(jointWeightView.weights, m_jointViewContexts[joint_i].weights, index1, index2, m_epsilon)) + return false; + } + + const auto& auxAttrViews = polygon->getAuxAttributeViews(); + for (uint64_t aux_i = 0; aux_i < auxAttrViews.size(); aux_i++) + if (!isAttributeValEqual(auxAttrViews[aux_i], m_auxAttributeViewContexts[aux_i], index1, index2, m_epsilon)) + return false; + + return true; + } + + inline ~DefaultWeldPredicate() override = default; }; - core::vector m_jointViewContexts; - - core::vector m_auxAttributeViewContexts; - - public: - - inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} - inline bool init(const ICPUPolygonGeometry* polygon) override + template + static inline core::smart_refctd_ptr 
weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicate& shouldWeldFn, const bool recomputeHash=true) { - auto isViewFormatValid = [](const ICPUPolygonGeometry::SDataView& view) - { - return view.composed.isFormatted() && IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat; - }; - auto getViewContext = [](const ICPUPolygonGeometry::SDataView& view) -> SDataViewContext - { - if (!view) - { - return { - .channelCount = 0, - .byteSize = 0 + auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); + + if (!shouldWeldFn.init(polygon)) + return nullptr; + + const auto& positionView = polygon->getPositionView(); + const auto vertexCount = positionView.getElementCount(); + + constexpr auto INVALID_INDEX = std::numeric_limits::max(); + core::vector remappedVertexIndexes(vertexCount); + + uint32_t maxRemappedIndex = 0; + // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together + for (uint32_t index = 0; index < vertexCount; index++) + { + hlsl::float32_t3 position; + positionView.decodeElement(index, position); + auto remappedVertexIndex = INVALID_INDEX; + as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { + const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; + // make sure we can only map higher indices to lower indices to disallow loops + if (candidate.indexgetIndexView(); + const auto remappedRangeFormat = (maxRemappedIndex - 1) < static_cast(std::numeric_limits::max()) ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; + + auto createRemappedIndexView = [&](size_t indexCount) { + const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? 
sizeof(uint16_t) : sizeof(uint32_t); + auto remappedIndexBuffer = ICPUBuffer::create({indexSize * indexCount, IBuffer::EUF_INDEX_BUFFER_BIT}); + auto remappedIndexView = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = indexSize, + .rangeFormat = remappedRangeFormat + }, + .src = { + .offset = 0, + .size = remappedIndexBuffer->getSize(), + .buffer = std::move(remappedIndexBuffer) + } + }; + + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) + { + hlsl::shapes::AABB<4, uint16_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u16 = aabb; + remappedIndexView.composed.format = EF_R16_UINT; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + hlsl::shapes::AABB<4, uint32_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u32 = aabb; + remappedIndexView.composed.format = EF_R32_UINT; + } + + return remappedIndexView; }; - } - return { - .channelCount = getFormatChannelCount(view.composed.format), - .byteSize = getTexelOrBlockBytesize(view.composed.format) - }; - }; - - if (!polygon->valid()) return false; - - const auto& positionView = polygon->getPositionView(); - if (IGeometryBase::getMatchingAABBFormat(positionView.composed.format) != positionView.composed.rangeFormat) return false; - m_positionViewContext = { - .channelCount = getFormatChannelCount(positionView.composed.format), - .byteSize = getTexelOrBlockBytesize(positionView.composed.format), - }; - - const auto& normalView = polygon->getNormalView(); - if (normalView && !isViewFormatValid(normalView)) return false; - m_normalViewContext = getViewContext(normalView); - - m_jointViewContexts.reserve(polygon->getJointWeightViews().size()); - for (const auto& jointWeightView : polygon->getJointWeightViews()) - { - if (jointWeightView.indices && !isViewFormatValid(jointWeightView.indices)) return false; - if (jointWeightView.weights && 
!isViewFormatValid(jointWeightView.weights)) return false; - m_jointViewContexts.push_back({ - .indices = getViewContext(jointWeightView.indices), - .weights = getViewContext(jointWeightView.weights), - }); - } - - m_auxAttributeViewContexts.reserve(polygon->getAuxAttributeViews().size()); - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - { - if (auxAttributeView && !isViewFormatValid(auxAttributeView)) return false; - m_auxAttributeViewContexts.push_back(getViewContext(auxAttributeView)); - } - - return true; - } - inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override - { - if (!isAttributeValEqual(polygon->getPositionView(), m_positionViewContext, index1, index2, m_epsilon)) - return false; - - const auto& normalView = polygon->getNormalView(); - if (!isAttributeDirEqual(normalView, m_normalViewContext, index1, index2, m_epsilon)) - return false; - - for (uint64_t joint_i = 0; joint_i < polygon->getJointWeightViews().size(); joint_i++) - { - const auto& jointWeightView = polygon->getJointWeightViews()[joint_i]; - if (!isAttributeValEqual(jointWeightView.indices, m_jointViewContexts[joint_i].indices, index1, index2, m_epsilon)) return false; - if (!isAttributeValEqual(jointWeightView.weights, m_jointViewContexts[joint_i].weights, index1, index2, m_epsilon)) return false; - } - - const auto& auxAttrViews = polygon->getAuxAttributeViews(); - for (uint64_t aux_i = 0; aux_i < auxAttrViews.size(); aux_i++) - { - if (!isAttributeValEqual(auxAttrViews[aux_i], m_auxAttributeViewContexts[aux_i], index1, index2, m_epsilon)) return false; - } - - return true; + if (indexView) + { + auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); + auto remappedIndexes = [&]() -> bool { + auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) + { + hlsl::vector index; + 
indexView.decodeElement>(index_i, index); + const auto remappedIndex = remappedVertexIndexes[index.x]; + remappedIndexPtr[index_i] = static_cast(remappedIndex); + if (remappedIndex == INVALID_INDEX) return false; + } + return true; + }; + + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + if (!remappedIndexes.template operator()()) return nullptr; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + if (!remappedIndexes.template operator()()) return nullptr; + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + + } else + { + auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); + + auto fillRemappedIndex = [&](){ + auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < remappedVertexIndexes.size(); index_i++) + { + if (remappedVertexIndexes[index_i] == INVALID_INDEX) return false; + remappedIndexBufferPtr[index_i] = remappedVertexIndexes[index_i]; + } + return true; + }; + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + if (!fillRemappedIndex.template operator()()) return nullptr; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + if (!fillRemappedIndex.template operator()()) return nullptr; + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + } + + if (recomputeHash) + CGeometryManipulator::recomputeContentHash(outPolygon->getIndexView()); + return outPolygon; } - - inline ~DefaultWeldPredicate() override = default; - - }; - - template - static inline core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicate& shouldWeldFn) { - auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - - if (!shouldWeldFn.init(polygon)) return nullptr; - - const auto& positionView = polygon->getPositionView(); - const auto vertexCount = positionView.getElementCount(); - - constexpr auto INVALID_INDEX = std::numeric_limits::max(); 
- core::vector remappedVertexIndexes(vertexCount); - - uint32_t maxRemappedIndex = 0; - // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together - for (uint32_t index = 0; index < vertexCount; index++) - { - hlsl::float32_t3 position; - positionView.decodeElement(index, position); - auto remappedVertexIndex = INVALID_INDEX; - as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { - const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; - // make sure we can only map higher indices to lower indices to disallow loops - if (candidate.indexgetIndexView(); - const auto remappedRangeFormat = (maxRemappedIndex - 1) < static_cast(std::numeric_limits::max()) ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; - - auto createRemappedIndexView = [&](size_t indexCount) { - const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? 
sizeof(uint16_t) : sizeof(uint32_t); - auto remappedIndexBuffer = ICPUBuffer::create({indexSize * indexCount, IBuffer::EUF_INDEX_BUFFER_BIT}); - auto remappedIndexView = ICPUPolygonGeometry::SDataView{ - .composed = { - .stride = indexSize, - .rangeFormat = remappedRangeFormat - }, - .src = { - .offset = 0, - .size = remappedIndexBuffer->getSize(), - .buffer = std::move(remappedIndexBuffer) - } - }; - - if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) - { - hlsl::shapes::AABB<4, uint16_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = maxRemappedIndex; - remappedIndexView.composed.encodedDataRange.u16 = aabb; - remappedIndexView.composed.format = EF_R16_UINT; - } - else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - hlsl::shapes::AABB<4, uint32_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = maxRemappedIndex; - remappedIndexView.composed.encodedDataRange.u32 = aabb; - remappedIndexView.composed.format = EF_R32_UINT; - } - - return remappedIndexView; - }; - - - if (indexView) - { - auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); - auto remappedIndexes = [&]() -> bool { - auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); - for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) - { - hlsl::vector index; - indexView.decodeElement>(index_i, index); - const auto remappedIndex = remappedVertexIndexes[index.x]; - remappedIndexPtr[index_i] = static_cast(remappedIndex); - if (remappedIndex == INVALID_INDEX) return false; - } - return true; - }; - - if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - if (!remappedIndexes.template operator()()) return nullptr; - } - else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - if (!remappedIndexes.template operator()()) return nullptr; - } - - outPolygon->setIndexView(std::move(remappedIndexView)); - - } else - { - auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); - - auto 
fillRemappedIndex = [&](){ - auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); - for (uint32_t index_i = 0; index_i < remappedVertexIndexes.size(); index_i++) - { - if (remappedVertexIndexes[index_i] == INVALID_INDEX) return false; - remappedIndexBufferPtr[index_i] = remappedVertexIndexes[index_i]; - } - return true; - }; - if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - if (!fillRemappedIndex.template operator()()) return nullptr; - } - else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - if (!fillRemappedIndex.template operator()()) return nullptr; - } - - outPolygon->setIndexView(std::move(remappedIndexView)); - } - - CGeometryManipulator::recomputeContentHash(outPolygon->getIndexView()); - return outPolygon; - } }; } diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 9fd4eee833..0c24c2b1d0 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -12,9 +12,7 @@ #include "nbl/asset/IShader.h" #include "nbl/asset/utils/ISPIRVOptimizer.h" - -// Less leakage than "nlohmann/json.hpp" only forward declarations -#include "nlohmann/json_fwd.hpp" +#include "nbl/system/json.h" #include "nbl/builtin/hlsl/enums.hlsl" @@ -111,11 +109,10 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // struct SMacroDefinition { - friend void to_json(nlohmann::json&, const SMacroDefinition&); - friend void from_json(const nlohmann::json&, SMacroDefinition&); - std::string_view identifier; std::string_view definition; + + friend struct system::json::adl_serializer; }; // @@ -222,9 +219,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline bool isStandardInclude() const { return standardInclude; } private: - friend void to_json(nlohmann::json& j, const SEntry::SPreprocessingDependency& dependency); - friend void from_json(const nlohmann::json& j, SEntry::SPreprocessingDependency& 
dependency); friend class CCache; + friend struct system::json::adl_serializer; // path or identifier system::path requestingSourceDir = ""; @@ -258,8 +254,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted friend class SCompilerArgs; friend class SEntry; friend class CCache; - friend void to_json(nlohmann::json&, const SPreprocessorArgs&); - friend void from_json(const nlohmann::json&, SPreprocessorArgs&); + friend struct system::json::adl_serializer; // Default constructor needed for json serialization of SCompilerArgs SPreprocessorArgs() {}; @@ -301,8 +296,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted private: friend class SEntry; friend class CCache; - friend void to_json(nlohmann::json&, const SCompilerArgs&); - friend void from_json(const nlohmann::json&, SCompilerArgs&); + friend struct system::json::adl_serializer; // Default constructor needed for json serialization of SEntry SCompilerArgs() {} diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 1887f4b51f..a34acf2507 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -29,7 +29,7 @@ struct Promote && (concepts array_set setter; To output; [[unroll]] - for (int i = 0; i < vector_traits::Dimension; ++i) + for (uint32_t i=0; i::Dimension; ++i) setter(output, i, v); return output; } diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index af23d6f07d..0663d89c0b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -261,8 +261,8 @@ struct flipSignIfRHSNegative_helper -NBL_CONSTEXPR_FUNC T flipSign(T val, U flip) +template +NBL_CONSTEXPR_FUNC T flipSign(T val, U flip = true) { return impl::flipSign_helper::__call(val, flip); } diff --git a/include/nbl/builtin/hlsl/math/linalg/basic.hlsl b/include/nbl/builtin/hlsl/math/linalg/basic.hlsl index 
15b9014998..64f923a521 100644 --- a/include/nbl/builtin/hlsl/math/linalg/basic.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/basic.hlsl @@ -42,7 +42,7 @@ MatT identity() } template truncate(const NBL_CONST_REF_ARG(matrix) inMatrix) +inline matrix truncate(NBL_CONST_REF_ARG(matrix) inMatrix) { matrix retval; diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 2b6282eb8d..af84e49544 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -19,50 +19,69 @@ namespace sampling template struct Bilinear { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - - // BijectiveSampler concept types - using domain_type = vector2_type; - using codomain_type = vector2_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - using inverse_sample_type = domain_and_rcpPdf; - - static Bilinear create(const vector4_type bilinearCoeffs) - { - Bilinear retval; - retval.bilinearCoeffs = bilinearCoeffs; - retval.twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); - return retval; - } - - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) - { - vector2_type u; - Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(_u.y); - - const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); - Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(_u.x); - - rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); - - return u; - } - - scalar_type pdf(const vector2_type u) - { - return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], 
u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); - } - - // unit square: x0y0 x1y0 - // x0y1 x1y1 - vector4_type bilinearCoeffs; // (x0y0, x0y1, x1y0, x1y1) - vector2_type twiceAreasUnderXCurve; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + using inverse_sample_type = domain_and_rcpPdf; + + static Bilinear create(const vector4_type bilinearCoeffs) + { + Bilinear retval; + retval.bilinearCoeffs = bilinearCoeffs; + retval.bilinearCoeffDiffs = vector2_type(bilinearCoeffs[2]-bilinearCoeffs[0], bilinearCoeffs[3]-bilinearCoeffs[1]); + vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); + retval.twiceAreasUnderXCurveSumOverFour = scalar_type(4.0) / (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]); + retval.lineary = Linear::create(twiceAreasUnderXCurve); + return retval; + } + + vector2_type generate(const vector2_type u) + { + vector2_type p; + p.y = lineary.generate(u.y); + + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + Linear linearx = Linear::create(ySliceEndPoints); + p.x = linearx.generate(u.x); + + return p; + } + + vector2_type generateInverse(const vector2_type p) + { + vector2_type u; + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + Linear linearx = Linear::create(ySliceEndPoints); + u.x = linearx.generateInverse(p.x); + u.y = lineary.generateInverse(p.y); + + return u; + } + + scalar_type forwardPdf(const 
vector2_type u) + { + return backwardPdf(generate(u)); + } + + scalar_type backwardPdf(const vector2_type p) + { + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + return nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], p.x) * fourOverTwiceAreasUnderXCurveSum; + } + + // unit square: x0y0 x1y0 + // x0y1 x1y1 + vector4_type bilinearCoeffs; // (x0y0, x0y1, x1y0, x1y1) + vector2_type bilinearCoeffDiffs; + vector2_type fourOverTwiceAreasUnderXCurveSum; + Linear lineary; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 4dd774c8ba..01d6143de5 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -19,23 +19,34 @@ namespace sampling template) struct BoxMullerTransform { - using scalar_type = T; - using vector2_type = vector; - - // ResamplableSampler concept types - using domain_type = vector2_type; - using codomain_type = vector2_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - - vector2_type operator()(const vector2_type xi) - { - scalar_type sinPhi, cosPhi; - math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); - return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; - } - - T stddev; + using scalar_type = T; + using vector2_type = vector; + + // ResamplableSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + + vector2_type generate(const vector2_type u) + { + scalar_type sinPhi, cosPhi; + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * 
stddev; + } + + vector2_type forwardPdf(const vector2_type u) + { + return backwardPdf(generate(u)); + } + + vector2_type backwardPdf(const vector2_type outPos) + { + const vector2_type outPos2 = outPos * outPos; + return vector2_type(nbl::hlsl::exp(scalar_type(-0.5) * (outPos2.x + outPos2.y)), numbers::pi * scalar_type(0.5) * hlsl::atan2(outPos.y, outPos.x)); + } + + T stddev; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 4d80e14861..342b754c5a 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -46,6 +46,43 @@ vector concentricMapping(const vector _u) return p; } +template +vector invertConcentricMapping(const vector p) +{ + T theta = hlsl::atan2(p.y, p.x); // -pi -> pi + T r = hlsl::sqrt(p.x * p.x + p.y * p.y); + const T PiOver4 = T(0.25) * numbers::pi; + + vector u; + // TODO: should reduce branching somehow? 
+ if (hlsl::abs(theta) < PiOver4 || hlsl::abs(theta) > 3 * PiOver4) + { + r = ieee754::copySign(r, p.x); + u.x = r; + if (p.x < 0) { + if (p.y < 0) { + u.y = (numbers::pi + theta) * r / PiOver4; + } else { + u.y = (theta - numbers::pi) * r / PiOver4; + } + } else { + u.y = (theta * r) / PiOver4; + } + } + else + { + r = ieee754::copySign(r, p.y); + u.y = r; + if (p.y < 0) { + u.x = -(T(0.5) * numbers::pi + theta) * r / PiOver4; + } else { + u.x = (T(0.5) * numbers::pi - theta) * r / PiOver4; + } + } + + return (u + hlsl::promote >(1.0)) * T(0.5); +} + } // namespace sampling } // namespace hlsl } // namespace nbl diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index c65a688eb3..ed6c574284 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -31,18 +31,43 @@ struct ProjectedHemisphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(const vector_t2 _sample) + static vector_t3 __generate(const vector_t2 _sample) { vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); return vector_t3(p.x, p.y, z); } - static T pdf(const T L_z) + vector_t3 generate(const vector_t2 _sample) + { + return __generate(_sample); + } + + static vector_t2 __generateInverse(const vector_t3 L) + { + return invertConcentricMapping(L.xy); + } + + vector_t2 generateInverse(const vector_t3 L) + { + return __generateInverse(L); + } + + static T __pdf(const T L_z) { return L_z * numbers::inv_pi; } + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(__generate(_sample).z); + } + + scalar_type backwardPdf(const vector_t3 L) + { + return __pdf(L.z); + } + template > static ::nbl::hlsl::sampling::quotient_and_pdf quotient_and_pdf(const T L) { @@ -71,9 +96,9 @@ struct 
ProjectedSphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(NBL_REF_ARG(vector_t3) _sample) + static vector_t3 __generate(NBL_REF_ARG(vector_t3) _sample) { - vector_t3 retval = hemisphere_t::generate(_sample.xy); + vector_t3 retval = hemisphere_t::__generate(_sample.xy); const bool chooseLower = _sample.z > T(0.5); retval.z = chooseLower ? (-retval.z) : retval.z; if (chooseLower) @@ -82,9 +107,36 @@ struct ProjectedSphere return retval; } - static T pdf(T L_z) + vector_t3 generate(NBL_REF_ARG(vector_t3) _sample) + { + return __generate(_sample); + } + + static vector_t3 __generateInverse(const vector_t3 L) + { + // TODO: incomplete information to get z component, we only know mapping of (u.z > 0.5 <-> L +ve) and (u.z < 0.5 <-> L -ve) + // so set to 0 or 1 for now + return vector_t3(hemisphere_t::__generateInverse(L.xy), hlsl::mix(T(0.0), T(1.0), L.z > T(0.0))); + } + + vector_t3 generateInverse(const vector_t3 L) + { + return __generateInverse(L); + } + + static T __pdf(T L_z) + { + return T(0.5) * hemisphere_t::__pdf(L_z); + } + + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(__generate(_sample).z); + } + + scalar_type backwardPdf(const vector_t3 L) { - return T(0.5) * hemisphere_t::pdf(L_z); + return __pdf(L.z); } template > diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index 16f583bbbf..78c57a53bb 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -19,36 +19,57 @@ namespace sampling template struct Linear { - using scalar_type = T; - using vector2_type = vector; - - // BijectiveSampler concept types - using domain_type = scalar_type; - using codomain_type = scalar_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - using inverse_sample_type = domain_and_rcpPdf; - - static Linear create(const vector2_type 
linearCoeffs) // start and end importance values (start, end) - { - Linear retval; - retval.linearCoeffStart = linearCoeffs[0]; - retval.rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); - vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; - retval.squaredCoeffStart = squaredCoeffs[0]; - retval.squaredCoeffDiff = squaredCoeffs[1] - squaredCoeffs[0]; - return retval; - } - - scalar_type generate(const scalar_type u) - { - return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); - } - - scalar_type linearCoeffStart; - scalar_type rcpDiff; - scalar_type squaredCoeffStart; - scalar_type squaredCoeffDiff; + using scalar_type = T; + using vector2_type = vector; + + // BijectiveSampler concept types + using domain_type = scalar_type; + using codomain_type = scalar_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + using inverse_sample_type = domain_and_rcpPdf; + + static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end), assumed to be at x=0 and x=1 + { + Linear retval; + retval.linearCoeffStart = linearCoeffs[0]; + retval.linearCoeffDiff = linearCoeffs[1] - linearCoeffs[0]; + retval.rcpCoeffSum = scalar_type(1.0) / (linearCoeffs[0] + linearCoeffs[1]); + retval.rcpDiff = -scalar_type(1.0) / retval.linearCoeffDiff; + vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; + retval.squaredCoeffStart = squaredCoeffs[0]; + retval.squaredCoeffDiff = squaredCoeffs[1] - squaredCoeffs[0]; + return retval; + } + + scalar_type generate(const scalar_type u) + { + return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); + } + + scalar_type generateInverse(const scalar_type x) + { + return x * (scalar_type(2.0) * linearCoeffStart + linearCoeffDiff * x) * rcpCoeffSum; + } + + scalar_type forwardPdf(const scalar_type 
u) + { + return backwardPdf(generate(u)); + } + + scalar_type backwardPdf(const scalar_type x) + { + if (x < scalar_type(0.0) || x > scalar_type(1.0)) + return scalar_type(0.0); + return scalar_type(2.0) * (linearCoeffStart + x * linearCoeffDiff) * rcpCoeffSum; + } + + scalar_type linearCoeffStart; + scalar_type linearCoeffDiff; + scalar_type rcpCoeffSum; + scalar_type rcpDiff; + scalar_type squaredCoeffStart; + scalar_type squaredCoeffDiff; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index eeb48ea388..5fba1df2d7 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -22,79 +22,57 @@ namespace sampling template struct ProjectedSphericalTriangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - - // ResamplableSampler concept types - using domain_type = vector2_type; - using codomain_type = vector3_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - - static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) - { - ProjectedSphericalTriangle retval; - retval.tri = tri; - return retval; - } - - vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) - { - const scalar_type minimumProjSolidAngle = 0.0; - - matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); - - return bxdfPdfAtVertex.yyxz; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const 
vector3_type receiverNormal, bool isBSDF, const vector2_type _u) - { - vector2_type u; - // pre-warp according to proj solid angle approximation - vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); - Bilinear bilinear = Bilinear::create(patch); - u = bilinear.generate(rcpPdf, _u); - - // now warp the points onto a spherical triangle - const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - rcpPdf *= solidAngle; - - return L; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); - } - - scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) - { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.pdf(u); - } - - scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) - { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, L); - - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.pdf(u); - } - - shapes::SphericalTriangle tri; - sampling::SphericalTriangle sphtri; + using scalar_type = T; + using 
vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + // ResamplableSampler concept types + using domain_type = vector2_type; + using codomain_type = vector3_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + + Bilinear computeBilinearPatch() + { + const scalar_type minimumProjSolidAngle = 0.0; + + matrix m = matrix(sphtri.tri.vertices[0], sphtri.tri.vertices[1], sphtri.tri.vertices[2]); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(receiverWasBSDF, hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); + + return Bilinear::create(bxdfPdfAtVertex.yyxz); + } + + vector3_type generate(const vector2_type u) + { + vector2_type u; + // pre-warp according to proj solid angle approximation + Bilinear bilinear = computeBilinearPatch(); + u = bilinear.generate(_u); + + // now warp the points onto a spherical triangle + const vector3_type L = sphtri.generate(u); + return L; + } + + scalar_type forwardPdf(const vector2_type u) + { + const scalar_type pdf = sphtri.forwardPdf(u); + Bilinear bilinear = computeBilinearPatch(); + return pdf * bilinear.backwardPdf(u); + } + + scalar_type backwardPdf(const vector3_type L) + { + const scalar_type pdf = sphtri.backwardPdf(L); + const vector2_type u = sphtri.generateInverse(L); + Bilinear bilinear = computeBilinearPatch(); + return pdf * bilinear.backwardPdf(u); + } + + sampling::SphericalTriangle sphtri; + vector3_type receiverNormal; + bool receiverWasBSDF; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 8f90be6b3a..c80406a8f8 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -21,72 +21,94 @@ namespace sampling template struct SphericalRectangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = 
vector; - using vector4_type = vector; - - // ResamplableSampler concept types - using domain_type = vector2_type; - using codomain_type = vector2_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - - static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect) - { - SphericalRectangle retval; - retval.rect = rect; - return retval; - } - - vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S) - { - const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); - const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); - const vector4_type cosGamma = vector4_type( - -n_z[0] * n_z[1], - -n_z[1] * n_z[2], - -n_z[2] * n_z[3], - -n_z[3] * n_z[0]); - - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); - angle_adder.addCosine(cosGamma[1]); - scalar_type p = angle_adder.getSumofArccos(); - angle_adder = math::sincos_accumulator::create(cosGamma[2]); - angle_adder.addCosine(cosGamma[3]); - scalar_type q = angle_adder.getSumofArccos(); - - const scalar_type k = scalar_type(2.0) * numbers::pi - q; - const scalar_type b0 = n_z[0]; - const scalar_type b1 = n_z[2]; - S = p + q - scalar_type(2.0) * numbers::pi; - - const scalar_type CLAMP_EPS = 1e-5; - - // flip z axis if rect.r0.z > 0 - rect.r0.z = ieee754::flipSignIfRHSNegative(rect.r0.z, -rect.r0.z); - vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); - - const scalar_type au = uv.x * S + k; - const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); - const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] - const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); - - scalar_type xu = -(cu * rect.r0.z) / hlsl::sqrt(scalar_type(1.0) - cu 
* cu); - xu = hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs - const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z; - const scalar_type d = hlsl::sqrt(d_2); - - const scalar_type h0 = rect.r0.y / hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); - const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); - const scalar_type hv = h0 + uv.y * (h1 - h0); - const scalar_type hv2 = hv * hv; - const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - CLAMP_EPS); - - return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); - } - - shapes::SphericalRectangle rect; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + // ResamplableSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + + NBL_CONSTEXPR_STATIC_INLINE scalar_type ClampEps = 1e-5; + + static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect, const vector3_type observer) + { + SphericalRectangle retval; + + retval.r0 = hlsl::mul(rect.basis, rect.origin - observer); + const vector4_type denorm_n_z = vector4_type(-retval.r0.y, retval.r0.x + rect.extents.x, retval.r0.y + rect.extents.y, -retval.r0.x); + const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(retval.r0.z * retval.r0.z) + denorm_n_z * denorm_n_z); + retval.cosGamma = vector4_type( + -n_z[0] * n_z[1], + -n_z[1] * n_z[2], + -n_z[2] * n_z[3], + -n_z[3] * n_z[0] + ); + + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); + angle_adder.addCosine(cosGamma[1]); + scalar_type p = angle_adder.getSumofArccos(); + angle_adder = math::sincos_accumulator::create(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + scalar_type q = angle_adder.getSumofArccos(); + + const scalar_type k = scalar_type(2.0) * numbers::pi 
- q; + retval.solidAngle = p + q - scalar_type(2.0) * numbers::pi; + + // flip z axis if r0.z > 0 + retval.r0 = -hlsl::abs(retval.r0.z); + retval.r1 = retval.r0 + vector3_type(rect.extents.x, rect.extents.y, 0); + + retval.b0 = n_z[0]; + retval.b1 = n_z[2]; + return retval; + } + + vector2_type generate(const vector2_type u) + { + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + scalar_type q = angle_adder.getSumofArccos(); + const scalar_type k = scalar_type(2.0) * numbers::pi - q; + + const scalar_type au = u.x * solidAngle + k; + const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); + const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] + const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); + + scalar_type xu = -(cu * r0.z) / hlsl::sqrt(scalar_type(1.0) - cu * cu); + xu = hlsl::clamp(xu, r0.x, r1.x); // avoid Infs + const scalar_type d_2 = xu * xu + r0.z * r0.z; + const scalar_type d = hlsl::sqrt(d_2); + + const scalar_type h0 = r0.y / hlsl::sqrt(d_2 + r0.y * r0.y); + const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); + const scalar_type hv = h0 + u.y * (h1 - h0); + const scalar_type hv2 = hv * hv; + const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - ClampEps); + + return vector2_type((xu - r0.x), (yv - r0.y)); + } + + scalar_type forwardPdf(const vector2_type u) + { + return scalar_type(1.0) / solidAngle; + } + + scalar_type backwardPdf(const vector2_type L) + { + return scalar_type(1.0) / solidAngle; + } + + scalar_type solidAngle; + vector4_type cosGamma; + scalar_type b0; + scalar_type b1; + vector3_type r0; + vector3_type r1; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 5d9d32ad21..83cde18a96 100644 --- 
a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -22,105 +22,106 @@ namespace sampling template struct SphericalTriangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - - // BijectiveSampler concept types - using domain_type = vector2_type; - using codomain_type = vector3_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - using inverse_sample_type = domain_and_rcpPdf; - - static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) - { - SphericalTriangle retval; - retval.tri = tri; - return retval; - } - - // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) - { - scalar_type negSinSubSolidAngle, negCosSubSolidAngle; - math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); - - const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0]; - const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0]; - - // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful - scalar_type u_ = q - cos_vertices[0]; - scalar_type v_ = p + sin_vertices[0] * cos_c; - - // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors - vector3_type C_s = tri.vertex0; - if (csc_b < numeric_limits::max) - { - const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); - if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, 
cosAngleAlongAC); - } - - vector3_type retval = tri.vertex1; - const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertex1); - const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); - if (csc_b_s < numeric_limits::max) - { - const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); - if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); - } - return retval; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - - rcpPdf = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - - return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) - { - pdf = 1.0 / solidAngle; - - const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertex1); - const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); - const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertex0); - - const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c; - const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); - - const scalar_type cosC_ = sin_vertices[0] * sinB_ * cos_c - cos_vertices[0] * cosB_; - const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); - - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); - angle_adder.addAngle(cosB_, sinB_); - angle_adder.addAngle(cosC_, sinC_); - const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi)*pdf; - const scalar_type u = 
subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; - - const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); - const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : cos_c)); - - return vector2_type(u, v); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - - return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - } - - shapes::SphericalTriangle tri; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector3_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + using inverse_sample_type = domain_and_rcpPdf; + + static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + { + SphericalTriangle retval; + vector3_type cos_vertices, sin_vertices; + retval.solidAngle = tri.solidAngle(cos_vertices, sin_vertices); + retval.cosA = cos_vertices[0]; + retval.sinA = sin_vertices[0]; + retval.tri_vertices = tri.vertices; + retval.triCosC = tri.cos_sides[2]; + retval.triCscB = tri.csc_sides[1]; + retval.triCscC = tri.csc_sides[2]; + return retval; + } + + vector3_type generate(const vector2_type u) + { + scalar_type negSinSubSolidAngle,negCosSubSolidAngle; + math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); + + const scalar_type p = negCosSubSolidAngle * sinA - negSinSubSolidAngle * cosA; + const scalar_type q = -negSinSubSolidAngle * sinA - negCosSubSolidAngle * cosA; + + // TODO: we could optimize everything up and including to the first slerp, because precision here is 
just godawful + scalar_type u_ = q - cosA; + scalar_type v_ = p + sinA * triCosC; + + // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors + vector3_type C_s = tri_vertices[0]; + if (triCscB < numeric_limits::max) + { + const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cosA - v_) / ((v_ * p + u_ * q) * sinA); + if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) + C_s += math::quaternion::slerp_delta(tri_vertices[0], tri_vertices[2] * triCscB, cosAngleAlongAC); + } + + vector3_type retval = tri_vertices[1]; + const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri_vertices[1]); + const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); + if (csc_b_s < numeric_limits::max) + { + const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); + if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) + retval += math::quaternion::slerp_delta(tri_vertices[1], C_s * csc_b_s, cosAngleAlongBC_s); + } + return retval; + } + + vector2_type generateInverse(const vector3_type L) + { + const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri_vertices[1]); + const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); + const scalar_type cos_b_ = nbl::hlsl::dot(L, tri_vertices[0]); + + const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * triCosC) * csc_a_ * triCscC; + const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); + + const scalar_type cosC_ = sinA * sinB_* triCosC - cosA * cosB_; + const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); + + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosA, sinA); + angle_adder.addAngle(cosB_, sinB_); + angle_adder.addAngle(cosC_, sinC_); + const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi) / solidAngle; + const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? 
subTriSolidAngleRatio : 0.0; + + const scalar_type cosBC_s = (cosA + cosB_ * cosC_) / (sinB_ * sinC_); + const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : triCosC)); + + return vector2_type(u,v); + } + + scalar_type forwardPdf(const vector2_type u) + { + return scalar_type(1.0) / solidAngle; + } + + scalar_type backwardPdf(const vector3_type L) + { + return scalar_type(1.0) / solidAngle; + } + + scalar_type solidAngle; + scalar_type cosA; + scalar_type sinA; + + vector3_type tri_vertices[3]; + scalar_type triCosC; + scalar_type triCscB; + scalar_type triCscC; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl index c92d732b43..6f3200f4d9 100644 --- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl @@ -32,7 +32,7 @@ struct UniformHemisphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(const vector_t2 _sample) + static vector_t3 __generate(const vector_t2 _sample) { T z = _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -40,11 +40,39 @@ struct UniformHemisphere return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); } - static T pdf() + vector_t3 generate(const vector_t2 _sample) + { + return __generate(_sample); + } + + static vector_t2 __generateInverse(const vector_t3 _sample) + { + T phi = hlsl::atan2(_sample.y, _sample.x); + const T twopi = T(2.0) * numbers::pi; + phi += hlsl::mix(T(0.0), twopi, phi < T(0.0)); + return vector_t2(_sample.z, phi / twopi); + } + + vector_t2 generateInverse(const vector_t3 _sample) + { + return __generateInverse(_sample); + } + + static scalar_type __pdf() { return T(1.0) / (T(2.0) * numbers::pi); } + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(); + } + + scalar_type backwardPdf(const vector_t3 
_sample) + { + return __pdf(); + } + template > static ::nbl::hlsl::sampling::quotient_and_pdf quotient_and_pdf() { @@ -66,7 +94,7 @@ struct UniformSphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(const vector_t2 _sample) + static vector_t3 __generate(const vector_t2 _sample) { T z = T(1.0) - T(2.0) * _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -74,11 +102,39 @@ struct UniformSphere return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); } - static T pdf() + vector_t3 generate(const vector_t2 _sample) + { + return __generate(_sample); + } + + static vector_t2 __generateInverse(const vector_t3 _sample) + { + T phi = hlsl::atan2(_sample.y, _sample.x); + const T twopi = T(2.0) * numbers::pi; + phi += hlsl::mix(T(0.0), twopi, phi < T(0.0)); + return vector_t2((T(1.0) - _sample.z) * T(0.5), phi / twopi); + } + + vector_t2 generateInverse(const vector_t3 _sample) + { + return __generateInverse(_sample); + } + + static T __pdf() { return T(1.0) / (T(4.0) * numbers::pi); } + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(); + } + + scalar_type backwardPdf(const vector_t3 _sample) + { + return __pdf(); + } + template > static ::nbl::hlsl::sampling::quotient_and_pdf quotient_and_pdf() { diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 11442bef7c..9743049a60 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -17,32 +17,91 @@ namespace hlsl namespace shapes { +// What are we likely to do with a Spherical Rectangle? +// 1) Initialize it multiple times from different observers +// 2) Sample it repeatedly + +// How are we likely to get a spherical rect? 
+// 1) from OBB matrix (with a model space z-axis scale that's irrelevant - but should be forced to 1.f to not mess with distance) +// 2) in a compressed form + +// So, to bring multiple world-space observers into Spherical Rectangle's own space, we need the basis matrix. +// The matrix should be a matrix where the last column is the translation, a 3x3 matrix with a pre-transform translation (worldSpace rectangle origin to be subtracted). + +// You can compute it from an OBB matrix (as given by/to imguizmo to position a [0,1]^2 rectangle mesh where Z+ is the front face). + +/* +matrix check = mul(modelSpace,transpose(modelSpace)); +// orthogonality (don't need to check the other 3 lower half numbers, cause MM^T is symmetric) +assert(check[0][1]==0.f); +assert(check[0][2]==0.f); +assert(check[1][2]==0.f); +// the scales are squared +const vector2_type scalesSq = vector2_type(check[0][0],check[1][1]); +const vector2_type scalesRcp = rsqrt(scalesSq); +// only rotation, scale needs to be thrown away +basis = transpose(modelSpace); +// right now `mul(basis,fromObserver)` will apply extent scales on the dot product +// need to remove that +basis[0] *= scalesRcp[0]; +basis[1] *= scalesRcp[1]; +// but also back it up so we know the size of the original rectangle +extents = promote<vector2_type>(1.f)/scalesRcp; +if (dontAssertZScaleIsOne) + basis[2] *= rsqrt(check[2][2]); +else +{ + assert(check[2][2]==1.f); +} +*/ + +// Now, can apply translation: +// 1) post-rotation so it automatically gets added during an affine pseudo-mul of a 3x4, so pseudo_mul(basis,observer) +// 2) pre-rotation so you keep a worldspace rectangle origin and subtract it before, e.g. 
mul(basis,worldSpaceOrigin-observer) - this one is possibly better due to next point + +// So we need to store: +// 1) first two COLUMNS of the original OBB matrix (rows of 3x3 basis matrix with the scale still in there), thats kinda your right and up vectors +// 2) pre-rotation translation / the world-space translation of the rectangle +// Theoretically you could get away with not storing one of the up vector components but its not always the same component you can reconstruct (plane orthogonal to up isn't always the XY plane). +// Could compress up vector as a rotation of the default vector orthogonal to right as given by the frisvad-basis function around the right vector plus a scale +// but that becomes a very expensive decompression step involving a quaternion with uniform scale. + +template +struct CompressedSphericalRectangle +{ + using vector3_type = vector; + + vector3_type origin; + vector3_type right; + vector3_type up; +}; + template struct SphericalRectangle { using scalar_type = Scalar; + using vector2_type = vector; using vector3_type = vector; - using vector4_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) + static SphericalRectangle create(NBL_CONST_REF_ARG(CompressedSphericalRectangle) compressed) { SphericalRectangle retval; - retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); + retval.origin = compressed.origin; + retval.extents = vector2_type(hlsl::length(compressed.right), hlsl::length(compressed.up)); + retval.basis[0] = compressed.right / retval.extents[0]; + retval.basis[1] = compressed.up / retval.extents[1]; + assert(hlsl::dot(retval.basis[0], retval.basis[1]) > scalar_type(0.0)); + retval.basis[2] = hlsl::normalize(hlsl::cross(retval.basis[0], retval.basis[1])); return retval; } - static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, 
vector3_type B, const vector3_type N) + scalar_type solidAngle(const vector3_type observer) { - SphericalRectangle retval; - matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); - retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer); - return retval; - } + const vector3_type r0 = hlsl::mul(basis, origin - observer); - scalar_type solidAngleOfRectangle(const vector rectangleExtents) - { - const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); + using vector4_type = vector; + const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + extents.x, r0.y + extents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], @@ -57,7 +116,9 @@ struct SphericalRectangle return angle_adder.getSumofArccos() - scalar_type(2.0) * numbers::pi; } - vector3_type r0; + vector3_type origin; + vector2_type extents; + matrix3x3_type basis; }; } diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index f574b106ce..118f022640 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -25,38 +25,37 @@ struct SphericalTriangle using scalar_type = T; using vector3_type = vector; - static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin) + static SphericalTriangle create(const vector3_type vertices[3], const vector3_type origin) { SphericalTriangle retval; - retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); - retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin); - retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin); - retval.cos_sides = vector3_type(hlsl::dot(retval.vertex1, retval.vertex2), hlsl::dot(retval.vertex2, retval.vertex0), 
hlsl::dot(retval.vertex0, retval.vertex1)); - const vector3_type csc_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; - retval.csc_sides.x = hlsl::rsqrt(csc_sides2.x); - retval.csc_sides.y = hlsl::rsqrt(csc_sides2.y); - retval.csc_sides.z = hlsl::rsqrt(csc_sides2.z); + retval.vertices[0] = nbl::hlsl::normalize(vertices[0] - origin); + retval.vertices[1] = nbl::hlsl::normalize(vertices[1] - origin); + retval.vertices[2] = nbl::hlsl::normalize(vertices[2] - origin); + retval.cos_sides = vector3_type(hlsl::dot(retval.vertices[1], retval.vertices[2]), hlsl::dot(retval.vertices[2], retval.vertices[0]), hlsl::dot(retval.vertices[0], retval.vertices[1])); + const vector3_type sin_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; + retval.csc_sides = hlsl::rsqrt(sin_sides2); return retval; } + // checks if any angles are small enough to disregard bool pyramidAngles() { - return hlsl::any >(csc_sides >= (vector3_type)(numeric_limits::max)); + return hlsl::any >(csc_sides >= hlsl::promote(numeric_limits::max)); } - scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c) + vector3_type __getCosVertices() + { + // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) + return hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + } + + scalar_type solidAngle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices) { if (pyramidAngles()) return 0.f; - // these variables might eventually get optimized out - cos_a = cos_sides[0]; - cos_c = cos_sides[2]; - csc_b = csc_sides[1]; - csc_c = csc_sides[2]; - // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. 
The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) + cos_vertices = __getCosVertices(); sin_vertices = hlsl::sqrt(hlsl::promote(1.0) - cos_vertices * cos_vertices); math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); @@ -65,39 +64,30 @@ struct SphericalTriangle return angle_adder.getSumofArccos() - numbers::pi; } - scalar_type solidAngleOfTriangle() + scalar_type solidAngle() { vector3_type dummy0,dummy1; - scalar_type dummy2,dummy3,dummy4,dummy5; - return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); + return solidAngle(dummy0,dummy1); } - scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + scalar_type projectedSolidAngle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { if (pyramidAngles()) return 0.f; - vector3_type awayFromEdgePlane0 = hlsl::cross(vertex1, vertex2) * csc_sides[0]; - vector3_type awayFromEdgePlane1 = hlsl::cross(vertex2, vertex0) * csc_sides[1]; - vector3_type awayFromEdgePlane2 = hlsl::cross(vertex0, vertex1) * csc_sides[2]; - - // useless here but could be useful somewhere else - cos_vertices[0] = hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2); - cos_vertices[1] = hlsl::dot(awayFromEdgePlane2, 
awayFromEdgePlane0); - cos_vertices[2] = hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); - // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + cos_vertices = __getCosVertices(); - matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); + matrix awayFromEdgePlane; + awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * csc_sides[0]; + awayFromEdgePlane[1] = hlsl::cross(vertices[2], vertices[0]) * csc_sides[1]; + awayFromEdgePlane[2] = hlsl::cross(vertices[0], vertices[1]) * csc_sides[2]; const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); - const vector3_type pyramidAngles = acos(cos_sides); - return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); + const vector3_type pyramidAngles = hlsl::acos(cos_sides); + return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); } - vector3_type vertex0; - vector3_type vertex1; - vector3_type vertex2; + vector3_type vertices[3]; vector3_type cos_sides; vector3_type csc_sides; }; diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h index 9ffdd8f7fd..8805b7bc2f 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h @@ -25,33 +25,21 @@ class CMitsubaMetadata : public asset::IAssetMetadata public: std::string m_id; }; - class IGeometry : public CID - { - public: - inline IGeometry() : CID(), type(CElementShape::Type::INVALID) {} - inline ~IGeometry() = default; - - CElementShape::Type type; - }; - class CPolygonGeometry final : public asset::IPolygonGeometryMetadata, public IGeometry + class CGeometryCollection final : public 
asset::IGeometryCollectionMetadata, public CID { public: - inline CPolygonGeometry() : asset::IPolygonGeometryMetadata(), IGeometry() {} - inline CPolygonGeometry(CPolygonGeometry&& other) : CPolygonGeometry() {operator=(std::move(other));} - inline ~CPolygonGeometry() = default; + inline CGeometryCollection() : asset::IGeometryCollectionMetadata(), CID(), type(CElementShape::Type::INVALID) {} + inline CGeometryCollection(CGeometryCollection&& other) : CGeometryCollection() {operator=(std::move(other));} + inline ~CGeometryCollection() = default; - inline CPolygonGeometry& operator=(CPolygonGeometry&& other) + inline CGeometryCollection& operator=(CGeometryCollection&& other) { - asset::IPolygonGeometryMetadata::operator=(std::move(other)); - IGeometry::operator=(std::move(other)); + asset::IGeometryCollectionMetadata::operator=(std::move(other)); + CID::operator=(std::move(other)); return *this; } - }; - class CGeometryCollection final : public asset::IGeometryCollectionMetadata, public CID - { - public: - inline CGeometryCollection() : asset::IGeometryCollectionMetadata(), CID() {} - inline ~CGeometryCollection() = default; + + CElementShape::Type type; }; struct SGlobal @@ -69,32 +57,33 @@ class CMitsubaMetadata : public asset::IAssetMetadata const char* getLoaderName() const override {return LoaderName;} // add more overloads when more asset implementations of IGeometry exist - inline const CPolygonGeometry* getAssetSpecificMetadata(const asset::ICPUPolygonGeometry* asset) const + inline const CGeometryCollection* getAssetSpecificMetadata(const asset::ICPUGeometryCollection* asset) const { const auto found = IAssetMetadata::getAssetSpecificMetadata(asset); - return static_cast(found); + return static_cast(found); } private: friend struct SContext; - struct SGeometryMetaPair + struct SGeometryCollectionMetaPair { - core::smart_refctd_ptr geom; - CMitsubaMetadata::CPolygonGeometry meta; + core::smart_refctd_ptr collection; + CMitsubaMetadata::CGeometryCollection 
meta; }; - inline void setPolygonGeometryMeta(core::unordered_map&& container) + template + inline void setGeometryCollectionMeta(core::unordered_map&& container) { const uint32_t count = container.size(); - m_metaPolygonGeometryStorage = IAssetMetadata::createContainer(count); + m_metaPolygonGeometryStorage = IAssetMetadata::createContainer(count); auto outIt = m_metaPolygonGeometryStorage->begin(); for (auto& el : container) { *outIt = std::move(el.second.meta); - IAssetMetadata::insertAssetSpecificMetadata(el.second.geom.get(),outIt++); + IAssetMetadata::insertAssetSpecificMetadata(el.second.collection.get(),outIt++); } } - meta_container_t m_metaPolygonGeometryStorage; + meta_container_t m_metaPolygonGeometryStorage; }; } diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index f3c952935c..3370ed5535 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -14,40 +14,41 @@ namespace nbl::ext::MitsubaLoader { +class CMitsubaLoader; struct SContext final { public: + using interm_getAssetInHierarchy_t = asset::SAssetBundle(const char*, const uint16_t); + SContext( -// const asset::IGeometryCreator* _geomCreator, -// const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* _metadata ); - using shape_ass_type = core::smart_refctd_ptr; - shape_ass_type loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape); - using group_ass_type = core::smart_refctd_ptr; - group_ass_type loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup); + using shape_ass_type = core::smart_refctd_ptr; + shape_ass_type loadBasicShape(const CElementShape* shape); + // the `shape` will have to be `Type::SHAPEGROUP` + shape_ass_type loadShapeGroup(const CElementShape* shape); inline void transferMetadata() { - 
meta->setPolygonGeometryMeta(std::move(shapeCache)); + meta->setGeometryCollectionMeta(std::move(shapeCache)); + meta->setGeometryCollectionMeta(std::move(groupCache)); } -// const asset::IGeometryCreator* creator; -// const asset::IMeshManipulator* manipulator; const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; + std::function interm_getAssetInHierarchy; CMitsubaMetadata* meta; core::smart_refctd_ptr scene; private: // - core::unordered_map groupCache; + core::unordered_map shapeCache; // - core::unordered_map shapeCache; + core::unordered_map groupCache; #if 0 // stuff that belongs in the Material Compiler backend //image, sampler diff --git a/include/nbl/system/CSystemWin32.h b/include/nbl/system/CSystemWin32.h index 01766ddaa8..7c73525c43 100644 --- a/include/nbl/system/CSystemWin32.h +++ b/include/nbl/system/CSystemWin32.h @@ -2,9 +2,12 @@ #define _NBL_SYSTEM_C_SYSTEM_WIN32_H_INCLUDED_ #include "nbl/system/ISystem.h" +#include "nbl/system/ModuleLookupUtils.h" #ifdef _NBL_PLATFORM_WINDOWS_ +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif #include #include @@ -51,13 +54,7 @@ class NBL_API2 CSystemWin32 : public ISystem #endif ; // legal & on purpose - const auto executableDirectory = []() -> std::filesystem::path - { - wchar_t path[MAX_PATH] = { 0 }; - GetModuleFileNameW(NULL, path, MAX_PATH); - - return std::filesystem::path(path).parent_path(); - }(); + const auto exeDirectory = executableDirectory(); // load from right next to the executable (always be able to override like this) HMODULE res = LoadLibraryExA(dllName, NULL, LOAD_LIBRARY_SEARCH_APPLICATION_DIR); @@ -80,7 +77,7 @@ class NBL_API2 CSystemWin32 : public ISystem // then relative to the executable's directory { - const auto path = std::filesystem::absolute(executableDirectory / requestModulePath).string(); + const auto path = std::filesystem::absolute(exeDirectory / requestModulePath).string(); if (logRequests) printf("[INFO]: 
Requesting \"%s\" module load with \"%s\" search path...\n", dllName, path.c_str()); @@ -124,4 +121,4 @@ class NBL_API2 CSystemWin32 : public ISystem #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/system/IApplicationFramework.h b/include/nbl/system/IApplicationFramework.h index 44d3de0427..53af9c9b94 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -15,6 +15,7 @@ #include "nbl/system/CSystemAndroid.h" #include "nbl/system/CSystemLinux.h" #include "nbl/system/CSystemWin32.h" +#include "nbl/system/RuntimeModuleLookup.h" namespace nbl::system { @@ -25,73 +26,33 @@ class IApplicationFramework : public core::IReferenceCounted // this is safe to call multiple times static bool GlobalsInit() { - // TODO: update CMake and rename "DLL" in all of those defines here to "MODULE" or "RUNTIME" - - auto getEnvInstallDirectory = []() - { - const char* sdk = std::getenv("NBL_INSTALL_DIRECTORY"); - - if (sdk) - { - const auto directory = system::path(sdk); - - if (std::filesystem::exists(directory)) - return directory; - } - - return system::path(""); - }; - - constexpr struct - { - std::string_view nabla, dxc; - } module = - { - #ifdef _NBL_SHARED_BUILD_ - _NABLA_DLL_NAME_ - #else - "" - #endif - , - "dxcompiler" - }; - - const auto sdk = getEnvInstallDirectory(); - - struct - { - system::path nabla, dxc; - } install, env, build, rel; - - #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY) && defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY) - - #if defined(_NABLA_INSTALL_DIR_) - install.nabla = std::filesystem::absolute(system::path(_NABLA_INSTALL_DIR_) / NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY); - install.dxc = std::filesystem::absolute(system::path(_NABLA_INSTALL_DIR_) / NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY); - #endif - - env.nabla = sdk / NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; - env.dxc = sdk / NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY; - #endif - - #ifdef _NBL_SHARED_BUILD_ - #if 
defined(_NABLA_OUTPUT_DIR_) - build.nabla = _NABLA_OUTPUT_DIR_; - #endif - #endif - #if defined(_DXC_DLL_) - build.dxc = path(_DXC_DLL_).parent_path(); - #endif - - #ifdef NBL_CPACK_PACKAGE_NABLA_DLL_DIR - rel.nabla = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; - #endif - - #ifdef NBL_CPACK_PACKAGE_DXC_DLL_DIR - rel.dxc = NBL_CPACK_PACKAGE_DXC_DLL_DIR; - #endif - - auto load = [](std::string_view moduleName, const std::vector& searchPaths) + RuntimeModuleLookup lookup; + + const auto exeDirectory = system::executableDirectory(); + lookup.applyInstallOverrides(exeDirectory); + /* + In the current design build interface and install interface cannot share one lookup set. + + Build lookup may point to host-only output folders while install lookup must stay relocatable. + Mixing them can load stale modules from host build trees and break packaged consumers. + Another big issue is Nabla build-system layout because runtime binaries are emitted into + source-side locations instead of a binary-tree runtime prefix that mirrors install layout. + This makes executable-relative lookup ambiguous and forces a split between build and install lookup modes. + There are more issues caused by this non-unified layout than the ones handled in this file. + + Desired end state is that build outputs follow the same relative runtime layout as install so lookup can stay install-style + for both host build and package consumers while still allowing consumer override paths like "./Libraries". + No interface should ever expose any define that contains an absolute path. + All binaries must be emitted into the build directory and Nabla + should remain fully buildable with a read-only source filesystem. + + I cannot address all of that here because it requires a broader Nabla build-system refactor. 
+ */ + const bool useInstallLookups = lookup.chooseInstallLookupMode(exeDirectory); + lookup.finalizeInstallLookups(useInstallLookups); + + using SearchPaths = std::vector; + const auto load = [](std::string_view moduleName, const SearchPaths& searchPaths) { #ifdef _NBL_PLATFORM_WINDOWS_ const bool isAlreadyLoaded = GetModuleHandleA(moduleName.data()); @@ -114,11 +75,11 @@ class IApplicationFramework : public core::IReferenceCounted return true; }; - if (not load(module.dxc, { install.dxc, env.dxc, build.dxc, rel.dxc })) + if (not load(lookup.dxc.name, useInstallLookups ? SearchPaths{ lookup.dxc.paths.install } : SearchPaths{ lookup.dxc.paths.build })) return false; #ifdef _NBL_SHARED_BUILD_ - if (not load(module.nabla, { install.nabla, env.nabla, build.nabla, rel.nabla })) + if (not load(lookup.nabla.name, useInstallLookups ? SearchPaths{ lookup.nabla.paths.install } : SearchPaths{ lookup.nabla.paths.build })) return false; #endif @@ -234,4 +195,4 @@ class IApplicationFramework : public core::IReferenceCounted } #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/system/ILogger.h b/include/nbl/system/ILogger.h index db013ebeb4..72c271e72e 100644 --- a/include/nbl/system/ILogger.h +++ b/include/nbl/system/ILogger.h @@ -61,20 +61,15 @@ class ILogger : public core::IReferenceCounted using namespace std::chrono; auto currentTime = std::chrono::system_clock::now(); const std::time_t t = std::chrono::system_clock::to_time_t(currentTime); - - // Since there is no real way in c++ to get current time with microseconds, this is my weird approach - auto time_since_epoch = duration_cast(system_clock::now().time_since_epoch()); - auto time_since_epoch_s = duration_cast(system_clock::now().time_since_epoch()); - time_since_epoch -= duration_cast(time_since_epoch_s); - - // This while is for the microseconds which are less that 6 digits long to be aligned with the others - while (time_since_epoch.count() / 100000 == 0) time_since_epoch *= 10; auto time = 
std::localtime(&t); + // since there's no microseconds in `time` + const auto us_fraction = duration_cast(currentTime.time_since_epoch()) - duration_cast(duration_cast(currentTime.time_since_epoch())); + constexpr size_t DATE_STR_LENGTH = 28; std::string timeStr(DATE_STR_LENGTH, '\0'); - sprintf(timeStr.data(), "[%02d.%02d.%d %02d:%02d:%02d:%d]", time->tm_mday, time->tm_mon + 1, 1900 + time->tm_year, time->tm_hour, time->tm_min, time->tm_sec, (int)time_since_epoch.count()); + sprintf(timeStr.data(), "[%02d.%02d.%d %02d:%02d:%02d:%06d]", time->tm_mday, time->tm_mon + 1, 1900 + time->tm_year, time->tm_hour, time->tm_min, time->tm_sec, (int)us_fraction.count()); std::string messageTypeStr; switch (logLevel) diff --git a/include/nbl/system/ModuleLookupUtils.h b/include/nbl/system/ModuleLookupUtils.h new file mode 100644 index 0000000000..c763cc8e30 --- /dev/null +++ b/include/nbl/system/ModuleLookupUtils.h @@ -0,0 +1,114 @@ +#ifndef _NBL_SYSTEM_MODULE_LOOKUP_UTILS_H_INCLUDED_ +#define _NBL_SYSTEM_MODULE_LOOKUP_UTILS_H_INCLUDED_ + +#include "nbl/system/path.h" + +#include +#include +#include +#include + +#if defined(_NBL_PLATFORM_WINDOWS_) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#endif + +namespace nbl::system +{ +inline bool moduleExistsInDirectory(const system::path& dir, std::string_view moduleName) +{ + if (dir.empty() || moduleName.empty() || !std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) + return false; + + const std::string baseName(moduleName); + const auto hasRegularFile = [&dir](const std::string& fileName) + { + const auto filePath = dir / fileName; + return std::filesystem::exists(filePath) && std::filesystem::is_regular_file(filePath); + }; + + if (hasRegularFile(baseName)) + return true; + + #if defined(_NBL_PLATFORM_WINDOWS_) + if (hasRegularFile(baseName + ".dll")) + return true; + #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) + if (hasRegularFile(baseName + ".so")) + 
return true; + + const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); + const std::string libBaseName = hasLibPrefix ? baseName : ("lib" + baseName); + if (hasRegularFile(libBaseName + ".so")) + return true; + + const std::string versionedPrefix = libBaseName + ".so."; + std::error_code ec; + for (const auto& entry : std::filesystem::directory_iterator(dir, ec)) + { + if (ec) + break; + if (!entry.is_regular_file(ec)) + continue; + + const auto fileName = entry.path().filename().string(); + if (fileName.rfind(versionedPrefix, 0) == 0) + return true; + } + #elif defined(__APPLE__) + if (hasRegularFile(baseName + ".dylib")) + return true; + + const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); + if (!hasLibPrefix && hasRegularFile("lib" + baseName + ".dylib")) + return true; + #endif + + return false; +} + +inline system::path executableDirectory() +{ + #if defined(_NBL_PLATFORM_WINDOWS_) + wchar_t modulePath[MAX_PATH] = {}; + const auto length = GetModuleFileNameW(nullptr, modulePath, MAX_PATH); + if ((length == 0) || (length >= MAX_PATH)) + return system::path(""); + return std::filesystem::path(modulePath).parent_path(); + #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) + std::error_code ec; + const auto executablePath = std::filesystem::read_symlink("/proc/self/exe", ec); + if (ec) + return system::path(""); + return executablePath.parent_path(); + #else + return system::path(""); + #endif +} + +inline system::path loadedModuleDirectory(std::string_view moduleName) +{ + #if defined(_NBL_PLATFORM_WINDOWS_) + if (moduleName.empty()) + return system::path(""); + + const auto moduleHandle = GetModuleHandleA(moduleName.data()); + if (moduleHandle == nullptr) + return system::path(""); + + wchar_t modulePath[MAX_PATH] = {}; + const auto length = GetModuleFileNameW(moduleHandle, modulePath, MAX_PATH); + if ((length == 0) || (length >= MAX_PATH)) + return system::path(""); + + return std::filesystem::path(modulePath).parent_path(); + 
#else + // TODO: implement loaded module directory lookup for non-Windows platforms. + return system::path(""); + #endif +} +} + +#endif diff --git a/include/nbl/system/RuntimeModuleLookup.h b/include/nbl/system/RuntimeModuleLookup.h new file mode 100644 index 0000000000..dd6ce35e67 --- /dev/null +++ b/include/nbl/system/RuntimeModuleLookup.h @@ -0,0 +1,244 @@ +#ifndef _NBL_SYSTEM_RUNTIME_MODULE_LOOKUP_H_INCLUDED_ +#define _NBL_SYSTEM_RUNTIME_MODULE_LOOKUP_H_INCLUDED_ + +#include "nbl/system/ModuleLookupUtils.h" + +namespace nbl::system +{ +struct RuntimeModuleLookup final +{ + struct LookupPaths + { + system::path install; + system::path build; + }; + + struct Module + { + LookupPaths paths; + std::string_view name = ""; + std::string_view buildOutputDir = ""; + std::string_view buildDllPath = ""; + std::string_view installOverrideRel = ""; + std::string_view installBuildFallbackRel = ""; + std::string_view runtimeAbsKey = ""; + }; + + bool sharedBuild = false; + bool relocatablePackage = false; + Module nabla; + Module dxc; + + RuntimeModuleLookup() + { + dxc.name = "dxcompiler"; + #if defined(_NBL_SHARED_BUILD_) + sharedBuild = true; + nabla.name = _NABLA_DLL_NAME_; + #endif + #if defined(NBL_RELOCATABLE_PACKAGE) + relocatablePackage = true; + #endif + #if defined(_NABLA_OUTPUT_DIR_) + nabla.buildOutputDir = _NABLA_OUTPUT_DIR_; + #endif + #if defined(_DXC_DLL_) + dxc.buildDllPath = _DXC_DLL_; + #endif + #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR) + nabla.installOverrideRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; + #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR) + dxc.installOverrideRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR; + #endif + #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_BUILD_FALLBACK) + nabla.installBuildFallbackRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_BUILD_FALLBACK; + #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_BUILD_FALLBACK) + dxc.installBuildFallbackRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR_BUILD_FALLBACK; + #endif + #if 
defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY) + nabla.runtimeAbsKey = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; + #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY) + dxc.runtimeAbsKey = NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY; + #endif + + applyBuildInterfacePaths(); + } + + inline void applyInstallOverrides(const system::path& exeDirectory) + { + if (hasInstallOverride(nabla)) + nabla.paths.install = absoluteFromExe(exeDirectory, nabla.installOverrideRel); + if (hasInstallOverride(dxc)) + dxc.paths.install = absoluteFromExe(exeDirectory, dxc.installOverrideRel); + } + + inline bool chooseInstallLookupMode(const system::path& exeDirectory) + { + if (relocatablePackage) + { + if (!hasUsableInstallPaths()) + { + if (!tryResolveInstallPathsFromPackageLayout(exeDirectory)) + tryResolveInstallPathsFromBuildFallbackHints(exeDirectory); + } + return true; + } + if (hasUsableInstallPaths()) + return true; + if (tryResolveInstallPathsFromPackageLayout(exeDirectory)) + return true; + return tryResolveInstallPathsFromBuildFallbackHints(exeDirectory); + } + + inline void finalizeInstallLookups(bool useInstallLookups) + { + if (!useInstallLookups) + return; + #if defined(_NBL_PLATFORM_WINDOWS_) && defined(_NBL_SHARED_BUILD_) + if (nabla.paths.install.empty()) + nabla.paths.install = loadedModuleDirectory(nabla.name); + #endif + resolveDxcInstallPathFromLoadedNabla(useInstallLookups); + } + + private: + static inline bool hasInstallOverride(const Module& module) + { + return !module.installOverrideRel.empty(); + } + + static inline bool hasRuntimeAbsKey(const Module& module) + { + return !module.runtimeAbsKey.empty(); + } + + inline void applyBuildInterfacePaths() + { + if (sharedBuild && !nabla.buildOutputDir.empty()) + nabla.paths.build = system::path(nabla.buildOutputDir); + if (!dxc.buildDllPath.empty()) + dxc.paths.build = system::path(dxc.buildDllPath).parent_path(); + } + + static inline system::path absoluteFromExe(const system::path& exeDirectory, 
std::string_view relativePath) + { + if (relativePath.empty() || exeDirectory.empty()) + return system::path(""); + + const auto relPath = system::path(relativePath); + if (relPath.is_absolute()) + return system::path(""); + + return std::filesystem::absolute(exeDirectory / relPath); + } + + inline bool hasUsableInstallPaths() const + { + if (!moduleExistsInDirectory(dxc.paths.install, dxc.name)) + return false; + return !sharedBuild || moduleExistsInDirectory(nabla.paths.install, nabla.name); + } + + inline bool tryResolveInstallPathsFromPrefix(const system::path& candidatePrefix) + { + if (candidatePrefix.empty()) + return false; + if (!hasRuntimeAbsKey(nabla) && !hasRuntimeAbsKey(dxc)) + return false; + + Module candidateNabla = nabla; + Module candidateDxc = dxc; + + if (hasRuntimeAbsKey(nabla)) + candidateNabla.paths.install = std::filesystem::absolute(candidatePrefix / system::path(nabla.runtimeAbsKey)); + if (hasRuntimeAbsKey(dxc)) + candidateDxc.paths.install = std::filesystem::absolute(candidatePrefix / system::path(dxc.runtimeAbsKey)); + + if (!moduleExistsInDirectory(candidateDxc.paths.install, candidateDxc.name)) + return false; + if (sharedBuild && !moduleExistsInDirectory(candidateNabla.paths.install, candidateNabla.name)) + return false; + + nabla.paths.install = candidateNabla.paths.install; + dxc.paths.install = candidateDxc.paths.install; + return true; + } + + inline bool tryResolveInstallPathsFromPackageLayout(const system::path& lookupStartDirectory) + { + if (lookupStartDirectory.empty()) + return false; + if (!hasRuntimeAbsKey(nabla) && !hasRuntimeAbsKey(dxc)) + return false; + + auto candidatePrefix = std::filesystem::absolute(lookupStartDirectory); + while (!candidatePrefix.empty()) + { + if (tryResolveInstallPathsFromPrefix(candidatePrefix)) + return true; + + const auto parent = candidatePrefix.parent_path(); + if (parent == candidatePrefix) + break; + candidatePrefix = parent; + } + return false; + } + + inline bool 
tryResolveInstallPathsFromBuildFallbackHints(const system::path& exeDirectory) + { + Module candidateNabla = nabla; + Module candidateDxc = dxc; + candidateNabla.paths.install = system::path(""); + candidateDxc.paths.install = system::path(""); + + if (!candidateNabla.installBuildFallbackRel.empty()) + candidateNabla.paths.install = absoluteFromExe(exeDirectory, candidateNabla.installBuildFallbackRel); + if (!candidateDxc.installBuildFallbackRel.empty()) + candidateDxc.paths.install = absoluteFromExe(exeDirectory, candidateDxc.installBuildFallbackRel); + + if (candidateDxc.paths.install.empty() && !candidateNabla.paths.install.empty() && hasRuntimeAbsKey(nabla) && hasRuntimeAbsKey(dxc)) + { + const auto dxcRelToNabla = system::path(dxc.runtimeAbsKey).lexically_relative(system::path(nabla.runtimeAbsKey)); + if (!dxcRelToNabla.empty() && dxcRelToNabla != system::path(".")) + candidateDxc.paths.install = std::filesystem::absolute(candidateNabla.paths.install / dxcRelToNabla); + } + + if (!moduleExistsInDirectory(candidateDxc.paths.install, candidateDxc.name)) + return false; + if (sharedBuild && !moduleExistsInDirectory(candidateNabla.paths.install, candidateNabla.name)) + return false; + + nabla.paths.install = candidateNabla.paths.install; + dxc.paths.install = candidateDxc.paths.install; + return true; + } + + #if defined(_NBL_PLATFORM_WINDOWS_) + inline void resolveDxcInstallPathFromLoadedNabla(bool useInstallLookups) + { + if (!useInstallLookups || !dxc.paths.install.empty()) + return; + if (!(sharedBuild && !nabla.runtimeAbsKey.empty() && !dxc.runtimeAbsKey.empty())) + return; + + const auto nablaRuntimeDir = !nabla.paths.install.empty() ? 
nabla.paths.install : loadedModuleDirectory(nabla.name); + if (nablaRuntimeDir.empty()) + return; + + const auto dxcRelToNabla = system::path(dxc.runtimeAbsKey).lexically_relative(system::path(nabla.runtimeAbsKey)); + if (!dxcRelToNabla.empty() && dxcRelToNabla != system::path(".")) + dxc.paths.install = std::filesystem::absolute(nablaRuntimeDir / dxcRelToNabla); + } + #else + inline void resolveDxcInstallPathFromLoadedNabla(bool) + { + } + #endif +}; +} + +#endif diff --git a/include/nbl/system/json.h b/include/nbl/system/json.h new file mode 100644 index 0000000000..1fdd0abf47 --- /dev/null +++ b/include/nbl/system/json.h @@ -0,0 +1,15 @@ +#ifndef _NBL_SYSTEM_JSON_H_INCLUDED_ +#define _NBL_SYSTEM_JSON_H_INCLUDED_ + +namespace nbl::system::json { + template struct adl_serializer; +} + +#define NBL_JSON_IMPL_BIND_ADL_SERIALIZER(T) \ +namespace nlohmann { \ + template<> \ + struct adl_serializer \ + : T {}; \ +} + +#endif // _NBL_SYSTEM_JSON_H_INCLUDED_ \ No newline at end of file diff --git a/include/nbl/video/IGPUAccelerationStructure.h b/include/nbl/video/IGPUAccelerationStructure.h index 3c10a255a2..7da33e2cfa 100644 --- a/include/nbl/video/IGPUAccelerationStructure.h +++ b/include/nbl/video/IGPUAccelerationStructure.h @@ -289,8 +289,9 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat totalPrims += buildRangeInfo.primitiveCount; return true; } - - inline core::smart_refctd_ptr* fillTracking(core::smart_refctd_ptr* oit) const + + template // TODO: requires + inline ForwardIterator fillTracking(ForwardIterator oit) const { *(oit++) = core::smart_refctd_ptr(Base::scratch.buffer); if (Base::isUpdate) @@ -486,7 +487,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr return retval; } - inline core::smart_refctd_ptr* fillTracking(core::smart_refctd_ptr* oit) const + template // TODO: requires + inline ForwardIterator fillTracking(ForwardIterator oit) const { *(oit++) = 
core::smart_refctd_ptr(Base::scratch.buffer); if (Base::isUpdate) @@ -713,8 +715,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr *(tracked++) = *(it++); } // Useful if TLAS got built externally as well - template - inline void insertTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer) + template + inline void insertTrackedBLASes(ForwardIterator begin, const uint32_t count, const build_ver_t buildVer) { if (buildVer==0) return; @@ -725,14 +727,19 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr for (auto it=std::next(prev); it!=m_pendingBuilds.end()&&it->ordinal>buildVer; prev=it++) {} auto inserted = m_pendingBuilds.emplace_after(prev); // now fill the contents - inserted->BLASes.insert(begin,end); + inserted->BLASes.reserve(count); + for (auto i=0u; iBLASes.insert(*begin); + ++begin; + } inserted->ordinal = buildVer; } - template - inline build_ver_t pushTrackedBLASes(const Iterator begin, const Iterator end) + template + inline build_ver_t pushTrackedBLASes(const ForwardIterator begin, const uint32_t count) { const auto buildVer = registerNextBuildVer(); - insertTrackedBLASes(begin,end,buildVer); + insertTrackedBLASes(begin,count,buildVer); return buildVer; } // a little utility to make sure nothing from before this build version gets tracked @@ -750,18 +757,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr const uint32_t m_maxInstanceCount; private: - struct DynamicUpCastingSpanIterator - { - inline bool operator!=(const DynamicUpCastingSpanIterator& other) const {return ptr!=other.ptr;} - - inline DynamicUpCastingSpanIterator operator++() {return {ptr++};} - - inline const IGPUBottomLevelAccelerationStructure* operator*() const {return dynamic_cast(ptr->get());} - - std::span>::iterator ptr; - }; friend class ILogicalDevice; friend class IQueue; + inline const core::unordered_set* getPendingBuildTrackedBLASes(const 
build_ver_t buildVer) const { const auto found = std::find_if(m_pendingBuilds.begin(),m_pendingBuilds.end(),[buildVer](const auto& item)->bool{return item.ordinal==buildVer;}); diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 6b3bfef18c..8f0f1fce30 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -552,9 +552,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject { auto oit = reserveReferences(std::distance(begin,end)); if (oit) - while (begin!=end) - *(oit++) = core::smart_refctd_ptr(*(begin++)); - return oit; + { + while (begin!=end) + *(oit++) = core::smart_refctd_ptr(*(begin++)); + return true; + } + return false; } inline bool recordReferences(const std::span refs) {return recordReferences(refs.begin(),refs.end());} @@ -569,8 +572,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.src={oit,size},.dst=tlas}); while (beginBLASes!=endBLASes) *(oit++) = core::smart_refctd_ptr(*(beginBLASes++)); + return true; } - return oit; + return false; } virtual bool insertDebugMarker(const char* name, const core::vector4df_SIMD& color = core::vector4df_SIMD(1.0, 1.0, 1.0, 1.0)) = 0; @@ -885,7 +889,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject template requires nbl::is_any_of_v bool invalidDrawIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride); - core::smart_refctd_ptr* reserveReferences(const uint32_t size); + IGPUCommandPool::CTrackedIterator reserveReferences(const uint32_t size); // This bound descriptor set record doesn't include the descriptor sets whose layout has _any_ one of its bindings // created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT @@ -896,7 +900,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // The Command Pool already 
tracks resources referenced in the Build Infos or Copies From Memory (Deserializations), so we only need pointers into those records. struct TLASTrackingWrite { - std::span> src; + // TODO: pack a little more efficiently so we can recover `CTrackedIterator` more easily + IGPUCommandPool::CTrackedIterator srcBegin; + uint32_t count; IGPUTopLevelAccelerationStructure* dst; }; struct TLASTrackingCopy diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index ddc4fcfd5c..56a0c46008 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -80,6 +80,7 @@ class IGPUCommandPool : public IBackendObject virtual const void* getNativeHandle() const = 0; // Host access to Command Pools needs to be externally synchronized anyway so its completely fine to do this + // TODO: abstract it away, useful in other externally synchronised contexts template class StackAllocation final { @@ -157,6 +158,88 @@ class IGPUCommandPool : public IBackendObject class CTraceRaysIndirectCmd; class CBindRayTracingPipelineCmd; + class IVariableSizeCommandBase; + class CExtraResourceTrackingBlock; + class CTrackedIterator final + { + public: + using value_t = core::smart_refctd_ptr; + + private: + IVariableSizeCommandBase* m_cmd = nullptr; + value_t* m_res = nullptr; + + public: + inline CTrackedIterator() {} + inline CTrackedIterator(IVariableSizeCommandBase* beginCmd) : m_cmd(beginCmd), m_res(m_cmd ? 
m_cmd->getLocalResources():nullptr) {} + + explicit inline operator bool() const {return m_cmd && m_res && m_resgetLocalResources()+m_cmd->getLocalResourceCount();} + inline bool operator!=(const CTrackedIterator& other) const + { + const bool selfInvalid = bool(*this); + if (selfInvalid!=bool(other)) + return true; + if (selfInvalid) + return false; + return m_cmd!=other.m_cmd || m_res!=other.m_res; + } + + inline value_t& operator*() + { + assert(bool(*this)); + return *m_res; + } + inline const value_t& operator*() const + { + assert(bool(*this)); + return *m_res; + } + + inline CTrackedIterator operator+(uint32_t advance) const + { + CTrackedIterator retval = *this; + if (bool(*this)) + { + auto* const localRes = m_cmd->getLocalResources(); + assert(m_res>=localRes); + uint32_t localPos = m_res-localRes; + if (const auto localCount=m_cmd->getLocalResourceCount(); localPos+advancem_next; + if (!retval.m_cmd) + { + retval.m_res = nullptr; + break; + } + retval.m_res = retval.m_cmd->getLocalResources(); + if (advance&& dev, const core::bitflag _flags, const uint8_t _familyIx) : IBackendObject(std::move(dev)), m_scratchAlloc(nullptr,0u,0u,_NBL_SIMD_ALIGNMENT,SCRATCH_MEMORY_SIZE), m_flags(_flags), m_familyIx(_familyIx) {} @@ -168,9 +251,24 @@ class IGPUCommandPool : public IBackendObject // for access to what? 
friend class IGPUCommandBuffer; + struct DynamicBLASCastingIterator + { + inline bool operator!=(const DynamicBLASCastingIterator& other) const { return orig != other.orig; } + inline DynamicBLASCastingIterator operator++() {return {++orig};} + + inline core::smart_refctd_ptr operator*() const + { + return core::smart_refctd_ptr_dynamic_cast(*orig); + } + + IGPUCommandPool::CTrackedIterator orig; + }; + friend class ILogicalDevice; + friend class IQueue; + class CCommandSegment; - class alignas(COMMAND_ALIGNMENT) ICommand + class ICommand { friend class CCommandSegment; @@ -203,55 +301,90 @@ class IGPUCommandPool : public IBackendObject void operator delete( ICommand* ptr, std::destroying_delete_t, std::size_t sz, std::align_val_t al ) { ptr->~ICommand(); } - private: - - friend CCommandSegment; - const uint32_t m_size; + // 4 bytes unused }; - template - class NBL_FORCE_EBO IFixedSizeCommand : public ICommand + class IFixedSizeCommand : public ICommand { public: template - static uint32_t calc_size(const Args&...) + static inline uint32_t calc_size(const Args&...) { static_assert(std::is_final_v); + //static_assert(sizeof(CRTP)<=CCommandSegment::STORAGE_SIZE); return sizeof(CRTP); } - virtual ~IFixedSizeCommand() = default; + inline virtual ~IFixedSizeCommand() = default; protected: inline IFixedSizeCommand() : ICommand(calc_size()) {} }; - template - class NBL_FORCE_EBO IVariableSizeCommand : public ICommand + // I can't refactor this into a base class for tracking handles, cause I want them to live at the end :( + class CCommandSegmentListPool; + class IVariableSizeCommandBase : public ICommand { public: - template - static uint32_t calc_size(const Args&... 
args) + inline virtual ~IVariableSizeCommandBase() { - static_assert(std::is_final_v); - return core::alignUp(sizeof(CRTP)+CRTP::calc_resources(args...)*sizeof(core::smart_refctd_ptr),alignof(CRTP)); + std::destroy_n(getLocalResources(),getLocalResourceCount()); + } + + inline uint32_t getLocalResourceCount() const {return reinterpret_cast(static_cast(this)+1)[-1];} + + protected: + struct SConstructionParams + { + uint32_t size; + uint32_t resources; + }; + static SConstructionParams calc_size(const uint32_t memoryLeft, const uint32_t thisSize, const uint32_t resourceCount) + { + SConstructionParams retval = {.size=std::min(thisSize+resourceCount*sizeof(CTrackedIterator::value_t),memoryLeft)}; + retval.resources = (retval.size-thisSize)/sizeof(CTrackedIterator::value_t); + return retval; } - virtual ~IVariableSizeCommand() + inline IVariableSizeCommandBase(const SConstructionParams& param) : ICommand(param.size), m_next(nullptr) { - std::destroy_n(getVariableCountResources(),m_resourceCount); + reinterpret_cast(static_cast(this)+1)[-1] = param.resources; + std::uninitialized_default_construct_n(getLocalResources(),getLocalResourceCount()); } - inline core::smart_refctd_ptr* getVariableCountResources() { return reinterpret_cast*>(static_cast(this)+1); } - protected: + private: + friend class CTrackedIterator; + friend class CCommandSegmentListPool; + + // methods for the iterator + inline CTrackedIterator::value_t* getLocalResources() + { + return reinterpret_cast(reinterpret_cast(this)+getSize())-getLocalResourceCount(); + } + inline const CTrackedIterator::value_t* getLocalResources() const + { + CTrackedIterator::value_t* retval = const_cast(this)->getLocalResources(); + return retval; + } + + CExtraResourceTrackingBlock* m_next; + }; + template + class IVariableSizeCommand : public IVariableSizeCommandBase + { + public: template - inline IVariableSizeCommand(const Args&... 
args) : ICommand(calc_size(args...)), m_resourceCount(CRTP::calc_resources(args...)) + static SConstructionParams calc_size(const uint32_t memoryLeft, const Args&... args) { - std::uninitialized_default_construct_n(getVariableCountResources(),m_resourceCount); + static_assert(std::is_final_v); + static_assert(alignof(CRTP)>=alignof(CTrackedIterator::value_t)); + return IVariableSizeCommandBase::calc_size(memoryLeft,sizeof(CRTP),CRTP::calc_resources(args...)); } - const uint32_t m_resourceCount; + protected: + template + inline IVariableSizeCommand(const uint32_t memoryLeft, const Args&... args) : IVariableSizeCommandBase(calc_size(memoryLeft,args...)) {} }; class alignas(COMMAND_SEGMENT_ALIGNMENT) CCommandSegment @@ -269,19 +402,20 @@ class IGPUCommandPool : public IBackendObject } m_header; public: - static inline constexpr uint32_t STORAGE_SIZE = COMMAND_SEGMENT_SIZE - core::roundUp(sizeof(header_t), alignof(ICommand)); + static inline constexpr uint32_t STORAGE_SIZE = COMMAND_SEGMENT_SIZE - core::roundUp(sizeof(header_t),alignof(ICommand)); - CCommandSegment(CCommandSegment* prev): + inline CCommandSegment(CCommandSegment* prev): m_header(nullptr, 0u, 0u, alignof(ICommand), STORAGE_SIZE) { - static_assert(alignof(ICommand) == COMMAND_SEGMENT_ALIGNMENT); + static_assert(alignof(ICommand) <= COMMAND_ALIGNMENT); + static_assert(COMMAND_ALIGNMENT <= COMMAND_SEGMENT_ALIGNMENT); wipeNextCommandSize(); if (prev) prev->m_header.next = this; } - ~CCommandSegment() + inline ~CCommandSegment() { for (ICommand* cmd = begin(); cmd != end();) { @@ -297,8 +431,12 @@ class IGPUCommandPool : public IBackendObject template Cmd* allocate(const Args&... 
args) { - const uint32_t cmdSize = Cmd::calc_size(args...); - const auto address = m_header.commandAllocator.alloc_addr(cmdSize, alignof(Cmd)); + uint32_t cmdSize; + if constexpr (std::is_base_of_v) + cmdSize = Cmd::calc_size(args...).size; + else + cmdSize = Cmd::calc_size(args...); + const auto address = m_header.commandAllocator.alloc_addr(cmdSize,alignof(Cmd)); if (address == decltype(m_header.commandAllocator)::invalid_address) return nullptr; @@ -308,6 +446,8 @@ class IGPUCommandPool : public IBackendObject return cmdMem; } + inline uint32_t max_size() const {return m_header.commandAllocator.max_size();} + inline CCommandSegment* getNext() const { return m_header.next; } inline CCommandSegment* getNextHead() const { return m_header.nextHead; } inline CCommandSegment* getPrevHead() const { return m_header.prevHead; } @@ -337,7 +477,7 @@ class IGPUCommandPool : public IBackendObject void wipeNextCommandSize() { const auto nextCmdOffset = m_header.commandAllocator.get_allocated_size(); - const auto wipeEnd = nextCmdOffset + offsetof(IGPUCommandPool::ICommand, m_size) + sizeof(IGPUCommandPool::ICommand::m_size); + const auto wipeEnd = nextCmdOffset + offsetof(IGPUCommandPool::ICommand,m_size) + sizeof(IGPUCommandPool::ICommand::m_size); if (wipeEnd < m_header.commandAllocator.get_total_size()) *(const_cast(&(reinterpret_cast(m_data + nextCmdOffset)->m_size))) = 0; } @@ -345,6 +485,23 @@ class IGPUCommandPool : public IBackendObject static_assert(sizeof(CCommandSegment)==COMMAND_SEGMENT_SIZE); private: + class CExtraResourceTrackingBlock final : public IVariableSizeCommandBase + { + public: + static SConstructionParams calc_size(const uint32_t extraResourceCount) + { + static_assert(alignof(CExtraResourceTrackingBlock)>=alignof(CTrackedIterator::value_t)); + return IVariableSizeCommandBase::calc_size(CCommandSegment::STORAGE_SIZE,sizeof(CExtraResourceTrackingBlock),extraResourceCount); + } + + // this command will always be created at the start of a new segment, 
the whole reason it exists is because previous command has overflown the segment + inline CExtraResourceTrackingBlock(const uint32_t extraResourceCount) : IVariableSizeCommandBase(calc_size(extraResourceCount)) {} + + static inline uint32_t calc_resources(const uint32_t extraResourceCount) + { + return extraResourceCount; + } + }; class CCommandSegmentListPool { public: @@ -354,7 +511,7 @@ class IGPUCommandPool : public IBackendObject CCommandSegment* tail = nullptr; }; - CCommandSegmentListPool() : m_pool(COMMAND_SEGMENTS_PER_BLOCK*COMMAND_SEGMENT_SIZE, 0u, MAX_COMMAND_SEGMENT_BLOCK_COUNT, MIN_POOL_ALLOC_SIZE) {} + inline CCommandSegmentListPool() : m_pool(COMMAND_SEGMENTS_PER_BLOCK*COMMAND_SEGMENT_SIZE, 0u, MAX_COMMAND_SEGMENT_BLOCK_COUNT, MIN_POOL_ALLOC_SIZE) {} template Cmd* emplace(SCommandSegmentList& list, Args&&... args) @@ -362,14 +519,29 @@ class IGPUCommandPool : public IBackendObject if (!list.tail && !appendToList(list)) return nullptr; + constexpr bool IsVariableSize = std::is_base_of_v,Cmd>; + uint32_t resourcesLeft = 0u; + if constexpr (IsVariableSize) + resourcesLeft = Cmd::calc_resources(args...); // not forwarding twice because newCmd() will never be called the second time auto newCmd = [&]() -> Cmd* { - auto cmdMem = list.tail->allocate(args...); + Cmd* cmdMem; + uint32_t unallocatedSize; + if constexpr (IsVariableSize) + { + unallocatedSize = list.tail->max_size(); + cmdMem = list.tail->allocate(unallocatedSize,args...); + } + else + cmdMem = list.tail->allocate(args...); if (!cmdMem) return nullptr; - return new (cmdMem) Cmd(std::forward(args)...); + if constexpr (IsVariableSize) + return new (cmdMem) Cmd(unallocatedSize,std::forward(args)...); + else + return new (cmdMem) Cmd(std::forward(args)...); }; auto cmd = newCmd(); @@ -379,10 +551,21 @@ class IGPUCommandPool : public IBackendObject return nullptr; cmd = newCmd(); - if (!cmd) + assert(cmd); + } + + // now handle segmenting the tracked resources + if constexpr (IsVariableSize) + { + for 
(IVariableSizeCommandBase* prev=cmd; (resourcesLeft-=prev->getLocalResourceCount())!=0u; ) { - assert(false); - return nullptr; + if (!appendToList(list)) + return nullptr; + auto* const mem = list.tail->allocate(resourcesLeft); + assert(mem); + auto* const extra = new (mem) CExtraResourceTrackingBlock(resourcesLeft); + prev->m_next = extra; + prev = extra; } } @@ -398,13 +581,13 @@ class IGPUCommandPool : public IBackendObject if (head == m_head) m_head = head->getNextHead(); - CCommandSegment::linkHeads(head->getPrevHead(), head->getNextHead()); + CCommandSegment::linkHeads(head->getPrevHead(),head->getNextHead()); for (auto& segment = head; segment;) { auto nextSegment = segment->getNext(); segment->~CCommandSegment(); - m_pool.deallocate(segment, COMMAND_SEGMENT_SIZE); + m_pool.deallocate(segment,COMMAND_SEGMENT_SIZE); segment = nextSegment; } } @@ -462,7 +645,7 @@ class IGPUCommandPool : public IBackendObject class IGPUCommandPool::CBindIndexBufferCmd final : public IFixedSizeCommand { public: - CBindIndexBufferCmd(core::smart_refctd_ptr&& indexBuffer) : m_indexBuffer(std::move(indexBuffer)) {} + inline CBindIndexBufferCmd(core::smart_refctd_ptr&& indexBuffer) : m_indexBuffer(std::move(indexBuffer)) {} private: core::smart_refctd_ptr m_indexBuffer; @@ -471,7 +654,7 @@ class IGPUCommandPool::CBindIndexBufferCmd final : public IFixedSizeCommand { public: - CIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} + inline CIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} private: core::smart_refctd_ptr m_buffer; @@ -480,7 +663,7 @@ class IGPUCommandPool::CIndirectCmd final : public IFixedSizeCommand { public: - CDrawIndirectCountCmd(core::smart_refctd_ptr&& buffer, core::smart_refctd_ptr&& countBuffer) + inline CDrawIndirectCountCmd(core::smart_refctd_ptr&& buffer, core::smart_refctd_ptr&& countBuffer) : m_buffer(std::move(buffer)), m_countBuffer(std::move(countBuffer)) {} @@ -503,9 +686,10 @@ class 
IGPUCommandPool::CBeginRenderPassCmd final : public IFixedSizeCommand { public: - CPipelineBarrierCmd(const uint32_t bufferCount, const uint32_t imageCount) : IVariableSizeCommand(bufferCount,imageCount) {} + inline CPipelineBarrierCmd(const uint32_t memoryLeft, const uint32_t bufferCount, const uint32_t imageCount) : + IVariableSizeCommand(memoryLeft,bufferCount,imageCount) {} - static uint32_t calc_resources(const uint32_t bufferCount, const uint32_t imageCount) + static inline uint32_t calc_resources(const uint32_t bufferCount, const uint32_t imageCount) { return bufferCount+imageCount; } @@ -514,7 +698,7 @@ class IGPUCommandPool::CPipelineBarrierCmd final : public IVariableSizeCommand { public: - CBindDescriptorSetsCmd(core::smart_refctd_ptr&& pipelineLayout, const uint32_t setCount, const IGPUDescriptorSet* const* const sets) + inline CBindDescriptorSetsCmd(core::smart_refctd_ptr&& pipelineLayout, const uint32_t setCount, const IGPUDescriptorSet* const* const sets) : m_layout(std::move(pipelineLayout)) { for (auto i = 0; i < setCount; ++i) @@ -532,7 +716,7 @@ class IGPUCommandPool::CBindDescriptorSetsCmd final : public IFixedSizeCommand { public: - CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + inline CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} private: core::smart_refctd_ptr m_pipeline; @@ -679,7 +863,7 @@ class IGPUCommandPool::CCopyImageToBufferCmd final : public IFixedSizeCommand { public: - CExecuteCommandsCmd(const uint32_t count) : IVariableSizeCommand(count) {} + CExecuteCommandsCmd(const uint32_t memoryLeft, const uint32_t count) : IVariableSizeCommand(memoryLeft,count) {} static uint32_t calc_resources(const uint32_t count) { @@ -690,7 +874,7 @@ class IGPUCommandPool::CExecuteCommandsCmd final : public IVariableSizeCommand { public: - CCustomReferenceCmd(const uint32_t count) : IVariableSizeCommand(count) {} + CCustomReferenceCmd(const uint32_t 
memoryLeft, const uint32_t count) : IVariableSizeCommand(memoryLeft,count) {} static uint32_t calc_resources(const uint32_t count) { @@ -701,22 +885,13 @@ class IGPUCommandPool::CCustomReferenceCmd final : public IVariableSizeCommand { public: - CWaitEventsCmd(const uint32_t eventCount, IEvent *const *const events, const uint32_t totalBufferCount, const uint32_t totalImageCount) - : IVariableSizeCommand(eventCount,events,totalBufferCount,totalImageCount), m_eventCount(eventCount) - { - for (auto i=0u; i(events[i]); - } - - inline core::smart_refctd_ptr* getDeviceMemoryBacked() {return reinterpret_cast*>(getVariableCountResources()+m_eventCount);} + CWaitEventsCmd(const uint32_t memoryLeft, const uint32_t eventCount, const uint32_t totalBufferCount, const uint32_t totalImageCount) + : IVariableSizeCommand(memoryLeft,eventCount,totalBufferCount,totalImageCount) {} - static uint32_t calc_resources(const uint32_t eventCount, const IEvent *const *const, const uint32_t totalBufferCount, const uint32_t totalImageCount) + static uint32_t calc_resources(const uint32_t eventCount, const uint32_t totalBufferCount, const uint32_t totalImageCount) { return eventCount+totalBufferCount+totalImageCount; } - - private: - const uint32_t m_eventCount; }; class IGPUCommandPool::CCopyImageCmd final : public IFixedSizeCommand @@ -790,9 +965,9 @@ class IGPUCommandPool::CWriteAccelerationStructurePropertiesCmd final : public I // If we take queryPool as rvalue ref here (core::smart_refctd_ptr&&), in calc_size it will become const core::smart_refctd_ptr // because calc_size takes its arguments by const ref (https://github.com/Devsh-Graphics-Programming/Nabla/blob/04fcae3029772cbc739ccf6ba80f72e6e12f54e8/include/nbl/video/IGPUCommandPool.h#L76) // , that means we will not be able to pass a core::smart_refctd_ptr when emplacing the command. So instead, we take a raw pointer and create refctd pointers here. 
- CWriteAccelerationStructurePropertiesCmd(const IQueryPool* queryPool, const uint32_t accelerationStructureCount) - : IVariableSizeCommand(queryPool,accelerationStructureCount), m_queryPool(core::smart_refctd_ptr(queryPool)) - {} + CWriteAccelerationStructurePropertiesCmd(const uint32_t memoryLeft, const IQueryPool* queryPool, const uint32_t accelerationStructureCount) + : IVariableSizeCommand(memoryLeft,queryPool,accelerationStructureCount), + m_queryPool(core::smart_refctd_ptr(queryPool)) {} static uint32_t calc_resources(const IQueryPool* queryPool, const uint32_t accelerationStructureCount) { @@ -806,7 +981,7 @@ class IGPUCommandPool::CWriteAccelerationStructurePropertiesCmd final : public I class IGPUCommandPool::CBuildAccelerationStructuresCmd final : public IVariableSizeCommand { public: - inline CBuildAccelerationStructuresCmd(const uint32_t resourceCount) : IVariableSizeCommand(resourceCount) {} + inline CBuildAccelerationStructuresCmd(const uint32_t memoryLeft, const uint32_t resourceCount) : IVariableSizeCommand(memoryLeft,resourceCount) {} static inline uint32_t calc_resources(const uint32_t resourceCount) { diff --git a/include/nbl/video/IGPUPipeline.h b/include/nbl/video/IGPUPipeline.h index c22ad998db..00a8f71414 100644 --- a/include/nbl/video/IGPUPipeline.h +++ b/include/nbl/video/IGPUPipeline.h @@ -90,7 +90,7 @@ class IGPUPipelineBase { .count = dataSize ? 
static_cast(count) : 0, .dataSize = static_cast(dataSize), }; - return *retval; + return bool(*retval); } const asset::IShader* shader = nullptr; diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 180342e2d4..ae351fdecd 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -410,7 +410,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe // struct AccelerationStructureBuildSizes { - inline operator bool() const { return accelerationStructureSize!=(~0ull); } + explicit inline operator bool() const { return accelerationStructureSize!=(~0ull); } size_t accelerationStructureSize = ~0ull; size_t updateScratchSize = ~0ull; @@ -831,6 +831,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe asset::IShaderCompiler::CCache* writeCache = nullptr; std::span extraDefines = {}; hlsl::ShaderStage stage = hlsl::ShaderStage::ESS_ALL_OR_LIBRARY; + core::bitflag debugInfoFlags = asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_NONE; }; core::smart_refctd_ptr compileShader(const SShaderCreationParameters& creationParams); diff --git a/include/nbl/video/SPipelineCreationParams.h b/include/nbl/video/SPipelineCreationParams.h index 3a25560ae4..0971102599 100644 --- a/include/nbl/video/SPipelineCreationParams.h +++ b/include/nbl/video/SPipelineCreationParams.h @@ -14,7 +14,7 @@ namespace nbl::video struct SSpecializationValidationResult { constexpr static inline uint32_t Invalid = ~0u; - inline operator bool() const + explicit inline operator bool() const { return count!=Invalid && dataSize!=Invalid; } diff --git a/include/nbl/video/utilities/CAssetConverter.h b/include/nbl/video/utilities/CAssetConverter.h index a360e3b0f5..0b47fa7229 100644 --- a/include/nbl/video/utilities/CAssetConverter.h +++ b/include/nbl/video/utilities/CAssetConverter.h @@ -1048,7 +1048,7 @@ class CAssetConverter : public core::IReferenceCounted } // - inline operator 
bool() const {return bool(m_converter);} + explicit inline operator bool() const {return bool(m_converter);} // Until `convert` is called, the Buffers and Images are not filled with content and Acceleration Structures are not built, unless found in the `SInput::readCache` // WARNING: The Acceleration Structure Pointer WILL CHANGE after calling `convert` if its patch dictates that it will be compacted! (since AS can't resize) @@ -1056,6 +1056,16 @@ class CAssetConverter : public core::IReferenceCounted template std::span> getGPUObjects() const {return std::get>(m_gpuObjects);} + // after a successful conversion you can move the GPU objects over + template + bool moveGPUObjects(vector_t& out) + { + if (m_converter) + return false; + out = std::move(std::get>(m_gpuObjects)); + return true; + } + // If you ever need to look up the content hashes of the assets AT THE TIME you converted them // REMEMBER it can have stale hashes (asset or its dependants mutated since hash computed), // then you can get hash mismatches or plain wrong hashes. 
@@ -1106,7 +1116,7 @@ class CAssetConverter : public core::IReferenceCounted // we don't insert into the writeCache until conversions are successful core::tuple_transform_t m_stagingCaches; - // converted IShaders do not have any object that hold a smartptr into them, so we have to persist them in this vector to prevent m_stagingCacheds hold a raw dangling pointer into them + // converted IShaders do not have any object that holds a smart pointer to them, so we have to persist them in this vector to prevent m_stagingCaches from holding raw dangling pointers into them core::vector> m_shaders; // need a more explicit list of GPU objects that need device-assisted conversion diff --git a/include/nbl/video/utilities/CSmoothResizeSurface.h b/include/nbl/video/utilities/CSmoothResizeSurface.h index 4d3a243b90..017f1cd2d4 100644 --- a/include/nbl/video/utilities/CSmoothResizeSurface.h +++ b/include/nbl/video/utilities/CSmoothResizeSurface.h @@ -107,7 +107,7 @@ class NBL_API2 ISmoothResizeSurface : public ISimpleManagedSurface struct SCachedPresentInfo { - inline operator bool() const {return source.image && waitSemaphore && waitValue && pPresentSemaphoreWaitValue;} + explicit inline operator bool() const {return source.image && waitSemaphore && waitValue && pPresentSemaphoreWaitValue;} SPresentSource source = {}; // only allow waiting for one semaphore, because there's only one source to present!
diff --git a/include/nbl/video/utilities/IUtilities.h b/include/nbl/video/utilities/IUtilities.h index 7817df8d23..f52d5d36ef 100644 --- a/include/nbl/video/utilities/IUtilities.h +++ b/include/nbl/video/utilities/IUtilities.h @@ -37,8 +37,10 @@ class NBL_API2 IUtilities : public core::IReferenceCounted , m_allocationAlignment(allocationAlignment) , m_allocationAlignmentForBufferImageCopy(allocationAlignmentForBufferImageCopy) { - m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); - m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); + if (m_defaultDownloadBuffer) + m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); + if (m_defaultUploadBuffer) + m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); } IUtilities() = delete; @@ -94,6 +96,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted core::smart_refctd_ptr > defaultDownloadBuffer = nullptr; // Try Create Download Buffer + if (downstreamSize > 0u) { IGPUBuffer::SCreationParams streamingBufferCreationParams = {}; streamingBufferCreationParams.size = downstreamSize; @@ -127,6 +130,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted defaultDownloadBuffer = core::make_smart_refctd_ptr>(asset::SBufferRange{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize); } // Try Create Upload Buffer + if (upstreamSize > 0u) { IGPUBuffer::SCreationParams streamingBufferCreationParams = {}; streamingBufferCreationParams.size = upstreamSize; @@ -374,6 +378,11 @@ class NBL_API2 IUtilities : public core::IReferenceCounted //! 
* data must not be nullptr inline bool updateBufferRangeViaStagingBuffer(SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange& bufferRange, IUpstreamingDataProducer& callback) { + if (!m_defaultUploadBuffer) + { + m_logger.log("no staging buffer available for upload. check `upstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } if (!bufferRange.isValid() || !bufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_DST_BIT)) { m_logger.log("Invalid `bufferRange` or buffer has no `EUF_TRANSFER_DST_BIT` usage flag, cannot `updateBufferRangeViaStagingBuffer`!", system::ILogger::ELL_ERROR); @@ -623,6 +632,11 @@ class NBL_API2 IUtilities : public core::IReferenceCounted template requires std::is_same_v, SIntendedSubmitInfo> inline bool downloadBufferRangeViaStagingBuffer(const std::function& consumeCallback, IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange& srcBufferRange) { + if (!m_defaultDownloadBuffer) + { + m_logger.log("no staging buffer available for download. 
check `downstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } if (!srcBufferRange.isValid() || !srcBufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_SRC_BIT)) { m_logger.log("Invalid `srcBufferRange` or buffer has no `EUF_TRANSFER_SRC_BIT` usage flag, cannot `downloadBufferRangeViaStagingBuffer`!",system::ILogger::ELL_ERROR); diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index a44374714d..99c76a302c 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -18,6 +18,7 @@ add_compile_options( set(CMAKE_SYSTEM_VERSION 10.0) project(NablaSmoke CXX) +include(${CMAKE_CURRENT_LIST_DIR}/NablaSmokeTests.cmake) # default hint for our CI, normally it needs to be path to package's directory where all autogen config .cmake scripts are set(PACKAGE_CONFIG_SEARCH_PATHS ${CMAKE_CURRENT_LIST_DIR}/build-ct/install/cmake ${PACKAGE_CONFIG_SEARCH_PATH_HINTS}) @@ -28,34 +29,76 @@ find_package(Nabla REQUIRED CONFIG add_executable(smoke main.cpp pch.hpp cdb.ps1) target_link_libraries(smoke PRIVATE Nabla::Nabla) +target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) +set(NBL_SMOKE_FLOW "CONFIGURE_ONLY" CACHE STRING "Smoke runtime flow: MINIMALISTIC, CONFIGURE_ONLY or BUILD_ONLY") +set_property(CACHE NBL_SMOKE_FLOW PROPERTY STRINGS MINIMALISTIC CONFIGURE_ONLY BUILD_ONLY) +string(TOUPPER "${NBL_SMOKE_FLOW}" NBL_SMOKE_FLOW) +message(STATUS "Smoke runtime flow: ${NBL_SMOKE_FLOW}") +option(NBL_SMOKE_INSTALL_SELFTEST "Install smoke with CTest metadata and run tests from install tree" ON) + +if(NBL_SMOKE_FLOW STREQUAL "MINIMALISTIC") + message(STATUS "Smoke minimalistic flow uses only package default runtime lookup") +elseif(NBL_SMOKE_FLOW STREQUAL "CONFIGURE_ONLY") + nabla_setup_runtime_modules( + TARGETS smoke + RUNTIME_MODULES_SUBDIR "Libraries" + MODE CONFIGURE_TIME + INSTALL_RULES ON + ) +elseif(NBL_SMOKE_FLOW STREQUAL "BUILD_ONLY") + 
nabla_setup_runtime_modules( + TARGETS smoke + RUNTIME_MODULES_SUBDIR "Libraries" + MODE BUILD_TIME + INSTALL_RULES ON + ) +else() + message(FATAL_ERROR "Invalid NBL_SMOKE_FLOW='${NBL_SMOKE_FLOW}'") +endif() + set(CMAKE_CTEST_ARGUMENTS --verbose) enable_testing() -set(OPTS +set(NBL_SMOKE_TEST_ENVIRONMENT NBL_EXPLICIT_MODULE_LOAD_LOG=1 NBL_EXPLICIT_MODULE_REQUEST_LOG=1 - NBL_INSTALL_DIRECTORY=${Nabla_ROOT} ) option(ENABLE_CRASH_HANDLER "Enable crash handler" ON) -if(WIN32) - if(ENABLE_CRASH_HANDLER) - set(CMD - powershell -NoProfile -ExecutionPolicy Bypass - -File "$" - -Exe "$" - ) - endif() -endif() +nabla_smoke_add_install_load_api_test( + TEST_NAME NBL_INSTALL_LOAD_API + EXE_PATH "$" + CRASH_HANDLER_SCRIPT "$" + ENABLE_CRASH_HANDLER ${ENABLE_CRASH_HANDLER} + ENVIRONMENT "${NBL_SMOKE_TEST_ENVIRONMENT}" +) + +if(NBL_SMOKE_INSTALL_SELFTEST) + include(GNUInstallDirs) -if(NOT ENABLE_CRASH_HANDLER) - set(CMD - "$" + install(TARGETS smoke + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + ) + install(FILES "${CMAKE_CURRENT_LIST_DIR}/cdb.ps1" + DESTINATION "${CMAKE_INSTALL_BINDIR}" + ) + + set(_nbl_smoke_install_cmake_dir "${CMAKE_INSTALL_DATADIR}/nabla-smoke") + install(FILES "${CMAKE_CURRENT_LIST_DIR}/NablaSmokeTests.cmake" + DESTINATION "${_nbl_smoke_install_cmake_dir}" ) -endif() -add_test(NAME NBL_INSTALL_LOAD_API COMMAND ${CMD}) -set_tests_properties(NBL_INSTALL_LOAD_API PROPERTIES ENVIRONMENT "${OPTS}") \ No newline at end of file + set(NBL_SMOKE_INSTALL_CMAKE_DIR "${_nbl_smoke_install_cmake_dir}") + configure_file( + "${CMAKE_CURRENT_LIST_DIR}/CTestTestfile.install.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.install.cmake" + @ONLY + ) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.install.cmake" + DESTINATION "." 
+ RENAME "CTestTestfile.cmake" + ) +endif() diff --git a/smoke/CTestTestfile.install.cmake.in b/smoke/CTestTestfile.install.cmake.in new file mode 100644 index 0000000000..c5059ce5d8 --- /dev/null +++ b/smoke/CTestTestfile.install.cmake.in @@ -0,0 +1,10 @@ +include("@NBL_SMOKE_INSTALL_CMAKE_DIR@/NablaSmokeTests.cmake") + +nabla_smoke_add_install_load_api_test( + TEST_NAME NBL_INSTALL_LOAD_API + EXE_PATH "@CMAKE_INSTALL_BINDIR@/smoke@CMAKE_EXECUTABLE_SUFFIX@" + CRASH_HANDLER_SCRIPT "@CMAKE_INSTALL_BINDIR@/cdb.ps1" + ENABLE_CRASH_HANDLER @ENABLE_CRASH_HANDLER@ + LEGACY_CTEST_MODE + ENVIRONMENT "@NBL_SMOKE_TEST_ENVIRONMENT@" +) diff --git a/smoke/NablaSmokeTests.cmake b/smoke/NablaSmokeTests.cmake new file mode 100644 index 0000000000..9e2b796ff7 --- /dev/null +++ b/smoke/NablaSmokeTests.cmake @@ -0,0 +1,39 @@ +function(nabla_smoke_add_install_load_api_test) + set(_nbl_smoke_options LEGACY_CTEST_MODE) + set(_nbl_smoke_one_value_args TEST_NAME EXE_PATH CRASH_HANDLER_SCRIPT ENABLE_CRASH_HANDLER) + set(_nbl_smoke_multi_value_args ENVIRONMENT) + cmake_parse_arguments(_NBL_SMOKE "${_nbl_smoke_options}" "${_nbl_smoke_one_value_args}" "${_nbl_smoke_multi_value_args}" ${ARGN}) + + if(NOT _NBL_SMOKE_TEST_NAME) + message(FATAL_ERROR "nabla_smoke_add_install_load_api_test requires TEST_NAME") + endif() + if(NOT _NBL_SMOKE_EXE_PATH) + message(FATAL_ERROR "nabla_smoke_add_install_load_api_test requires EXE_PATH") + endif() + + if(WIN32 AND _NBL_SMOKE_ENABLE_CRASH_HANDLER) + if(_NBL_SMOKE_LEGACY_CTEST_MODE) + add_test("${_NBL_SMOKE_TEST_NAME}" + powershell -NoProfile -ExecutionPolicy Bypass + -File "${_NBL_SMOKE_CRASH_HANDLER_SCRIPT}" + -Exe "${_NBL_SMOKE_EXE_PATH}" + ) + else() + add_test(NAME "${_NBL_SMOKE_TEST_NAME}" COMMAND + powershell -NoProfile -ExecutionPolicy Bypass + -File "${_NBL_SMOKE_CRASH_HANDLER_SCRIPT}" + -Exe "${_NBL_SMOKE_EXE_PATH}" + ) + endif() + else() + if(_NBL_SMOKE_LEGACY_CTEST_MODE) + add_test("${_NBL_SMOKE_TEST_NAME}" "${_NBL_SMOKE_EXE_PATH}") + else() + 
add_test(NAME "${_NBL_SMOKE_TEST_NAME}" COMMAND "${_NBL_SMOKE_EXE_PATH}") + endif() + endif() + + if(_NBL_SMOKE_ENVIRONMENT) + set_tests_properties("${_NBL_SMOKE_TEST_NAME}" PROPERTIES ENVIRONMENT "${_NBL_SMOKE_ENVIRONMENT}") + endif() +endfunction() diff --git a/smoke/RunSmokeFlow.cmake b/smoke/RunSmokeFlow.cmake new file mode 100644 index 0000000000..9350e6094e --- /dev/null +++ b/smoke/RunSmokeFlow.cmake @@ -0,0 +1,95 @@ +if(NOT DEFINED FLOW) + message(FATAL_ERROR "FLOW is required. Allowed values: MINIMALISTIC, CONFIGURE_ONLY, BUILD_ONLY") +endif() + +string(TOUPPER "${FLOW}" FLOW) +if(NOT FLOW MATCHES "^(MINIMALISTIC|CONFIGURE_ONLY|BUILD_ONLY)$") + message(FATAL_ERROR "Invalid FLOW='${FLOW}'. Allowed values: MINIMALISTIC, CONFIGURE_ONLY, BUILD_ONLY") +endif() + +if(NOT DEFINED CONFIG) + message(FATAL_ERROR "CONFIG is required (e.g. Debug, Release, RelWithDebInfo)") +endif() + +if(NOT DEFINED SMOKE_SOURCE_DIR) + set(SMOKE_SOURCE_DIR "smoke") +endif() + +if(NOT DEFINED BUILD_DIR) + set(BUILD_DIR "smoke/out") +endif() + +if(NOT DEFINED INSTALL_DIR) + set(INSTALL_DIR "${BUILD_DIR}/install") +endif() + +if(NOT DEFINED CTEST_BIN) + if(DEFINED CMAKE_CTEST_COMMAND) + set(CTEST_BIN "${CMAKE_CTEST_COMMAND}") + else() + find_program(CTEST_BIN ctest REQUIRED) + endif() +endif() + +function(run_cmd) + execute_process( + COMMAND ${ARGV} + COMMAND_ECHO STDOUT + RESULT_VARIABLE _rc + ) + if(NOT _rc EQUAL 0) + message(FATAL_ERROR "Command failed with exit code ${_rc}") + endif() +endfunction() + +file(REMOVE_RECURSE "${BUILD_DIR}") + +set(_run_install_selftest ON) +if(FLOW STREQUAL "MINIMALISTIC") + set(_run_install_selftest OFF) +endif() + +run_cmd( + "${CMAKE_COMMAND}" + -S "${SMOKE_SOURCE_DIR}" + -B "${BUILD_DIR}" + -D "NBL_SMOKE_FLOW=${FLOW}" + -D "NBL_SMOKE_INSTALL_SELFTEST=${_run_install_selftest}" +) + +run_cmd( + "${CMAKE_COMMAND}" + --build "${BUILD_DIR}" + --config "${CONFIG}" +) + +run_cmd( + "${CTEST_BIN}" + --verbose + --test-dir "${BUILD_DIR}" + 
--force-new-ctest-process + --output-on-failure + --no-tests=error + -C "${CONFIG}" +) + +if(_run_install_selftest) + file(REMOVE_RECURSE "${INSTALL_DIR}") + + run_cmd( + "${CMAKE_COMMAND}" + --install "${BUILD_DIR}" + --config "${CONFIG}" + --prefix "${INSTALL_DIR}" + ) + + run_cmd( + "${CTEST_BIN}" + --verbose + --test-dir "${INSTALL_DIR}" + --force-new-ctest-process + --output-on-failure + --no-tests=error + -C "${CONFIG}" + ) +endif() diff --git a/smoke/main.cpp b/smoke/main.cpp index 2b1863d404..9510081f21 100644 --- a/smoke/main.cpp +++ b/smoke/main.cpp @@ -1,3 +1,5 @@ +#include + #define ENABLE_SMOKE using namespace nbl; @@ -16,16 +18,6 @@ class Smoke final : public system::IApplicationFramework bool onAppInitialized(smart_refctd_ptr&& system) override { - const char* sdk = std::getenv("NBL_INSTALL_DIRECTORY"); - - if (sdk) - { - auto dir = std::filesystem::absolute(std::filesystem::path(sdk).make_preferred()).string(); - std::cout << "[INFO]: NBL_INSTALL_DIRECTORY = \"" << dir.c_str() << "\"\n"; - } - else - std::cerr << "[INFO]: NBL_INSTALL_DIRECTORY env was not defined!\n"; - if (isAPILoaded()) { std::cout << "[INFO]: Loaded Nabla API\n"; @@ -36,7 +28,25 @@ class Smoke final : public system::IApplicationFramework return false; } - exportGpuProfiles(); + if (!AfxWinInit(GetModuleHandle(nullptr), nullptr, GetCommandLineA(), 0)) + { + std::cerr << "[ERROR]: Could not init AFX, terminating!\n"; + return false; + } + + try { + createAfxDummyWindow(320, 240, nullptr, _T("Dummy 1")); + exportGpuProfiles(); + createAfxDummyWindow(320, 240, nullptr, _T("Dummy 2")); + } + catch (const std::exception& e) { + std::cerr << "[ERROR]: " << e.what() << '\n'; + return false; + } + catch (...) 
{ + std::cerr << "[ERROR]: Unknown exception!\n"; + return false; + } return true; } @@ -44,40 +54,63 @@ class Smoke final : public system::IApplicationFramework void workLoopBody() override {} bool keepRunning() override { return false; } + bool onAppTerminated() override + { + AfxWinTerm(); + return true; + } + private: static void exportGpuProfiles() { - std::string arg2 = "-o"; - std::string buf; - std::string arg1; - std::string arg3; + std::string buf, arg1, arg2 = "-o", arg3; for (size_t i = 0;; i++) { - auto stringifiedIndex = std::to_string(i); - arg1 = "--json=" + stringifiedIndex; - arg3 = "device_" + stringifiedIndex + ".json"; - std::array args = { arg1.data(), arg2.data(), arg3.data() }; + auto six = std::to_string(i); + arg1 = "--json=" + six; + arg3 = "device_" + six + ".json"; + auto args = std::to_array({ arg1.data(), arg2.data(), arg3.data()}); int code = nbl::video::vulkaninfo(args); if (code != 0) break; - // print out file content std::ifstream input(arg3); while (std::getline(input, buf)) - { std::cout << buf << "\n"; - } std::cout << "\n\n"; } } + + static bool createAfxDummyWindow(int w, int h, HWND parent, LPCTSTR windowName) + { + CWnd wnd; + LPCTSTR cls = AfxRegisterWndClass(0, ::LoadCursor(nullptr, IDC_ARROW)); + if (!cls) return false; + + if (!wnd.CreateEx(0, cls, windowName, WS_POPUP | WS_VISIBLE, 0, 0, w, h, parent, nullptr)) + return false; + + MSG msg {}; + const ULONGLONG end = GetTickCount64() + 1000; + while (GetTickCount64() < end) { + while (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + Sleep(1); + } + + wnd.DestroyWindow(); + return true; + } }; NBL_MAIN_FUNC(Smoke) #else int main() { return 0; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 53e45edd22..18a25c8619 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -788,6 +788,26 @@ start_tracking_variables_for_propagation_to_parent() 
add_subdirectory(ext EXCLUDE_FROM_ALL) propagate_changed_variables_to_parent_scope() +if(TARGET ${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB}) + set_target_properties(${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB} PROPERTIES EXCLUDE_FROM_ALL OFF) + nbl_install_lib_spec(${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB} "nbl/ext/FULL_SCREEN_TRIANGLE") + target_link_libraries(Nabla INTERFACE + "$:$/debug/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_d.lib>>" + "$:$/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE.lib>>" + "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_rwdi.lib>>" + ) + + set(_NBL_EXT_FULL_SCREEN_TRIANGLE_BUILTINS_LIB "${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB}_builtinsBuild") + if(NBL_EMBED_BUILTIN_RESOURCES AND TARGET ${_NBL_EXT_FULL_SCREEN_TRIANGLE_BUILTINS_LIB}) + nbl_install_lib_spec(${_NBL_EXT_FULL_SCREEN_TRIANGLE_BUILTINS_LIB} "nbl/ext/FULL_SCREEN_TRIANGLE") + target_link_libraries(Nabla INTERFACE + "$:$/debug/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild_d.lib>>" + "$:$/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild.lib>>" + "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild.lib>>" + ) + endif() +endif() + nbl_install_headers("${NABLA_HEADERS_PUBLIC}") set_target_properties(Nabla PROPERTIES DEBUG_POSTFIX _debug) set_target_properties(Nabla PROPERTIES RELWITHDEBINFO_POSTFIX _relwithdebinfo) @@ -805,6 +825,7 @@ target_include_directories(Nabla PUBLIC target_compile_definitions(Nabla INTERFACE "$" + "$" ) if(NBL_STATIC_BUILD) @@ -845,10 +866,33 @@ nbl_install_dir_spec(../../include/nbl/application_templates nbl) # note: order important, keep after install rules due to NBL_3RDPARTY_DXC_NS_PACKAGE_RUNTIME_DLL_DIR_PATH property get_property(_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_ GLOBAL PROPERTY NBL_3RDPARTY_DXC_NS_PACKAGE_RUNTIME_DLL_DIR_PATH) get_target_property(_NBL_NABLA_PACKAGE_RUNTIME_DLL_DIR_PATH_ Nabla 
NBL_PACKAGE_RUNTIME_DLL_DIR_PATH) +set(_NBL_CONSUMER_BIN_DIR_GE_ + "$,$>>" +) +set(_NBL_NABLA_RUNTIME_DLL_DIR_GE_ + "$>" +) +set(_NBL_DXC_RUNTIME_DLL_DIR_GE_ + "$,3rdparty,dxc>>" +) +set(_NBL_NABLA_RUNTIME_SAME_ROOT_AS_CONSUMER_GE_ + "$,$>" +) +set(_NBL_DXC_RUNTIME_SAME_ROOT_AS_CONSUMER_GE_ + "$,$>" +) +set(_NBL_NABLA_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_ + "$,>" +) +set(_NBL_DXC_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_ + "$,>" +) target_compile_definitions(Nabla INTERFACE NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY="${_NBL_NABLA_PACKAGE_RUNTIME_DLL_DIR_PATH_}" INTERFACE NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY="${_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_}" + INTERFACE "$" + INTERFACE "$" ) NBL_ADJUST_FOLDERS(src) diff --git a/src/nbl/asset/ICPUPolygonGeometry.cpp b/src/nbl/asset/ICPUPolygonGeometry.cpp index 1acff42876..1af3c8c5d6 100644 --- a/src/nbl/asset/ICPUPolygonGeometry.cpp +++ b/src/nbl/asset/ICPUPolygonGeometry.cpp @@ -12,8 +12,8 @@ class CListIndexingCB final : public IPolygonGeometryBase::IIndexingCallback template static void operator_impl(SContext& ctx) { - auto indexOfIndex = ctx.beginPrimitive*3; - for (const auto end=ctx.endPrimitive*3; indexOfIndex!=end; indexOfIndex+=3) + auto indexOfIndex = ctx.beginPrimitive*Order; + for (const auto end=ctx.endPrimitive*Order; indexOfIndex!=end; indexOfIndex+=Order) ctx.streamOut(indexOfIndex,std::ranges::iota_view{0,int(Order)}); } @@ -70,7 +70,7 @@ class CTriangleStripIndexingCB final : public IPolygonGeometryBase::IIndexingCal indexOfIndex = ctx.beginPrimitive+2; const int32_t perm[] = {-1,-2,0}; for (const auto end=ctx.endPrimitive+2; indexOfIndex!=end; indexOfIndex++) - ctx.streamOut(indexOfIndex,perm); + ctx.streamOut>(indexOfIndex,perm); } public: @@ -106,7 +106,7 @@ class CTriangleFanIndexingCB final : public IPolygonGeometryBase::IIndexingCallb { // first index is always global 0 perm[0] = -indexOfIndex; - ctx.streamOut(indexOfIndex,perm); + ctx.streamOut>(indexOfIndex,perm); } } diff --git 
a/src/nbl/asset/interchange/IAssetLoader.cpp b/src/nbl/asset/interchange/IAssetLoader.cpp index 9a881b300b..4a9a8f0378 100644 --- a/src/nbl/asset/interchange/IAssetLoader.cpp +++ b/src/nbl/asset/interchange/IAssetLoader.cpp @@ -10,7 +10,7 @@ using namespace nbl::core; using namespace nbl::asset; // todo NEED DOCS -IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(IAssetManager* _manager) : m_manager(_manager), m_system(m_manager->getSystem()) +IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(SCreationParams&& params) : m_creationParams(std::move(params)) { } @@ -20,7 +20,7 @@ SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::stri if ((levelFlag & ECF_DUPLICATE_TOP_LEVEL) == ECF_DUPLICATE_TOP_LEVEL) return {}; - auto found = m_manager->findAssets(inSearchKey, inAssetTypes); + auto found = getManager()->findAssets(inSearchKey, inAssetTypes); if (!found->size()) return handleSearchFail(inSearchKey, ctx, hierarchyLevel); return chooseRelevantFromFound(found->begin(), found->end(), ctx, hierarchyLevel); @@ -28,11 +28,11 @@ SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::stri void IAssetLoader::IAssetLoaderOverride::insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadParams& _params, const uint32_t hierarchyLevel) { - m_manager->changeAssetKey(asset, supposedKey); + getManager()->changeAssetKey(asset, supposedKey); auto levelFlag = _params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); if (!(levelFlag&ECF_DONT_CACHE_TOP_LEVEL)) - m_manager->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); + getManager()->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); } SAssetBundle IAssetLoader::interm_getAssetInHierarchy(system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) diff --git 
a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index b4f2f2ef06..818751052b 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -20,257 +20,163 @@ namespace nbl::asset { -core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) +core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo, const bool reverse, const bool recomputeHash) { - const auto* indexing = inGeo->getIndexingCallback(); - if (!indexing) - return nullptr; + const auto* indexing = inGeo->getIndexingCallback(); + if (!indexing) + return nullptr; - const auto indexView = inGeo->getIndexView(); - const auto primCount = inGeo->getPrimitiveCount(); - const uint8_t degree = indexing->degree(); - const auto outIndexCount = primCount*degree; - if (outIndexCountgetIndexView(); + const auto primCount = inGeo->getPrimitiveCount(); + const uint8_t degree = indexing->degree(); + const auto outIndexCount = primCount*degree; + if (outIndexCount(inGeo->clone(0u)); + const auto outGeometry = core::move_and_static_cast(inGeo->clone(0u)); - auto* outGeo = outGeometry.get(); - outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); + auto* outGeo = outGeometry.get(); + outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); - auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView - { - if (!inView) - return {}; - auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); - return { - .composed = inView.composed, - .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} - }; + auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView + { + if (!inView) + return {}; + auto buffer = ICPUBuffer::create({ 
outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); + return { + .composed = inView.composed, + .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} }; + }; - const auto inIndexView = inGeo->getIndexView(); - auto outIndexView = createOutView(inIndexView); - auto indexBuffer = outIndexView.src.buffer; - const auto indexSize = inIndexView.composed.stride; - std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); - outGeo->setIndexView({}); - - const auto inVertexView = inGeo->getPositionView(); - auto outVertexView = createOutView(inVertexView); - auto vertexBuffer = outVertexView.src.buffer; - const auto vertexSize = inVertexView.composed.stride; - const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); - std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); - outGeo->setPositionView(std::move(outVertexView)); - - const auto inNormalView = inGeo->getNormalView(); - const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); - auto outNormalView = createOutView(inNormalView); - auto outNormalBuffer = outNormalView.src.buffer; - outGeo->setNormalView(std::move(outNormalView)); - - outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; - auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); - outJointWeightView.indices = createOutView(inJointWeightView.indices); - outJointWeightView.weights = createOutView(inJointWeightView.weights); - } + const auto inIndexView = inGeo->getIndexView(); + auto outIndexView = createOutView(inIndexView); + auto indexBuffer = outIndexView.src.buffer; + const auto indexSize = inIndexView.composed.stride; + std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); + 
outGeo->setIndexView({}); + + const auto inVertexView = inGeo->getPositionView(); + auto outVertexView = createOutView(inVertexView); + auto vertexBuffer = outVertexView.src.buffer; + const auto vertexSize = inVertexView.composed.stride; + const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); + std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); + outGeo->setPositionView(std::move(outVertexView)); + + const auto inNormalView = inGeo->getNormalView(); + const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); + auto outNormalView = createOutView(inNormalView); + auto outNormalBuffer = outNormalView.src.buffer; + outGeo->setNormalView(std::move(outNormalView)); + + outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; + auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); + outJointWeightView.indices = createOutView(inJointWeightView.indices); + outJointWeightView.weights = createOutView(inJointWeightView.weights); + } - outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); + outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); - std::array indices; - for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) + std::array indices; + for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) + { + 
IPolygonGeometryBase::IIndexingCallback::SContext context{ + .indexBuffer = indexView.getPointer(), + .indexSize = indexView.composed.stride, + .beginPrimitive = prim_i, + .endPrimitive = prim_i + 1, + .reversePrims = reverse, + .out = indices.data() + }; + indexing->operator()(context); + for (uint8_t primIndex_i=0; primIndex_i context{ - .indexBuffer = indexView.getPointer(), - .indexSize = indexView.composed.stride, - .beginPrimitive = prim_i, - .endPrimitive = prim_i + 1, - .out = indices.data() - }; - indexing->operator()(context); - for (uint8_t primIndex_i=0; primIndex_i(outNormalBuffer->getPointer()); - const auto normalSize = inNormalView.composed.stride; - memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); - } - - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inView = inGeo->getJointWeightViews()[jointView_i]; - auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); - - const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); - const auto jointIndexSize = inView.indices.composed.stride; - std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); - memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); - - const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); - const auto jointWeightSize = inView.weights.composed.stride; - std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); - memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); - } - - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - { - auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; - auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); - const auto attrSize = inView.composed.stride; - 
const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); - std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); - memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); - } - } + const auto outIndex = prim_i * degree + primIndex_i; + const auto inIndex = indices[primIndex_i]; + // TODO: these memcpys from view to view could really be DRY-ed and lambdified + memcpy(outIndices + outIndex * indexSize, &outIndex, indexSize); + memcpy(outVertices + outIndex * vertexSize, inVertices + inIndex * vertexSize, vertexSize); + if (inNormalView) + { + std::byte* const outNormals = reinterpret_cast(outNormalBuffer->getPointer()); + const auto normalSize = inNormalView.composed.stride; + memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); + } + + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inView = inGeo->getJointWeightViews()[jointView_i]; + auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); + + const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); + const auto jointIndexSize = inView.indices.composed.stride; + std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); + memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); + + const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); + const auto jointWeightSize = inView.weights.composed.stride; + std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); + memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); + } + + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + { + auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; + auto& outView = 
outGeometry->getAuxAttributeViews()->operator[](auxView_i); + const auto attrSize = inView.composed.stride; + const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); + std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); + memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); + } } + } + + if (recomputeHash) + recomputeContentHashes(outGeo); - recomputeContentHashes(outGeo); - return outGeometry; + return outGeometry; } -core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp) +core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp, const bool recomputeHash) { - if (!inPolygon) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - // Mesh need to be unwelded (TODO: why? the output only need to be unwelded, really should be checking `inPolygon->getIndexingCallback()->count()!=3`) - if (inPolygon->getIndexView() && inPolygon->getIndexingCallback()!=IPolygonGeometryBase::TriangleList()) - { + if (!inPolygon) + { _NBL_DEBUG_BREAK_IF(true); return nullptr; - } + } - auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); - if (enableWelding) - { - auto weldPredicate = CVertexWelder::DefaultWeldPredicate(epsilon); - return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, weldPredicate); - } - return result.geom; -} + if (!inPolygon->getIndexingCallback() && inPolygon->getIndexingCallback()->degree()!=3) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } -#if 0 -//! Flips the direction of surfaces. Changes backfacing triangles to frontfacing -//! triangles and vice versa. -//! \param mesh: Mesh on which the operation is performed. 
-void IMeshManipulator::flipSurfaces(ICPUMeshBuffer* inbuffer) -{ - if (!inbuffer) - return; - auto* pipeline = inbuffer->getPipeline(); - const E_PRIMITIVE_TOPOLOGY primType = pipeline->getCachedCreationParams().primitiveAssembly.primitiveType; + // right now we can't handle this, see TODOs in CSmoothNormalGenerator + if (inPolygon->getIndexView()) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } - const uint32_t idxcnt = inbuffer->getIndexCount(); - if (!inbuffer->getIndices()) - return; + auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp, false); + if (enableWelding) + { + auto weldPredicate = CVertexWelder::DefaultWeldPredicate(epsilon); + result.geom = CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, weldPredicate, false); + } - if (inbuffer->getIndexType() == EIT_16BIT) - { - uint16_t* idx = reinterpret_cast(inbuffer->getIndices()); - switch (primType) - { - case EPT_TRIANGLE_FAN: - for (uint32_t i = 1; i < idxcnt; i += 2) - { - const uint16_t tmp = idx[i]; - idx[i] = idx[i + 1]; - idx[i + 1] = tmp; - } - break; - case EPT_TRIANGLE_STRIP: - if (idxcnt % 2) //odd - { - for (uint32_t i = 0; i < (idxcnt >> 1); i++) - { - const uint16_t tmp = idx[i]; - idx[i] = idx[idxcnt - 1 - i]; - idx[idxcnt - 1 - i] = tmp; - } - } - else //even - { - auto newIndexBuffer = ICPUBuffer::create({ (idxcnt + 1u) * sizeof(uint16_t) }); - auto* destPtr = reinterpret_cast(newIndexBuffer->getPointer()); - destPtr[0] = idx[0]; - memcpy(destPtr + 1u, idx, sizeof(uint16_t) * idxcnt); - inbuffer->setIndexCount(idxcnt + 1u); - SBufferBinding ixBufBinding{ 0u, std::move(newIndexBuffer) }; - inbuffer->setIndexBufferBinding(std::move(ixBufBinding)); - } - break; - case EPT_TRIANGLE_LIST: - for (uint32_t i = 0; i < idxcnt; i += 3) - { - const uint16_t tmp = idx[i + 1]; - idx[i + 1] = idx[i + 2]; - idx[i + 2] = tmp; - } - break; - default: break; - } - } - else if (inbuffer->getIndexType() == EIT_32BIT) - { - uint32_t* idx = 
reinterpret_cast(inbuffer->getIndices()); - switch (primType) - { - case EPT_TRIANGLE_FAN: - for (uint32_t i = 1; i < idxcnt; i += 2) - { - const uint32_t tmp = idx[i]; - idx[i] = idx[i + 1]; - idx[i + 1] = tmp; - } - break; - case EPT_TRIANGLE_STRIP: - if (idxcnt % 2) //odd - { - for (uint32_t i = 0; i < (idxcnt >> 1); i++) - { - const uint32_t tmp = idx[i]; - idx[i] = idx[idxcnt - 1 - i]; - idx[idxcnt - 1 - i] = tmp; - } - } - else //even - { - auto newIndexBuffer = ICPUBuffer::create({ (idxcnt + 1u) * sizeof(uint32_t) }); - auto* destPtr = reinterpret_cast(newIndexBuffer->getPointer()); - destPtr[0] = idx[0]; - memcpy(destPtr + 1u, idx, sizeof(uint32_t) * idxcnt); - inbuffer->setIndexCount(idxcnt + 1); - SBufferBinding ixBufBinding{ 0u, std::move(newIndexBuffer) }; - inbuffer->setIndexBufferBinding(std::move(ixBufBinding)); - } - break; - case EPT_TRIANGLE_LIST: - for (uint32_t i = 0; i < idxcnt; i += 3) - { - const uint32_t tmp = idx[i + 1]; - idx[i + 1] = idx[i + 2]; - idx[i + 2] = tmp; - } - break; - default: break; - } - } + if (recomputeHash) + recomputeContentHashes(result.geom.get()); + return result.geom; } +#if 0 core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOptimized(const ICPUMeshBuffer* _inbuffer) { if (!_inbuffer) @@ -401,128 +307,6 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp return outbuffer; } -//! 
Creates a copy of the mesh, which will only consist of unique primitives -core::smart_refctd_ptr IMeshManipulator::createMeshBufferUniquePrimitives(ICPUMeshBuffer* inbuffer, bool _makeIndexBuf) -{ - if (!inbuffer) - return nullptr; - const ICPURenderpassIndependentPipeline* oldPipeline = inbuffer->getPipeline(); - if (!oldPipeline) - return nullptr; - - const uint32_t idxCnt = inbuffer->getIndexCount(); - if (idxCnt<2u || !inbuffer->getIndices()) - return core::smart_refctd_ptr(inbuffer); // yes we want an extra grab - - const auto& oldVtxParams = oldPipeline->getCachedCreationParams().vertexInput; - - auto clone = core::move_and_static_cast(inbuffer->clone(0u)); - - constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; - - auto pipeline = core::smart_refctd_ptr_static_cast(oldPipeline->clone(0u)); - auto& vtxParams = pipeline->getCachedCreationParams().vertexInput; - vtxParams = SVertexInputParams(); - - vtxParams.enabledBindingFlags = (1u<getAttribBoundBuffer(i); - if (inbuffer->isAttributeEnabled(i) && vbuf.buffer) - { - offset[i] = stride; - newAttribSizes[i] = getTexelOrBlockBytesize(inbuffer->getAttribFormat(i)); - stride += newAttribSizes[i]; - if (stride>=0xdeadbeefu) - return nullptr; - - sourceBuffers[i] = reinterpret_cast(vbuf.buffer->getPointer()); - sourceBuffers[i] += inbuffer->getAttribCombinedOffset(i); - sourceBufferStrides[i] = inbuffer->getAttribStride(i); - } - else - offset[i] = -1; - } - - vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; - vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = stride; - - auto vertexBuffer = ICPUBuffer::create({ stride*idxCnt }); - clone->setVertexBufferBinding({0u, vertexBuffer}, 0u); - for (size_t i=0; i= 0) - { - vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; - vtxParams.attributes[i].format = inbuffer->getAttribFormat(i); - vtxParams.attributes[i].relativeOffset = offset[i]; - } - } - - uint8_t* destPointer = reinterpret_cast(vertexBuffer->getPointer()); - if 
(inbuffer->getIndexType()==EIT_16BIT) - { - uint16_t* idx = reinterpret_cast(inbuffer->getIndices()); - for (uint64_t i=0; igetBaseVertex())*sourceBufferStrides[j],newAttribSizes[j]); - destPointer += newAttribSizes[j]; - } - } - else if (inbuffer->getIndexType()==EIT_32BIT) - { - uint32_t* idx = reinterpret_cast(inbuffer->getIndices()); - for (uint64_t i=0; igetBaseVertex())*sourceBufferStrides[j],newAttribSizes[j]); - destPointer += newAttribSizes[j]; - } - } - - clone->setPipeline(std::move(pipeline)); - - if (_makeIndexBuf) - { - auto idxbuf = ICPUBuffer::create({ idxCnt*(idxCnt<0x10000 ? 2u : 4u) }); - if (idxCnt<0x10000u) - { - for (uint32_t i = 0u; i < idxCnt; ++i) - reinterpret_cast(idxbuf->getPointer())[i] = i; - clone->setIndexType(EIT_16BIT); - } - else - { - for (uint32_t i = 0u; i < idxCnt; ++i) - reinterpret_cast(idxbuf->getPointer())[i] = i; - clone->setIndexType(EIT_32BIT); - } - clone->setIndexBufferBinding({ 0u, std::move(idxbuf) }); - } - else - { - clone->setIndexType(EIT_UNKNOWN); - } - } - - return clone; -} core::smart_refctd_ptr IMeshManipulator::createOptimizedMeshBuffer(const ICPUMeshBuffer* _inbuffer, const SErrorMetric* _errMetric) { @@ -1358,78 +1142,5 @@ bool CMeshManipulator::calcMaxQuantizationError(const SAttribTypeChoice& _srcTyp return true; } - -core::smart_refctd_ptr IMeshManipulator::idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) -{ - if (_inIndexType == EIT_16BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - else - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - } - else if (_inIndexType == EIT_32BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - else - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - } - return nullptr; -} - -core::smart_refctd_ptr 
IMeshManipulator::idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) -{ - if (_inIndexType == EIT_16BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - else - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - } - else if (_inIndexType == EIT_32BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - else - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - } - return nullptr; -} - -core::smart_refctd_ptr IMeshManipulator::idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) -{ - if (_inIndexType == EIT_16BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - else - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - } - else if (_inIndexType == EIT_32BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - else - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - } - return nullptr; -} - -float IMeshManipulator::DistanceToLine(core::vectorSIMDf P0, core::vectorSIMDf P1, core::vectorSIMDf InPoint) -{ - core::vectorSIMDf PointToStart = InPoint - P0; - core::vectorSIMDf Diff = core::cross(P0 - P1, PointToStart); - - return core::dot(Diff, Diff).x; -} - -float IMeshManipulator::DistanceToPlane(core::vectorSIMDf InPoint, core::vectorSIMDf PlanePoint, core::vectorSIMDf PlaneNormal) -{ - core::vectorSIMDf PointToPlane = InPoint - PlanePoint; - - return (core::dot(PointToPlane, PlaneNormal).x >= 0) ? 
core::abs(core::dot(PointToPlane, PlaneNormal).x) : 0; -} - #endif } // end namespace nbl::asset diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index f8bc45a317..a1884bd191 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -29,13 +29,18 @@ static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32 return (difference.x <= epsilon && difference.y <= epsilon && difference.z <= epsilon); } -CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp) +CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp, const bool recomputeHash) { assert(polygon->getIndexingCallback()->degree() == 3); + static constexpr auto MinEpsilon = 0.00001f; const auto patchedEpsilon = epsilon < MinEpsilon ? 
MinEpsilon : epsilon; VertexHashMap vertexHashMap = setupData(polygon, patchedEpsilon); + const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, patchedEpsilon,vxcmp); + if (recomputeHash) + CPolygonGeometryManipulator::recomputeContentHashes(smoothPolygon.get()); + return { vertexHashMap, smoothPolygon }; } @@ -51,18 +56,25 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as { //calculate face normal of parent triangle hlsl::float32_t3 v0, v1, v2; + // TODO: could iterate over an index buffer properly polygon->getPositionView().decodeElement(i, v0); polygon->getPositionView().decodeElement(i + 1, v1); polygon->getPositionView().decodeElement(i + 2, v2); - const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); + auto faceNormal = cross(v1 - v0, v2 - v0); + // if any triangle edge is 0 length, the cross product will be 0 length too + const float normLen2 = dot(faceNormal,faceNormal); + // need to filter invalid triangles while we're at it + if (normLen2::min) + continue; + faceNormal *= hlsl::rsqrt(normLen2); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); + const auto angleWeights = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); - vertices.add({ i, 0, faceNormal * angleWages.x, v0}); - vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); - vertices.add({ i + 2, 0, faceNormal * angleWages.z, v2}); + vertices.add({ i, 0, faceNormal * angleWeights.x, v0}); + vertices.add({ i + 1, 0, faceNormal * angleWeights.y,v1}); + vertices.add({ i + 2, 0, faceNormal * angleWeights.z, v2}); } vertices.bake(); @@ -70,14 +82,16 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as return vertices; } -core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, VxCmpFunction vxcmp) 
+core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, const float epsilon, VxCmpFunction vxcmp) { + // TODO: its semi doable to defer unwelding/rewelding until later an just work on a duplicated normal buffer only auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); static constexpr auto NormalFormat = EF_R32G32B32_SFLOAT; const auto normalFormatBytesize = asset::getTexelOrBlockBytesize(NormalFormat); auto normalBuf = ICPUBuffer::create({ normalFormatBytesize * outPolygon->getPositionView().getElementCount()}); auto normalView = polygon->getNormalView(); + // TODO: compute actual range hlsl::shapes::AABB<4,hlsl::float32_t> aabb; aabb.maxVx = hlsl::float32_t4(1, 1, 1, 0.f); aabb.minVx = -aabb.maxVx; @@ -118,8 +132,6 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne memcpy(normalPtr + (normalStride * processedVertex.index), &normal, sizeof(normal)); } - CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); - return outPolygon; } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 6ac4daf6c4..7c9bf5358f 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -11,18 +11,20 @@ namespace nbl::asset { // TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument -class CSmoothNormalGenerator +class CSmoothNormalGenerator final { public: CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; - struct VertexData - { - uint32_t index; //offset of the vertex into index buffer + struct VertexData + { + //offset of the vertex into index buffer + uint32_t index; uint32_t hash; - hlsl::float32_t3 weightedNormal; - hlsl::float32_t3 position; //position of the vertex in 3D space + hlsl::float32_t3 weightedNormal; + //position of the vertex in 3D space + hlsl::float32_t3 
position; hlsl::float32_t3 getPosition() const { @@ -39,21 +41,20 @@ class CSmoothNormalGenerator return hash; }; - }; + }; using VxCmpFunction = std::function; using VertexHashMap = CVertexHashGrid; - struct Result - { + struct Result + { VertexHashMap vertexHashGrid; core::smart_refctd_ptr geom; - }; - static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction function); + }; + static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction function, const bool recomputeHash=true); private: - static VertexHashMap setupData(const ICPUPolygonGeometry* polygon, float epsilon); static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, VxCmpFunction vxcmp); }; diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index a6cd95b441..3f5a3bab17 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/utils/IShaderCompiler.h" #include "nbl/asset/utils/shadercUtils.h" -#include "nbl/asset/utils/shaderCompiler_serialization.h" #include #include @@ -13,10 +12,258 @@ #include #include +#include "nlohmann/json.hpp" + +using json = nlohmann::json; +using SEntry = nbl::asset::IShaderCompiler::CCache::SEntry; using namespace nbl; using namespace nbl::asset; +// -> serialization +// SMacroData, simple container used in SPreprocessorArgs +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = IShaderCompiler::SMacroDefinition; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "identifier", p.identifier }, + { "definition", p.definition }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("identifier").get_to(p.identifier); + 
j.at("definition").get_to(p.definition); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// SPreprocessorData, holds serialized info for Preprocessor options used during compilation +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry::SPreprocessorArgs; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "sourceIdentifier", p.sourceIdentifier }, + { "extraDefines", p.extraDefines}, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("sourceIdentifier").get_to(p.sourceIdentifier); + j.at("extraDefines").get_to(p.extraDefines); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// Optimizer pass has its own method for easier vector serialization +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = ISPIRVOptimizer::E_OPTIMIZER_PASS; + + static inline void to_json(::json& j, const value_t& p) + { + uint32_t value = static_cast(p); + j = ::json{ + { "optPass", value }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint32_t aux; + j.at("optPass").get_to(aux); + p = static_cast(aux); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// SCompilerArgs, holds serialized info for all Compilation options +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry::SCompilerArgs; + + static inline void to_json(::json& j, const value_t& p) + { + uint32_t shaderStage = static_cast(p.stage); + uint32_t spirvVersion = static_cast(p.targetSpirvVersion); + uint32_t debugFlags = static_cast(p.debugInfoFlags.value); + + j = ::json{ + { "shaderStage", shaderStage }, + { "spirvVersion", spirvVersion }, + { "optimizerPasses", p.optimizerPasses }, + { "debugFlags", debugFlags }, + { "preprocessorArgs", p.preprocessorArgs }, + }; + } + + static inline void from_json(const 
::json& j, value_t& p) + { + uint32_t shaderStage, spirvVersion, debugFlags; + j.at("shaderStage").get_to(shaderStage); + j.at("spirvVersion").get_to(spirvVersion); + j.at("optimizerPasses").get_to(p.optimizerPasses); + j.at("debugFlags").get_to(debugFlags); + j.at("preprocessorArgs").get_to(p.preprocessorArgs); + p.stage = static_cast(shaderStage); + p.targetSpirvVersion = static_cast(spirvVersion); + p.debugInfoFlags = core::bitflag(debugFlags); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// Serialize clock's time point +using time_point_t = nbl::system::IFileBase::time_point_t; +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = time_point_t; + + static inline void to_json(::json& j, const value_t& p) + { + auto ticks = p.time_since_epoch().count(); + j = ::json{ + { "ticks", ticks }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint64_t ticks; + j.at("ticks").get_to(ticks); + p = time_point_t(time_point_t::clock::duration(ticks)); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// SDependency serialization. 
Dependencies will be saved in a vector for easier vectorization +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry::SPreprocessingDependency; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "requestingSourceDir", p.requestingSourceDir }, + { "identifier", p.identifier }, + { "hash", p.hash.data }, + { "standardInclude", p.standardInclude }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("requestingSourceDir").get_to(p.requestingSourceDir); + j.at("identifier").get_to(p.identifier); + j.at("hash").get_to(p.hash.data); + j.at("standardInclude").get_to(p.standardInclude); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// We serialize shader creation parameters into a json, along with indexing info into the .bin buffer where the cache is serialized +struct CPUShaderCreationParams { + IShader::E_SHADER_STAGE stage; + std::string filepathHint; + uint64_t codeByteSize = 0; + uint64_t offset = 0; // Offset into the serialized .bin for the Cache where code starts + + CPUShaderCreationParams(IShader::E_SHADER_STAGE _stage, std::string_view _filepathHint, uint64_t _codeByteSize, uint64_t _offset) + : stage(_stage), filepathHint(_filepathHint), codeByteSize(_codeByteSize), offset(_offset) {} + CPUShaderCreationParams() {}; +}; + +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = CPUShaderCreationParams; + + static inline void to_json(::json& j, const value_t& p) + { + uint32_t stage = static_cast(p.stage); + j = ::json{ + { "stage", stage }, + { "filepathHint", p.filepathHint }, + { "codeByteSize", p.codeByteSize }, + { "offset", p.offset }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint32_t stage; + j.at("stage").get_to(stage); + j.at("filepathHint").get_to(p.filepathHint); + j.at("codeByteSize").get_to(p.codeByteSize); + 
j.at("offset").get_to(p.offset); + p.stage = static_cast(stage); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// Serialize SEntry, keeping some fields as extra serialization to keep them separate on disk +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "mainFileContents", p.mainFileContents }, + { "compilerArgs", p.compilerArgs }, + { "hash", p.hash.data }, + { "lookupHash", p.lookupHash }, + { "dependencies", p.dependencies }, + { "uncompressedContentHash", p.uncompressedContentHash.data }, + { "uncompressedSize", p.uncompressedSize }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("mainFileContents").get_to(p.mainFileContents); + j.at("compilerArgs").get_to(p.compilerArgs); + j.at("hash").get_to(p.hash.data); + j.at("lookupHash").get_to(p.lookupHash); + j.at("dependencies").get_to(p.dependencies); + j.at("uncompressedContentHash").get_to(p.uncompressedContentHash.data); + j.at("uncompressedSize").get_to(p.uncompressedSize); + p.spirv = nullptr; + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) +// <- serialization + IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& system) : m_system(std::move(system)) { diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h deleted file mode 100644 index 6ad33a2ff5..0000000000 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef _NBL_ASSET_SHADER_COMPILER_SERIALIZATION_H_INCLUDED_ -#define _NBL_ASSET_SHADER_COMPILER_SERIALIZATION_H_INCLUDED_ - -#include "nbl/asset/utils/IShaderCompiler.h" -#include "nlohmann/json.hpp" - -using json = nlohmann::json; -using SEntry = nbl::asset::IShaderCompiler::CCache::SEntry; - - -namespace nbl::asset -{ - -// TODO: use 
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE whenever possible - -// SMacroData, simple container used in SPreprocessorArgs - -inline void to_json(json& j, const IShaderCompiler::SMacroDefinition& macroData) -{ - j = json{ - { "identifier", macroData.identifier }, - { "definition", macroData.definition }, - }; -} - -inline void from_json(const json& j, IShaderCompiler::SMacroDefinition& macroData) -{ - j.at("identifier").get_to(macroData.identifier); - j.at("definition").get_to(macroData.definition); -} - -// SPreprocessorData, holds serialized info for Preprocessor options used during compilation -inline void to_json(json& j, const SEntry::SPreprocessorArgs& preprocArgs) -{ - j = json{ - { "sourceIdentifier", preprocArgs.sourceIdentifier }, - { "extraDefines", preprocArgs.extraDefines}, - }; -} - -inline void from_json(const json& j, SEntry::SPreprocessorArgs& preprocArgs) -{ - j.at("sourceIdentifier").get_to(preprocArgs.sourceIdentifier); - j.at("extraDefines").get_to(preprocArgs.extraDefines); -} - -// Optimizer pass has its own method for easier vector serialization - -inline void to_json(json& j, const ISPIRVOptimizer::E_OPTIMIZER_PASS& optPass) -{ - uint32_t value = static_cast(optPass); - j = json{ - { "optPass", value }, - }; -} - -inline void from_json(const json& j, ISPIRVOptimizer::E_OPTIMIZER_PASS& optPass) -{ - uint32_t aux; - j.at("optPass").get_to(aux); - optPass = static_cast(aux); -} - -// SCompilerArgs, holds serialized info for all Compilation options - -inline void to_json(json& j, const SEntry::SCompilerArgs& compilerData) -{ - uint32_t shaderStage = static_cast(compilerData.stage); - uint32_t spirvVersion = static_cast(compilerData.targetSpirvVersion); - uint32_t debugFlags = static_cast(compilerData.debugInfoFlags.value); - - j = json { - { "shaderStage", shaderStage }, - { "spirvVersion", spirvVersion }, - { "optimizerPasses", compilerData.optimizerPasses }, - { "debugFlags", debugFlags }, - { "preprocessorArgs", compilerData.preprocessorArgs }, - }; 
-} - -inline void from_json(const json& j, SEntry::SCompilerArgs& compilerData) -{ - uint32_t shaderStage, spirvVersion, debugFlags; - j.at("shaderStage").get_to(shaderStage); - j.at("spirvVersion").get_to(spirvVersion); - j.at("optimizerPasses").get_to(compilerData.optimizerPasses); - j.at("debugFlags").get_to(debugFlags); - j.at("preprocessorArgs").get_to(compilerData.preprocessorArgs); - compilerData.stage = static_cast(shaderStage); - compilerData.targetSpirvVersion = static_cast(spirvVersion); - compilerData.debugInfoFlags = core::bitflag(debugFlags); -} - -// Serialize clock's time point -using time_point_t = nbl::system::IFileBase::time_point_t; - -inline void to_json(json& j, const time_point_t& timePoint) -{ - auto ticks = timePoint.time_since_epoch().count(); - j = json{ - { "ticks", ticks }, - }; -} - -inline void from_json(const json& j, time_point_t& timePoint) -{ - uint64_t ticks; - j.at("ticks").get_to(ticks); - timePoint = time_point_t(time_point_t::clock::duration(ticks)); -} - -// SDependency serialization. 
Dependencies will be saved in a vector for easier vectorization - -inline void to_json(json& j, const SEntry::SPreprocessingDependency& dependency) -{ - j = json{ - { "requestingSourceDir", dependency.requestingSourceDir }, - { "identifier", dependency.identifier }, - { "hash", dependency.hash.data }, - { "standardInclude", dependency.standardInclude }, - }; -} - -inline void from_json(const json& j, SEntry::SPreprocessingDependency& dependency) -{ - j.at("requestingSourceDir").get_to(dependency.requestingSourceDir); - j.at("identifier").get_to(dependency.identifier); - j.at("hash").get_to(dependency.hash.data); - j.at("standardInclude").get_to(dependency.standardInclude); -} - -// We serialize shader creation parameters into a json, along with indexing info into the .bin buffer where the cache is serialized - -struct CPUShaderCreationParams { - IShader::E_SHADER_STAGE stage; - std::string filepathHint; - uint64_t codeByteSize = 0; - uint64_t offset = 0; // Offset into the serialized .bin for the Cache where code starts - - CPUShaderCreationParams(IShader::E_SHADER_STAGE _stage, std::string_view _filepathHint, uint64_t _codeByteSize, uint64_t _offset) - : stage(_stage), filepathHint(_filepathHint), codeByteSize(_codeByteSize), offset(_offset) - {} - - CPUShaderCreationParams() {}; -}; - -inline void to_json(json& j, const CPUShaderCreationParams& creationParams) -{ - uint32_t stage = static_cast(creationParams.stage); - j = json{ - { "stage", stage }, - { "filepathHint", creationParams.filepathHint }, - { "codeByteSize", creationParams.codeByteSize }, - { "offset", creationParams.offset }, - }; -} - -inline void from_json(const json& j, CPUShaderCreationParams& creationParams) -{ - uint32_t stage; - j.at("stage").get_to(stage); - j.at("filepathHint").get_to(creationParams.filepathHint); - j.at("codeByteSize").get_to(creationParams.codeByteSize); - j.at("offset").get_to(creationParams.offset); - creationParams.stage = static_cast(stage); -} - -// Serialize SEntry, 
keeping some fields as extra serialization to keep them separate on disk - -inline void to_json(json& j, const SEntry& entry) -{ - j = json{ - { "mainFileContents", entry.mainFileContents }, - { "compilerArgs", entry.compilerArgs }, - { "hash", entry.hash.data }, - { "lookupHash", entry.lookupHash }, - { "dependencies", entry.dependencies }, - { "uncompressedContentHash", entry.uncompressedContentHash.data }, - { "uncompressedSize", entry.uncompressedSize }, - }; -} - -inline void from_json(const json& j, SEntry& entry) -{ - j.at("mainFileContents").get_to(entry.mainFileContents); - j.at("compilerArgs").get_to(entry.compilerArgs); - j.at("hash").get_to(entry.hash.data); - j.at("lookupHash").get_to(entry.lookupHash); - j.at("dependencies").get_to(entry.dependencies); - j.at("uncompressedContentHash").get_to(entry.uncompressedContentHash.data); - j.at("uncompressedSize").get_to(entry.uncompressedSize); - entry.spirv = nullptr; -} - -} -#endif \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp index 4117ca5f3a..5d988b614c 100644 --- a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp @@ -12,6 +12,8 @@ #include +#pragma warning( push ) +#pragma warning( disable : 5103 ) namespace nbl::ext::MitsubaLoader { namespace impl @@ -422,4 +424,5 @@ bool CElementBSDF::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt return true; } -} \ No newline at end of file +} +#pragma warning( pop ) \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index 93b77b4c3a..d179235f54 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -21,6 +21,8 @@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap using this_t = CElementEmitter; AddPropertyMap retval; +#pragma warning( push ) +#pragma warning( disable : 5103 ) 
// funky transform setting NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("position",POINT) { @@ -116,6 +118,7 @@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(gamma,FLOAT,std::is_same,EnvMap); #undef ADD_SPECTRUM +#pragma warning( pop ) return retval; } diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index f99b4487f9..859730167a 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -41,6 +41,8 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap retval; +#pragma warning( push ) +#pragma warning( disable : 5103 ) // common // this one has really funny legacy behaviour which Mitsuba allowed contrary to its PDF docs NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("shadingSamples",INTEGER,is_any_of,AmbientOcclusion,DirectIllumination) @@ -200,7 +202,7 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap AddPropertyMap { using this_t = CElementSensor; AddPropertyMap retval; - +#pragma warning( push ) +#pragma warning( disable : 5103 ) NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(up,VECTOR,derived_from,ShutterSensor); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shiftX,FLOAT,derived_from,PerspectivePinhole); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shiftY,FLOAT,derived_from,PerspectivePinhole); @@ -50,7 +51,7 @@ auto CElementSensor::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(farClip,FLOAT,derived_from,CameraBase); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(focusDistance,FLOAT,derived_from,DepthOfFieldBase); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(apertureRadius,FLOAT,derived_from,DepthOfFieldBase); - +#pragma 
warning( pop ) // special auto setClipPlane = [](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool { diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 0151bc5578..399a79a2a3 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -3,16 +3,17 @@ // For conditions of distribution and use, see copyright notice in nabla.h -#include +#include "nbl/builtin/hlsl/math/linalg/basic.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include "nbl/ext/MitsubaLoader/CMitsubaSerializedMetadata.h" -#if 0 -#include "nbl/asset/utils/CDerivativeMapCreator.h" +#include + +//#include "nbl/asset/utils/CDerivativeMapCreator.h" -#include "nbl/ext/MitsubaLoader/CMitsubaSerializedMetadata.h" -#endif #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) @@ -21,7 +22,8 @@ namespace nbl { -using namespace asset; +using namespace nbl::asset; +using namespace nbl::hlsl; namespace ext::MitsubaLoader { @@ -210,8 +212,6 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: else { SContext ctx( -// m_assetMgr->getGeometryCreator(), -// m_assetMgr->getMeshManipulator(), IAssetLoader::SAssetLoadContext{ IAssetLoader::SAssetLoadParams(_params.decryptionKeyLen,_params.decryptionKey,_params.cacheFlags,_params.loaderFlags,_params.logger,_file->getFileName().parent_path()), _file @@ -219,13 +219,17 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: _override, result.metadata.get() ); + ctx.interm_getAssetInHierarchy = [&](const char* filename, const uint16_t hierarchyOffset)->SAssetBundle + { + return this->interm_getAssetInHierarchy(filename,ctx.inner.params,_hierarchyLevel+hierarchyOffset,ctx.override_); + }; // ctx.scene->m_ambientLight = result.ambient; // TODO: 
abstract/move away since many loaders will need to do this - core::unordered_map> morphTargetCache; - auto createMorphTargets = [&_params,&morphTargetCache](core::smart_refctd_ptr&& collection)->core::smart_refctd_ptr + core::unordered_map> morphTargetCache; + auto createMorphTargets = [&_params,&morphTargetCache](core::smart_refctd_ptr&& collection)->core::smart_refctd_ptr { auto found = morphTargetCache.find(collection.get()); if (found!=morphTargetCache.end()) @@ -234,7 +238,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: if (targets) { morphTargetCache[collection.get()] = targets; - targets->getTargets()->push_back({.geoCollection=std::move(collection)}); + targets->getTargets()->push_back({.geoCollection=core::smart_refctd_ptr(const_cast(collection.get()))}); } return targets; }; @@ -242,9 +246,14 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: // auto& instances = ctx.scene->getInstances(); instances.reserve(result.shapegroups.size()); - auto addToScene = [&](const CElementShape* shape, core::smart_refctd_ptr&& collection)->void + auto addToScene = [&](const CElementShape* shape, core::smart_refctd_ptr&& collection)->void { - assert(shape && collection); + if (!collection) + { + _params.logger.log("Failed to load a ICPUGeometryCollection for Shape with id %s",LoggerError,shape->id.c_str()); + return; + } + assert(shape); auto targets = createMorphTargets(std::move(collection)); if (!targets) { @@ -252,8 +261,8 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: return; } const auto index = instances.size(); - instances.resize(index+1); - instances.getMorphTargets()[index] = std::move(targets); + instances.resize(index+1,true); + instances.getMorphTargets()[index] = core::smart_refctd_ptr(const_cast(targets.get())); // TODO: add materials (incl emission) to the instances /* auto emitter = shape->obtainEmitter(); @@ -269,6 +278,8 @@ SAssetBundle 
CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: CElementEmitter{} // no backface emission ); */ + if (shape->transform.matrix[3]!=float32_t4(0,0,0,1)) + _params.logger.log("Shape with id %s has Non-Affine transformation matrix, last row is not 0,0,0,1!",system::ILogger::ELL_ERROR,shape->id.c_str()); instances.getInitialTransforms()[index] = shape->getTransform(); }; @@ -281,29 +292,14 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: continue; if (shapedef->type!=CElementShape::Type::INSTANCE) - { - auto geometry = ctx.loadBasicShape(_hierarchyLevel,shapedef); - if (!geometry) - continue; - auto collection = core::make_smart_refctd_ptr(); - if (!collection) - { - _params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id %s",LoggerError,shapedef->id.c_str()); - continue; - } - // we don't put a transform on the geometry, because we want the transform on the instance - collection->getGeometries()->push_back({.geometry=std::move(geometry)}); - addToScene(shapedef,std::move(collection)); - } + addToScene(shapedef,ctx.loadBasicShape(shapedef)); else // mitsuba is weird and lists instances under a shapegroup instead of having instances reference the shapegroup { // get group reference const CElementShape* parent = shapedef->instance.parent; if (!parent) // we should probably assert this continue; - assert(parent->type==CElementShape::Type::SHAPEGROUP); - auto collection = ctx.loadShapeGroup(_hierarchyLevel,&parent->shapegroup); - addToScene(shapedef,std::move(collection)); + addToScene(shapedef,ctx.loadShapeGroup(parent)); } } result.shapegroups.clear(); @@ -621,12 +617,10 @@ inline core::smart_refctd_ptr CMitsubaLoader::createDS using namespace std::string_literals; SContext::SContext( -// const asset::IGeometryCreator* _geomCreator, -// const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _ctx, asset::IAssetLoader::IAssetLoaderOverride* 
_override, CMitsubaMetadata* _metadata -) : /*creator(_geomCreator), manipulator(_manipulator),*/ inner(_ctx), override_(_override), meta(_metadata) +) : inner(_ctx), override_(_override), meta(_metadata) //,ir(core::make_smart_refctd_ptr()), frontend(this) { auto materialPool = material_compiler3::CTrueIR::create(); @@ -634,11 +628,13 @@ SContext::SContext( frontIR = material_compiler3::CFrontendIR::create(); } -auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup) -> SContext::group_ass_type +auto SContext::loadShapeGroup(const CElementShape* shape) -> SContext::shape_ass_type { + assert(shape->type==CElementShape::Type::SHAPEGROUP); + const auto* const shapegroup = &shape->shapegroup; auto found = groupCache.find(shapegroup); if (found!=groupCache.end()) - return found->second; + return found->second.collection; auto collection = core::make_smart_refctd_ptr(); if (!collection) @@ -652,191 +648,207 @@ auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape auto child = children[i]; if (!child) continue; - + // shape groups cannot contain instances assert(child->type!=CElementShape::Type::INSTANCE); + + shape_ass_type nestedCollection; if (child->type!=CElementShape::Type::SHAPEGROUP) - { - auto geometry = loadBasicShape(hierarchyLevel,child); - if (geometry) - geometries->push_back({.transform=child->getTransform(),.geometry=std::move(geometry)}); - } + nestedCollection = loadBasicShape(child); else - { - auto nestedCollection = loadShapeGroup(hierarchyLevel,&child->shapegroup); - if (!nestedCollection) - continue; - auto* nestedGeometries = nestedCollection->getGeometries(); - for (auto& ref : *nestedGeometries) - { - auto& newRef = geometries->emplace_back(std::move(ref)); - // thankfully because SHAPEGROUPS are not allowed to have transforms we don't need to rack them up - //if (newRef.hasTransform()) - // newRef.transform = hlsl::mul(thisTransform,newRef.transform); - //else - // 
newRef.transform = thisTransform; - } - } + nestedCollection = loadShapeGroup(child); + if (!nestedCollection) + continue; + + // note that we flatten geometry collections, different children are their own collections we turn them into one mega-collection + const auto& nestedGeometries = nestedCollection->getGeometries(); + // thankfully because SHAPEGROUPS are not allowed to have transforms we don't need to rack them up + //if (newRef.hasTransform()) + // newRef.transform = hlsl::mul(thisTransform,newRef.transform); + //else + // newRef.transform = thisTransform; + geometries->insert(geometries->end(),nestedGeometries.begin(),nestedGeometries.end()); } - groupCache.insert({shapegroup,collection}); + CMitsubaMetadata::SGeometryCollectionMetaPair pair = {.collection=collection}; + pair.meta.m_id = shape->id; + pair.meta.type = shape->type; + groupCache.insert({shapegroup,std::move(pair)}); } return collection; } -#if 0 -static core::smart_refctd_ptr createMeshFromGeomCreatorReturnType(IGeometryCreator::return_type&& _data, asset::IAssetManager* _manager) -{ - //creating pipeline just to forward vtx and primitive params - auto pipeline = core::make_smart_refctd_ptr( - nullptr, nullptr, nullptr, //no layout nor shaders - _data.inputParams, - asset::SBlendParams(), - _data.assemblyParams, - asset::SRasterizationParams() - ); - - auto mb = core::make_smart_refctd_ptr( - nullptr, nullptr, - _data.bindings, std::move(_data.indexBuffer) - ); - mb->setIndexCount(_data.indexCount); - mb->setIndexType(_data.indexType); - mb->setBoundingBox(_data.bbox); - mb->setPipeline(std::move(pipeline)); - constexpr auto NORMAL_ATTRIBUTE = 3; - mb->setNormalAttributeIx(NORMAL_ATTRIBUTE); - - auto mesh = core::make_smart_refctd_ptr(); - mesh->getMeshBufferVector().push_back(std::move(mb)); - - return mesh; -} -#endif - -auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape) -> SContext::shape_ass_type +auto SContext::loadBasicShape(const CElementShape* 
shape) -> SContext::shape_ass_type { auto found = shapeCache.find(shape); if (found!=shapeCache.end()) - return found->second.geom; - - core::smart_refctd_ptr geo; - auto exiter = core::makeRAIIExiter<>([&]()->void - { - if (geo) - return; - this->inner.params.logger.log("Failed to Load/Create Basic non-Instanced Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); - } - ); - -#if 0 - constexpr uint32_t UV_ATTRIB_ID = 2u; + return found->second.collection; + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + { + inner.params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id %s",LoggerError,shape->id.c_str()); + return nullptr; + } + // the geometry reference transform shall only contain an exceptional and optional relative transform like to make Builtin shapes like cubes, spheres, etc. of different sizes + // the whole shape (which is a geometry collection) has its own transform + auto* pGeometries = collection->getGeometries(); + auto addGeometry = [pGeometries](ICPUGeometryCollection::SGeometryReference&& ref)->void + { + if (ref) + pGeometries->push_back(std::move(ref)); + }; - auto loadModel = [&](const ext::MitsubaLoader::SPropertyElementData& filename, int64_t index=-1) -> core::smart_refctd_ptr + auto loadModel = [&](const char* filename, int64_t index=-1) -> void { - assert(filename.type==ext::MitsubaLoader::SPropertyElementData::Type::STRING); - auto loadParams = ctx.inner.params; - loadParams.loaderFlags = static_cast(loadParams.loaderFlags | IAssetLoader::ELPF_RIGHT_HANDED_MESHES); - auto retval = interm_getAssetInHierarchy( filename.svalue, loadParams, hierarchyLevel/*+ICPUScene::MESH_HIERARCHY_LEVELS_BELOW*/, ctx.override_); - if (retval.getContents().empty()) - { - os::Printer::log(std::string("[ERROR] Could Not Find Mesh: ") + filename.svalue, ELL_ERROR); - return nullptr; - } - if (retval.getAssetType()!=asset::IAsset::ET_MESH) - { - os::Printer::log("[ERROR] Loaded an Asset but it 
wasn't a mesh, was E_ASSET_TYPE " + std::to_string(retval.getAssetType()), ELL_ERROR); - return nullptr; - } + auto retval = interm_getAssetInHierarchy(filename,/*ICPUScene::GEOMETRY_COLLECTION_HIERARCHY_LEVELS_BELOW*/1); auto contentRange = retval.getContents(); - auto serializedMeta = retval.getMetadata()->selfCast(); - // - uint32_t actualIndex = 0; - if (index>=0ll && serializedMeta) - for (auto it=contentRange.begin(); it!=contentRange.end(); it++) + if (contentRange.empty()) { - auto meshMeta = static_cast(serializedMeta->getAssetSpecificMetadata(IAsset::castDown(*it).get())); - if (meshMeta->m_id!=static_cast(index)) - continue; - actualIndex = it-contentRange.begin(); - break; + inner.params.logger.log("Could Not Load Shape : %s",LoggerError,filename); + return; } + + // we used to load with the IAssetLoader::ELPF_RIGHT_HANDED_MESHES flag, this means flipping the mesh x-axis + auto transform = math::linalg::diagonal(1.f); + transform[0][0] = -1.f; + // - if (contentRange.begin()+actualIndex < contentRange.end()) + auto addCollectionGeometries = [&](const ICPUGeometryCollection* col)->void + { + if (col) + for (auto ref : col->getGeometries()) + { + if (ref.hasTransform()) + ref.transform = math::linalg::promoted_mul(ref.transform,transform); + else + ref.transform = transform; + addGeometry(std::move(ref)); + } + }; + + // take first target and replace the collection + auto addFirstTargetGeometries = [&](const ICPUMorphTargets* morph)->void + { + if (const auto& targets=morph->getTargets(); !targets.empty()) + addCollectionGeometries(targets.front().geoCollection.get()); + }; + + switch (retval.getAssetType()) { - auto asset = contentRange.begin()[actualIndex]; - if (!asset) - return nullptr; - return core::smart_refctd_ptr_static_cast(asset); + case IAsset::ET_GEOMETRY: + { + // only add one geometry, if we meant to add a whole collection, the file would load a collection + const IGeometry* geo = nullptr; + auto serializedMeta = 
retval.getMetadata()->selfCast(); + for (auto it=contentRange.begin(); it!=contentRange.end(); it++) + { + geo = IAsset::castDown(*it).get(); + assert(geo); + if (!serializedMeta || index<0ll || index>numeric_limits::max) // not Misuba serialized or shape index not specialized + break; + auto* const meta = serializedMeta->getAssetSpecificMetadata(static_cast(geo)); + assert(meta); + auto* const polygonMeta = static_cast(meta); + if (polygonMeta->m_id==static_cast(index)) + break; + } + if (auto* const mg=const_cast*>(geo); mg) + addGeometry({.transform=transform,.geometry=core::smart_refctd_ptr>(mg)}); + break; + } + case IAsset::ET_GEOMETRY_COLLECTION: + { + // only add the first collection's geometries + addCollectionGeometries(IAsset::castDown(contentRange[0]).get()); + break; + } + case IAsset::ET_MORPH_TARGETS: + { + addFirstTargetGeometries(IAsset::castDown(contentRange[0]).get()); + break; + } + case IAsset::ET_SCENE: + { + // flatten the scene into a single instance, this is path for OBJ loading + const auto& instances = IAsset::castDown(contentRange[0])->getInstances(); + const auto instanceTforms = instances.getInitialTransforms(); + for (auto i=0u; isize(); + addFirstTargetGeometries(targets); + if (!instanceTforms.empty()) + for (auto geoIx=oldGeoBegin; geoIxsize(); geoIx++) + { + auto& ref = pGeometries->operator[](geoIx); + ref.transform = math::linalg::promoted_mul(instanceTforms[i],ref.transform); + } + // NOTE: also need to preserve/forward the materials somehow (need to chape the `shape_ass_type` to have a default Material Binding Table) + } + break; + } + default: + inner.params.logger.log("Loaded an Asset but it didn't contain any geometry, was %s",LoggerError,system::to_string(retval.getAssetType())); + break; } - else - return nullptr; }; -#endif + bool flipNormals = false; bool faceNormals = false; - float maxSmoothAngle = hlsl::bit_cast(hlsl::numeric_limits::quiet_NaN); + float maxSmoothAngle = bit_cast(numeric_limits::quiet_NaN); + auto* 
const creator = override_->getGeometryCreator(); switch (shape->type) { -#if 0 + // TODO: cache the simple geos to not spam new objects ? + // FAR TODO: create some special non-poly geometries for procedural raycasts? case CElementShape::Type::CUBE: { - auto cubeData = ctx.creator->createCubeMesh(core::vector3df(2.f)); - - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCubeMesh(core::vector3df(2.f)), m_assetMgr); flipNormals = flipNormals!=shape->cube.flipNormals; + addGeometry({.geometry=creator->createCube(promote(2.f))}); break; } case CElementShape::Type::SPHERE: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createSphereMesh(1.f,64u,64u), m_assetMgr); flipNormals = flipNormals!=shape->sphere.flipNormals; { - core::matrix3x4SIMD tform; - tform.setScale(core::vectorSIMDf(shape->sphere.radius,shape->sphere.radius,shape->sphere.radius)); - tform.setTranslation(shape->sphere.center); - shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(tform)); + auto tform = math::linalg::diagonal(shape->sphere.radius); + math::linalg::setTranslation(tform,shape->sphere.center); + addGeometry({.transform=tform,.geometry=creator->createSphere(1.f,64u,64u)}); } break; case CElementShape::Type::CYLINDER: + flipNormals = flipNormals!=shape->cylinder.flipNormals; { - auto diff = shape->cylinder.p0-shape->cylinder.p1; - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCylinderMesh(1.f, 1.f, 64), m_assetMgr); - core::vectorSIMDf up(0.f); - float maxDot = diff[0]; - uint32_t index = 0u; - for (auto i = 1u; i < 3u; i++) - if (diff[i] < maxDot) - { - maxDot = diff[i]; - index = i; - } - up[index] = 1.f; - core::matrix3x4SIMD tform; - // mesh is left haded so transforming by LH matrix is fine (I hope but lets check later on) - core::matrix3x4SIMD::buildCameraLookAtMatrixLH(shape->cylinder.p0,shape->cylinder.p1,up).getInverse(tform); - core::matrix3x4SIMD scale; - 
scale.setScale(core::vectorSIMDf(shape->cylinder.radius,shape->cylinder.radius,core::length(diff).x)); - shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(core::concatenateBFollowedByA(tform,scale))); + // start off as transpose, so rows are columns + float32_t4x3 extra; + extra[2] = shape->cylinder.p1 - shape->cylinder.p0; + extra[3] = shape->cylinder.p0; + math::frisvad(normalize(extra[2]),extra[0],extra[1]); + for (auto i=0u; i<2u; i++) + { + assert(length(extra[i])==1.f); + extra[i] *= shape->cylinder.radius; + } + addGeometry({.transform=transpose(extra),.geometry=creator->createCylinder(1.f,1.f,64u)}); } - flipNormals = flipNormals!=shape->cylinder.flipNormals; break; case CElementShape::Type::RECTANGLE: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createRectangleMesh(core::vector2df_SIMD(1.f,1.f)), m_assetMgr); - flipNormals = flipNormals!=shape->rectangle.flipNormals; + flipNormals = flipNormals!=shape->cylinder.flipNormals; + addGeometry({.geometry=creator->createRectangle(promote(1.f))}); break; case CElementShape::Type::DISK: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createDiskMesh(1.f,64u), m_assetMgr); - flipNormals = flipNormals!=shape->disk.flipNormals; + flipNormals = flipNormals!=shape->cylinder.flipNormals; + addGeometry({.geometry=creator->createDisk(1.f,64)}); break; -#endif -#if 0 case CElementShape::Type::OBJ: +#if 0 // TODO: Arek mesh = loadModel(shape->obj.filename); flipNormals = flipNormals!=shape->obj.flipNormals; faceNormals = shape->obj.faceNormals; maxSmoothAngle = shape->obj.maxSmoothAngle; - if (mesh && shape->obj.flipTexCoords) + if (!pGeometries->empty() && shape->obj.flipTexCoords) { + _NBL_DEBUG_BREAK_IF(true); + // TODO: find the UV attribute, it doesn't help we don't name them newMesh = core::smart_refctd_ptr_static_cast (mesh->clone(1u)); for (auto& meshbuffer : mesh->getMeshBufferVector()) { @@ -854,114 +866,84 @@ auto SContext::loadBasicShape(const 
uint32_t hierarchyLevel, const CElementShape } } } +#endif // collapse parameter gets ignored break; case CElementShape::Type::PLY: _NBL_DEBUG_BREAK_IF(true); // this code has never been tested - mesh = loadModel(shape->ply.filename); + loadModel(shape->ply.filename); flipNormals = flipNormals!=shape->ply.flipNormals; faceNormals = shape->ply.faceNormals; maxSmoothAngle = shape->ply.maxSmoothAngle; - if (mesh && shape->ply.srgb) + if (shape->ply.srgb) + for (auto& ref : *pGeometries) { - uint32_t totalVertexCount = 0u; - for (auto meshbuffer : mesh->getMeshBuffers()) - totalVertexCount += IMeshManipulator::upperBoundVertexID(meshbuffer); - if (totalVertexCount) - { - constexpr uint32_t hidefRGBSize = 4u; - auto newRGBbuff = core::make_smart_refctd_ptr(hidefRGBSize*totalVertexCount); - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - constexpr uint32_t COLOR_ATTR = 1u; - constexpr uint32_t COLOR_BUF_BINDING = 15u; - uint32_t* newRGB = reinterpret_cast(newRGBbuff->getPointer()); - uint32_t offset = 0u; - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - core::vectorSIMDf rgb; - for (uint32_t i=0u; meshbuffer->getAttribute(rgb,COLOR_ATTR,i); i++,offset++) - { - for (auto i=0; i<3u; i++) - rgb[i] = core::srgb2lin(rgb[i]); - ICPUMeshBuffer::setAttribute(rgb,newRGB+offset,asset::EF_A2B10G10R10_UNORM_PACK32); - } - auto newPipeline = core::smart_refctd_ptr_static_cast(meshbuffer->getPipeline()->clone(0u)); - auto& vtxParams = newPipeline->getVertexInputParams(); - vtxParams.attributes[COLOR_ATTR].format = EF_A2B10G10R10_UNORM_PACK32; - vtxParams.attributes[COLOR_ATTR].relativeOffset = 0u; - vtxParams.attributes[COLOR_ATTR].binding = COLOR_BUF_BINDING; - vtxParams.bindings[COLOR_BUF_BINDING].inputRate = EVIR_PER_VERTEX; - vtxParams.bindings[COLOR_BUF_BINDING].stride = hidefRGBSize; - vtxParams.enabledBindingFlags |= (1u<setPipeline(std::move(newPipeline)); - 
meshbuffer->setVertexBufferBinding({offset*hidefRGBSize,core::smart_refctd_ptr(newRGBbuff)},COLOR_BUF_BINDING); - } - } + // TODO: find the color attribute (it doesn't help we don't name them, just slap them in vectors) + // TODO: clone geometry + // TODO: change the color aux attribute's format from UNORM8 to SRGB } break; case CElementShape::Type::SERIALIZED: - mesh = loadModel(shape->serialized.filename,shape->serialized.shapeIndex); + loadModel(shape->serialized.filename,shape->serialized.shapeIndex); flipNormals = flipNormals!=shape->serialized.flipNormals; faceNormals = shape->serialized.faceNormals; maxSmoothAngle = shape->serialized.maxSmoothAngle; break; -#endif case CElementShape::Type::SHAPEGROUP: [[fallthrough]]; case CElementShape::Type::INSTANCE: - assert(false); + assert(false); // this shouldn't happen, our parser code shouldn't reach here break; default: // _NBL_DEBUG_BREAK_IF(true); break; } - // - if (geo) + // handle fail + if (pGeometries->empty()) { -#if 0 - // mesh including meshbuffers needs to be cloned because instance counts and base instances will be changed - if (!newMesh) - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - // flip normals if necessary - if (flipNormals) + inner.params.logger.log("Failed to Load/Create Basic non-Instanced Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); + return nullptr; + } + + // recompute and flip normals if necessary + if (faceNormals || !std::isnan(maxSmoothAngle)) + { + for (auto& ref : *pGeometries) { - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - auto binding = meshbuffer->getIndexBufferBinding(); - binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); - meshbuffer->setIndexBufferBinding(std::move(binding)); - ctx.manipulator->flipSurfaces(meshbuffer.get()); - } + const float smoothAngleCos = cos(radians(maxSmoothAngle)); + + auto* const polyGeo = static_cast(ref.geometry.get()); + ref.geometry = 
CPolygonGeometryManipulator::createSmoothVertexNormal( + CPolygonGeometryManipulator::createUnweldedList(polyGeo,flipNormals,false).get(),false,0.f, // TODO: maybe enable welding based on `!faceNormals` later + [faceNormals,smoothAngleCos](const CPolygonGeometryManipulator::SSNGVertexData& v0, const CPolygonGeometryManipulator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + { + if (faceNormals) + return v0.index==v1.index; + else + return dot(v0.weightedNormal,v1.weightedNormal)*rsqrt(dot(v0.weightedNormal,v0.weightedNormal)*dot(v1.weightedNormal,v1.weightedNormal)) >= smoothAngleCos; + }, + true // rewelding or initial unweld mess with all vertex attributes and index buffers, so recompute every hash + ); } - // recompute normalis if necessary - if (faceNormals || !std::isnan(maxSmoothAngle)) - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - const float smoothAngleCos = cos(core::radians(maxSmoothAngle)); - - // TODO: make these mesh manipulator functions const-correct - auto newMeshBuffer = ctx.manipulator->createMeshBufferUniquePrimitives(meshbuffer.get()); - ctx.manipulator->filterInvalidTriangles(newMeshBuffer.get()); - ctx.manipulator->calculateSmoothNormals(newMeshBuffer.get(), false, 0.f, newMeshBuffer->getNormalAttributeIx(), - [&](const asset::IMeshManipulator::SSNGVertexData& a, const asset::IMeshManipulator::SSNGVertexData& b, asset::ICPUMeshBuffer* buffer) - { - if (faceNormals) - return a.indexOffset == b.indexOffset; - else - return core::dot(a.parentTriangleFaceNormal, b.parentTriangleFaceNormal).x >= smoothAngleCos; - }); - meshbuffer = std::move(newMeshBuffer); - } - IMeshManipulator::recalculateBoundingBox(newMesh.get()); - mesh = std::move(newMesh); -#endif - // cache and return - CMitsubaMetadata::SGeometryMetaPair geoMeta = {.geom=std::move(geo)}; - geoMeta.meta.m_id = shape->id; - geoMeta.meta.type = shape->type; - shapeCache.insert({shape,std::move(geoMeta)}); } - return geo; + else if (flipNormals) + { + for (auto& ref : 
*pGeometries) + { + auto* const polyGeo = static_cast(ref.geometry.get()); + auto flippedGeo = CPolygonGeometryManipulator::createTriangleListIndexing(polyGeo,true,false); + CGeometryManipulator::recomputeContentHash(flippedGeo->getIndexView()); + // TODO: don't we also need to flip the normal buffer values? changing the winding doesn't help because the normals weren't recomputed ! + ref.geometry = std::move(flippedGeo); + } + } + + // cache and return + CMitsubaMetadata::SGeometryCollectionMetaPair pair = {.collection=collection}; + pair.meta.m_id = shape->id; + pair.meta.type = shape->type; + shapeCache.insert({shape,std::move(pair)}); + return collection; } } diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index f47428aae8..7ec56d2b93 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -337,16 +337,17 @@ bool IGPUCommandBuffer::waitEvents(const std::span events, const SEvent totalImageCount += depInfo.imgBarriers.size(); } - auto* cmd = m_cmdpool->m_commandListPool.emplace(m_commandList,events.size(),events.data(),totalBufferCount,totalImageCount); + auto* cmd = m_cmdpool->m_commandListPool.emplace(m_commandList,static_cast(events.size()),totalBufferCount,totalImageCount); if (!cmd) { NBL_LOG_ERROR("out of host memory!"); return false; } - auto outIt = cmd->getDeviceMemoryBacked(); + IGPUCommandPool::CTrackedIterator outIt(cmd); for (auto i=0u; i(events[i]); const auto& depInfo = depInfos[i]; for (const auto& barrier : depInfo.bufBarriers) *(outIt++) = barrier.range.buffer; @@ -453,7 +454,7 @@ bool IGPUCommandBuffer::pipelineBarrier(const core::bitflaggetVariableCountResources(); + IGPUCommandPool::CTrackedIterator outIt(cmd); for (const auto& barrier : depInfo.bufBarriers) *(outIt++) = barrier.range.buffer; for (const auto& barrier : depInfo.imgBarriers) @@ -804,18 +805,15 @@ uint32_t IGPUCommandBuffer::buildAccelerationStructures_common(const std::spangetVariableCountResources(); 
+ auto oit = IGPUCommandPool::CTrackedIterator(cmd); if (indirectBuffer) *(oit++) = core::smart_refctd_ptr(indirectBuffer); for (const auto& info : infos) { - oit = info.fillTracking(oit); - // we still need to clear the BLAS tracking list if the TLAS has nothing to track + // we still need to clear the BLAS tracking list if the TLAS has nothing to track, so add even if trackedBLASes.empty() if constexpr (std::is_same_v) - { - const auto blasCount = info.trackedBLASes.size(); - m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.src={oit-blasCount,blasCount},.dst=info.dstAS}); - } + m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.srcBegin=oit,.count=static_cast(info.trackedBLASes.size()),.dst=info.dstAS}); + oit = info.fillTracking(oit); } return totalGeometries; @@ -918,11 +916,11 @@ bool IGPUCommandBuffer::copyAccelerationStructureFromMemory(const AccelerationSt const bool retval = copyAccelerationStructureFromMemory_impl(copyInfo.src,copyInfo.dst); if constexpr (std::is_same_v) { - const auto size = copyInfo.trackedBLASes.size(); + const uint32_t size = copyInfo.trackedBLASes.size(); auto oit = reserveReferences(size); if (oit) { - m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.src={oit,size},.dst=copyInfo.dst}); + m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.srcBegin=oit,.count=size,.dst=copyInfo.dst}); for (const auto& blas : copyInfo.trackedBLASes) *(oit++) = core::smart_refctd_ptr(blas); } @@ -1353,7 +1351,7 @@ bool IGPUCommandBuffer::writeAccelerationStructureProperties(const std::spangetVariableCountResources(); + auto oit = IGPUCommandPool::CTrackedIterator(cmd); for (const auto* as : pAccelerationStructures) *(oit++) = core::smart_refctd_ptr(as); m_noCommands = false; @@ -2057,13 +2055,14 @@ bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* NBL_LOG_ERROR("out of host memory!"); return false; } + auto oit = IGPUCommandPool::CTrackedIterator(cmd); for (auto i=0u; igetVariableCountResources()[i] = 
core::smart_refctd_ptr(cmdbufs[i]); + *(oit++) = core::smart_refctd_ptr(cmdbufs[i]); m_noCommands = false; return executeCommands_impl(count,cmdbufs); } -core::smart_refctd_ptr* IGPUCommandBuffer::reserveReferences(const uint32_t size) +IGPUCommandPool::CTrackedIterator IGPUCommandBuffer::reserveReferences(const uint32_t size) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT|queue_flags_t::TRANSFER_BIT|queue_flags_t::SPARSE_BINDING_BIT)) return nullptr; @@ -2074,7 +2073,7 @@ core::smart_refctd_ptr* IGPUCommandBuffer::reserv NBL_LOG_ERROR("out of host memory!"); return nullptr; } - return cmd->getVariableCountResources(); + return IGPUCommandPool::CTrackedIterator(cmd); } } \ No newline at end of file diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index d5b38f9b69..a98deff5c7 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -360,9 +360,7 @@ core::smart_refctd_ptr ILogicalDevice::compileShader(const SShad commonCompileOptions.preprocessorOptions.extraDefines = creationParams.extraDefines; commonCompileOptions.stage = creationParams.stage; - commonCompileOptions.debugInfoFlags = - asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT | - asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT; + commonCompileOptions.debugInfoFlags = creationParams.debugInfoFlags; commonCompileOptions.spirvOptimizer = creationParams.optimizer; commonCompileOptions.preprocessorOptions.targetSpirvVersion = m_physicalDevice->getLimits().spirvVersion; diff --git a/src/nbl/video/IQueue.cpp b/src/nbl/video/IQueue.cpp index 108f76183c..70acecffca 100644 --- a/src/nbl/video/IQueue.cpp +++ b/src/nbl/video/IQueue.cpp @@ -177,9 +177,7 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info) case 0: { const IGPUCommandBuffer::TLASTrackingWrite& op = std::get<0>(var); - - using iterator = decltype(op.src)::iterator; - m_readTLASVersions[op.dst] = 
m_TLASOverwrites[op.dst] = op.dst->pushTrackedBLASes({op.src.begin()},{op.src.end()}); + m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = op.dst->pushTrackedBLASes({.orig=op.srcBegin},op.count); break; } case 1: @@ -192,8 +190,7 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info) // stop multiple threads messing with us std::lock_guard lk(op.src->m_trackingLock); const auto* pSrcBLASes = op.src->getPendingBuildTrackedBLASes(ver); - const std::span emptySpan = {}; - m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = pSrcBLASes ? op.dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->end()):op.dst->pushTrackedBLASes(emptySpan.begin(),emptySpan.end()); + m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = pSrcBLASes ? op.dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->size()):op.dst->pushTrackedBLASes(nullptr,0); break; } case 2: diff --git a/src/nbl/video/utilities/IUtilities.cpp b/src/nbl/video/utilities/IUtilities.cpp index 46bda8a227..f6db104aa2 100644 --- a/src/nbl/video/utilities/IUtilities.cpp +++ b/src/nbl/video/utilities/IUtilities.cpp @@ -11,6 +11,11 @@ bool IUtilities::updateImageViaStagingBuffer( const std::span regions ) { + if (!m_defaultUploadBuffer) + { + m_logger.log("no staging buffer available for upload. check `upstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } auto* scratch = commonTransferValidation(intendedNextSubmit); if (!scratch) return false; @@ -164,6 +169,11 @@ bool IUtilities::downloadImageViaStagingBuffer( void* dest, const std::span regions ) { + if (!m_defaultDownloadBuffer) + { + m_logger.log("no staging buffer available for download. 
check `downstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } if (regions.empty()) return false; diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 64ad684b0c..203aa6ce8c 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -16,7 +16,7 @@ #include "nbl/asset/metadata/CHLSLMetadata.h" #include "nlohmann/json.hpp" -using json = nlohmann::json; +using json = ::nlohmann::json; using namespace nbl; using namespace nbl::system; using namespace nbl::core; @@ -416,14 +416,14 @@ class ShaderCompiler final : public IApplicationFramework static void dumpBuildInfo(const argparse::ArgumentParser& program) { - json j; + ::json j; auto& modules = j["modules"]; auto serialize = [&](const gtml::GitInfo& info, std::string_view target) { auto& s = modules[target.data()]; s["isPopulated"] = info.isPopulated; - s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? json(info.hasUncommittedChanges.value()) : json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); + s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? ::json(info.hasUncommittedChanges.value()) : ::json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); s["commitAuthorName"] = info.commitAuthorName; s["commitAuthorEmail"] = info.commitAuthorEmail; s["commitHash"] = info.commitHash;