diff --git a/examples_tests b/examples_tests index 85d44671d1..793182654b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 85d44671d137669ce51d973c8cf76b38dad5a12a +Subproject commit 793182654b252e5bd7ab9fedb5bb12e2135303d3 diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 5d5083af02..30639d6b7c 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -202,9 +202,10 @@ struct SCookTorrance // fail if samples have invalid paths const scalar_type NdotL = hlsl::mix(scalar_type(2.0) * VdotH * localH.z - NdotV, localH.z * (VdotH * rcpEta.value[0] + LdotH) - NdotV * rcpEta.value[0], transmitted); + assert(!hlsl::isnan(NdotL)); // VNDF sampling guarantees that `VdotH` has same sign as `NdotV` // and `transmitted` controls the sign of `LdotH` relative to `VdotH` by construction (reflect -> same sign, or refract -> opposite sign) - if (hlsl::isnan(NdotL) || ComputeMicrofacetNormal::isTransmissionPath(NdotV, NdotL) != transmitted) + if (ComputeMicrofacetNormal::isTransmissionPath(NdotV, NdotL) != transmitted) { valid = false; return sample_type::createInvalid(); // should check if sample direction is invalid @@ -307,7 +308,16 @@ struct SCookTorrance partitionRandVariable.leftProb = reflectance; bool transmitted = partitionRandVariable(z, rcpChoiceProb); - const scalar_type LdotH = hlsl::mix(VdotH, ieee754::copySign(hlsl::sqrt(rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]), -VdotH), transmitted); + scalar_type LdotH; + if (transmitted) + { + scalar_type det = rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]; + if (det < scalar_type(0.0)) + return sample_type::createInvalid(); + LdotH = ieee754::copySign(hlsl::sqrt(det), -VdotH); + } + else + LdotH = VdotH; bool valid; sample_type s = __generate_common(interaction, localH, NdotV, VdotH, LdotH, transmitted, rcpEta, valid); if (valid) diff --git a/include/nbl/builtin/hlsl/bxdf/base/lambertian.hlsl b/include/nbl/builtin/hlsl/bxdf/base/lambertian.hlsl index a107921026..3f7b85875a 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/lambertian.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/lambertian.hlsl @@ -37,16 +37,18 @@ struct SLambertianBase template > enable_if_t generate(NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, const vector2_type u) NBL_CONST_MEMBER_FUNC { + typename sampling::ProjectedHemisphere::cache_type cache; ray_dir_info_type L; - L.setDirection(sampling::ProjectedHemisphere::generate(u)); + L.setDirection(sampling::ProjectedHemisphere::generate(u, cache)); return sample_type::createFromTangentSpace(L, interaction.getFromTangentSpace()); } template > enable_if_t generate(NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, const vector3_type u) NBL_CONST_MEMBER_FUNC { + typename sampling::ProjectedSphere::cache_type cache; vector3_type _u = u; ray_dir_info_type L; - L.setDirection(sampling::ProjectedSphere::generate(_u)); + L.setDirection(sampling::ProjectedSphere::generate(_u, cache)); return sample_type::createFromTangentSpace(L, interaction.getFromTangentSpace()); } template > @@ -76,10 +78,10 @@ struct SLambertianBase { sampling::quotient_and_pdf qp; NBL_IF_CONSTEXPR (IsBSDF) - qp = sampling::ProjectedSphere::template quotient_and_pdf(_sample.getNdotL(_clamp)); + qp = sampling::ProjectedSphere::template quotientAndPdf(_sample.getNdotL(_clamp)); else - qp = sampling::ProjectedHemisphere::template quotient_and_pdf(_sample.getNdotL(_clamp)); - return quotient_pdf_type::create(qp.quotient[0], qp.pdf); + qp = sampling::ProjectedHemisphere::template quotientAndPdf(_sample.getNdotL(_clamp)); + return quotient_pdf_type::create(qp.quotient()[0], qp.pdf()); } quotient_pdf_type quotient_and_pdf(NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction) NBL_CONST_MEMBER_FUNC { diff --git a/include/nbl/builtin/hlsl/bxdf/base/oren_nayar.hlsl b/include/nbl/builtin/hlsl/bxdf/base/oren_nayar.hlsl index d104842608..ab06e8d43a 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/oren_nayar.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/oren_nayar.hlsl @@ -72,16 +72,18 @@ struct SOrenNayarBase template > enable_if_t generate(NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, const vector2_type u) NBL_CONST_MEMBER_FUNC { + typename sampling::ProjectedHemisphere::cache_type cache; ray_dir_info_type L; - L.setDirection(sampling::ProjectedHemisphere::generate(u)); + L.setDirection(sampling::ProjectedHemisphere::generate(u, cache)); return sample_type::createFromTangentSpace(L, interaction.getFromTangentSpace()); } template > enable_if_t generate(NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, const vector3_type u) NBL_CONST_MEMBER_FUNC { + typename sampling::ProjectedSphere::cache_type cache; vector3_type _u = u; ray_dir_info_type L; - L.setDirection(sampling::ProjectedSphere::generate(_u)); + L.setDirection(sampling::ProjectedSphere::generate(_u, cache)); return sample_type::createFromTangentSpace(L, interaction.getFromTangentSpace()); } template > diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 76ea58de6a..23e5ff327e 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -508,10 +508,8 @@ struct iridescent_helper using vector_type = T; // returns phi, the phase shift for each plane of polarization (p,s) - static void phase_shift(const vector_type ior1, const vector_type ior2, const vector_type iork2, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) + static void phase_shift(const vector_type ior1, const vector_type ior2, const vector_type iork2, const vector_type cosTheta, const vector_type cosTheta2, const vector_type sinTheta2, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) { - const vector_type cosTheta2 = cosTheta * cosTheta; - const vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta2; const vector_type ior1_2 = ior1*ior1; const vector_type ior2_2 = ior2*ior2; const vector_type iork2_2 = iork2*iork2; @@ -523,6 +521,7 @@ struct iridescent_helper const vector_type a = hlsl::sqrt(a2); const vector_type b = hlsl::sqrt(b2); + // TODO: very optimizable, especially atan2 usage phiS = hlsl::atan2(scalar_type(2.0) * ior1 * b * cosTheta, a2 + b2 - ior1_2*cosTheta2); const vector_type k2_plus_one = hlsl::promote(1.0) + iork2_2; phiP = hlsl::atan2(scalar_type(2.0) * ior1 * ior2_2 * cosTheta * (scalar_type(2.0) * iork2 * a - (hlsl::promote(1.0) - iork2_2) * b), @@ -549,37 +548,50 @@ struct iridescent_helper { const scalar_type cosTheta_1 = clampedCosTheta; vector_type R12p, R23p, R12s, R23s; - vector_type cosTheta_2; + vector_type cosTheta_2, cosTheta2_2; vector::Dimension> notTIR; { const vector_type scale = scalar_type(1.0)/eta12; - const vector_type cosTheta2_2 = hlsl::promote(1.0) - hlsl::promote(scalar_type(1.0)-cosTheta_1*cosTheta_1) * scale * scale; + cosTheta2_2 = hlsl::promote(1.0) - hlsl::promote(scalar_type(1.0)-cosTheta_1*cosTheta_1) * scale * scale; notTIR = cosTheta2_2 > hlsl::promote(0.0); - cosTheta_2 = hlsl::sqrt(hlsl::max(cosTheta2_2, hlsl::promote(0.0))); + cosTheta_2 = hlsl::mix(hlsl::promote(0.0), hlsl::sqrt(cosTheta2_2), notTIR); } - if (hlsl::any(notTIR)) + NBL_UNROLL for (uint32_t i = 0; i < vector_traits::Dimension; i++) { - Dielectric::__polarized(eta12 * eta12, hlsl::promote(cosTheta_1), R12p, R12s); - - // Reflected part by the base - // if kappa==0, base material is dielectric - NBL_IF_CONSTEXPR(SupportsTransmission) - Dielectric::__polarized(eta23 * eta23, cosTheta_2, R23p, R23s); + // Check for total internal reflection + if (notTIR[i]) + { + using monochrome_type = vector; + monochrome_type p12, s12, p23, s23; + Dielectric::__polarized(hlsl::promote(eta12[i] * eta12[i]), hlsl::promote(cosTheta_1), p12, s12); + + const monochrome_type eta23_2 = hlsl::promote(eta23[i] * eta23[i]); + + // Reflected part by the base + // if kappa==0, base material is dielectric + NBL_IF_CONSTEXPR(SupportsTransmission) + Dielectric::__polarized(eta23_2, hlsl::promote(cosTheta_2[i]), p23, s23); + else + { + const monochrome_type etaLen2 = eta23_2 + hlsl::promote(etak23[i] * etak23[i]); + Conductor::__polarized(hlsl::promote(eta23[i]), etaLen2, hlsl::promote(cosTheta_2[i]), p23, s23); + } + + R12p[i] = p12[0]; + R12s[i] = s12[0]; + R23p[i] = p23[0]; + R23s[i] = s23[0]; + } else { - vector_type etaLen2 = eta23 * eta23 + etak23 * etak23; - Conductor::__polarized(eta23, etaLen2, cosTheta_2, R23p, R23s); + R12s[i] = scalar_type(0.0); + R12p[i] = scalar_type(0.0); + R23s[i] = scalar_type(0.0); + R23p[i] = scalar_type(0.0); } } - // Check for total internal reflection - const vector_type notTIRFactor = vector_type(notTIR); // 0 when TIR, 1 otherwise - R12s = R12s * notTIRFactor; - R12p = R12p * notTIRFactor; - R23s = R23s * notTIRFactor; - R23p = R23p * notTIRFactor; - // Compute the transmission coefficients vector_type T121p = hlsl::promote(1.0) - R12p; vector_type T121s = hlsl::promote(1.0) - R12s; @@ -591,8 +603,18 @@ struct iridescent_helper vector_type I = hlsl::promote(0.0); // Evaluate the phase shift - phase_shift(ior1, ior2, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21s, phi21p); - phase_shift(ior2, ior3, iork3, cosTheta_2, phi23s, phi23p); + { + const vector_type ct = hlsl::promote(cosTheta_1); + const vector_type ct2 = ct * ct; + const vector_type st2 = hlsl::promote(1.0) - ct2; + phase_shift(ior1, ior2, hlsl::promote(0.0), ct, ct2, st2, phi21s, phi21p); + } + { + const vector_type ct = hlsl::promote(cosTheta_2); + const vector_type ct2 = cosTheta2_2; + const vector_type st2 = hlsl::promote(1.0) - ct2; + phase_shift(ior2, ior3, iork3, ct, ct2, st2, phi23s, phi23p); + } phi21p = hlsl::promote(numbers::pi) - phi21p; phi21s = hlsl::promote(numbers::pi) - phi21s; @@ -613,6 +635,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123p; + // TODO: common subexpression, maybe we can hoist it out somehow? Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m))*(phi23p+phi21p)); I += Cm*Sm; } @@ -641,6 +664,13 @@ struct iridescent_base using scalar_type = typename vector_traits::scalar_type; using vector_type = T; + template + T __call(const vector_type iork3, const vector_type etak23, const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC + { + return impl::iridescent_helper::template __call(D, ior1, ior2, ior3, iork3, + eta12, eta23, etak23, clampedCosTheta); + } + vector_type D; vector_type ior1; vector_type ior2; @@ -650,6 +680,14 @@ struct iridescent_base vector_type eta23; // thin-film -> base material IOR vector_type eta13; }; + + +// workaround due to DXC bug: github.com/microsoft/DirectXShaderCompiler/issues/5966 +template +T __iridescent_base__call_const(NBL_CONST_REF_ARG(iridescent_base) _this, const T iork3, const T etak23, const typename vector_traits::scalar_type clampedCosTheta) +{ + return _this.template __call(iork3, etak23, clampedCosTheta); +} } template @@ -691,8 +729,7 @@ struct Iridescent::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, base_type::iork3, - base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); + return impl::__iridescent_base__call_const(NBL_DEREF_THIS, base_type::iork3, getEtak23(), clampedCosTheta); } vector_type getEtak23() NBL_CONST_MEMBER_FUNC @@ -739,8 +776,7 @@ struct Iridescent::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, getEtak23(), - base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); + return impl::__iridescent_base__call_const(NBL_DEREF_THIS, getEtak23(), getEtak23(), clampedCosTheta); } scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta13[0]; } diff --git a/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl index e6019d056c..76f2c2219a 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/anisotropically_sampled.hlsl @@ -18,25 +18,28 @@ namespace accessors { // declare concept #define NBL_CONCEPT_NAME AnisotropicallySampled -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) #define NBL_CONCEPT_PARAM_3 (dU,vector) #define NBL_CONCEPT_PARAM_4 (dV,vector) +#define NBL_CONCEPT_PARAM_5 (outVal,vector) // start concept -NBL_CONCEPT_BEGIN(5) +NBL_CONCEPT_BEGIN(6) // need to be defined AFTER the cocnept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 #define dU NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 #define dV NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_5 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer,dU,dV)) , ::nbl::hlsl::is_same_v, float32_t4>)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer,dU,dV)) , ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef dV #undef dU #undef layer @@ -47,4 +50,4 @@ NBL_CONCEPT_END( } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl index 8c7251214d..fcc200ad95 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl @@ -18,28 +18,31 @@ namespace accessors { // concept `LoadableImage` translates to smth like this: -//template -//concept LoadableImage = requires(U a, vector uv, uint16_t layer) { -// ::nbl::hlsl::is_same_v().template get(uv,layer)), vector>; +//template +//concept LoadableImage = requires(U a, vector uv, uint16_t layer, vector outVal) { +// ::nbl::hlsl::is_same_v().template get(outVal,uv,layer)), void>; //}; // declare concept #define NBL_CONCEPT_NAME LoadableImage -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) +#define NBL_CONCEPT_PARAM_3 (outVal,vector) // start concept -NBL_CONCEPT_BEGIN(3) +NBL_CONCEPT_BEGIN(4) // need to be defined AFTER the concept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer)), ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer)), ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef layer #undef uv #undef a @@ -47,23 +50,26 @@ NBL_CONCEPT_END( // declare concept #define NBL_CONCEPT_NAME MipmappedLoadableImage -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) #define NBL_CONCEPT_PARAM_3 (level,uint16_t) +#define NBL_CONCEPT_PARAM_4 (outVal,vector) // start concept -NBL_CONCEPT_BEGIN(4) +NBL_CONCEPT_BEGIN(5) // need to be defined AFTER the cocnept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer,level)) , ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer,level)) , ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef level #undef layer #undef uv @@ -73,4 +79,4 @@ NBL_CONCEPT_END( } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl index c49e66617b..e8b61d4029 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/mip_mapped.hlsl @@ -25,16 +25,19 @@ namespace accessors #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) #define NBL_CONCEPT_PARAM_3 (level,float) +#define NBL_CONCEPT_PARAM_4 (outVal,float32_t4) // start concept -NBL_CONCEPT_BEGIN(4) +NBL_CONCEPT_BEGIN(5) // need to be defined AFTER the cocnept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer,level)) , ::nbl::hlsl::is_same_v, float32_t4>)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(outVal,uv,layer,level)) , ::nbl::hlsl::is_same_v, void)) ); +#undef outVal #undef level #undef layer #undef uv @@ -44,4 +47,4 @@ NBL_CONCEPT_END( } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl index 7eda9b9303..900352d993 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl @@ -18,13 +18,13 @@ namespace accessors { // declare concept #define NBL_CONCEPT_NAME StorableImage -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)(Components) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (uv,vector) #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) -#define NBL_CONCEPT_PARAM_3 (data,vector) +#define NBL_CONCEPT_PARAM_3 (data,vector) // start concept NBL_CONCEPT_BEGIN(4) // need to be defined AFTER the cocnept begins diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index a1c51d4e51..7930bb73aa 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -136,7 +136,7 @@ struct conditionalAbsOrMax_helper; const T condAbs = bit_cast(bit_cast(x) & (cond ? (numeric_limits::max >> 1) : numeric_limits::max)); - return max(condAbs, limit); + return nbl::hlsl::max(condAbs, limit); } }; @@ -152,11 +152,11 @@ struct conditionalAbsOrMax_helper(x); - const Uint32VectorWithDimensionOfT mask = cond ? _static_cast(numeric_limits::max >> 1) : _static_cast(numeric_limits::max); + const Uint32VectorWithDimensionOfT mask = cond ? promote(numeric_limits::max >> 1) : promote(numeric_limits::max); const Uint32VectorWithDimensionOfT condAbsAsUint = xAsUintVec & mask; T condAbs = bit_cast(condAbsAsUint); - return max(condAbs, limit); + return nbl::hlsl::max(condAbs, limit); } }; } diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl deleted file mode 100644 index 4a6cb5dfd3..0000000000 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace math -{ - -namespace impl -{ - -template -struct MortonComponent; - -template -struct MortonComponent -{ - static T decode2d(T x) - { - x &= 0x55555555u; - x = (x ^ (x >> 1u)) & 0x33333333u; - x = (x ^ (x >> 2u)) & 0x0f0f0f0fu; - x = (x ^ (x >> 4u)) & 0x00ff00ffu; - return x; - } -}; - -template -struct MortonComponent -{ - static T decode2d(T x) - { - x &= 0x55555555u; - x = (x ^ (x >> 1u)) & 0x33333333u; - x = (x ^ (x >> 2u)) & 0x0f0f0f0fu; - x = (x ^ (x >> 4u)) & 0x00ff00ffu; - x = (x ^ (x >> 8u)) & 0x0000ffffu; - x = (x ^ (x >> 16u)); - return x; - } -}; - -} - -template -struct Morton -{ - using vector2_type = vector; - using component_type = impl::MortonComponent; - - static vector2_type decode2d(T x) - { - return vector2_type(component_type::decode2d(x), component_type::decode2d(x >> 1u)); - } -}; - -} -} -} - -#endif diff --git a/include/nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl b/include/nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl new file mode 100644 index 0000000000..898b3b0bd9 --- /dev/null +++ b/include/nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_BUILTIN_HLSL_PATH_TRACING_BASIC_RAYGEN_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PATH_TRACING_BASIC_RAYGEN_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ + +template +struct BasicRayGenerator +{ + using this_t = BasicRayGenerator; + using ray_type = Ray; + using scalar_type = typename Ray::scalar_type; + using vector3_type = typename Ray::vector3_type; + + using vector2_type = vector; + using vector4_type = vector; + using matrix4x4_type = matrix; + + ray_type generate(const vector3_type randVec) + { + vector4_type tmp = NDC; + GaussianFilter filter = GaussianFilter::create(2.5, 1.5); // stochastic reconstruction filter + tmp.xy += pixOffsetParam * filter.sample(randVec.xy); + // for depth of field we could do another stochastic point-pick + tmp = nbl::hlsl::mul(invMVP, tmp); + + ray_type ray; + ray.init(camPos, hlsl::normalize(tmp.xyz / tmp.w - camPos)); + + return ray; + } + + vector2_type pixOffsetParam; + vector3_type camPos; + vector4_type NDC; + matrix4x4_type invMVP; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl b/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl new file mode 100644 index 0000000000..25ca98772c --- /dev/null +++ b/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl @@ -0,0 +1,350 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_PATH_TRACING_CONCEPTS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PATH_TRACING_CONCEPTS_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ +namespace concepts +{ + +#define NBL_CONCEPT_NAME RandGenerator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (rand, T) +#define NBL_CONCEPT_PARAM_1 (sampleIx, uint32_t) +NBL_CONCEPT_BEGIN(2) +#define rand NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define sampleIx NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::rng_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::return_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((rand(sampleIx, sampleIx)), ::nbl::hlsl::is_same_v, typename T::return_type)) +); +#undef sampleIx +#undef rand +#include + +#define NBL_CONCEPT_NAME Ray +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (ray, T) +#define NBL_CONCEPT_PARAM_1 (v, typename T::vector3_type) +#define NBL_CONCEPT_PARAM_2 (interaction, bxdf::surface_interactions::SIsotropic, typename T::spectral_type>) +#define NBL_CONCEPT_PARAM_3 (scalar, typename T::scalar_type) +#define NBL_CONCEPT_PARAM_4 (color, typename T::spectral_type) +NBL_CONCEPT_BEGIN(5) +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define v NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define interaction NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define color NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::spectral_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.init(v/*origin*/, v/*direction*/)), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.template setInteraction, typename T::spectral_type> >(interaction)), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.initPayload()), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.shouldDoMIS()), ::nbl::hlsl::is_same_v, bool)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.foundEmissiveMIS(scalar)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.addPayloadContribution(color)), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.getPayloadAccumulatiion()), ::nbl::hlsl::is_same_v, typename T::spectral_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.updateThroughputAndMISWeights(color, scalar)), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.setT(scalar)), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.getT()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.getPayloadThroughput()), ::nbl::hlsl::is_same_v, typename T::spectral_type)) +); +#undef color +#undef scalar +#undef interaction +#undef v +#undef ray +#include + +#define NBL_CONCEPT_NAME RayGenerator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (raygen, T) +#define NBL_CONCEPT_PARAM_1 (randVec, typename T::vector3_type) +NBL_CONCEPT_BEGIN(2) +#define raygen NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define randVec NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Ray, typename T::ray_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((raygen.generate(randVec)), ::nbl::hlsl::is_same_v, typename T::ray_type)) +); +#undef randVec +#undef raygen +#include + +#define NBL_CONCEPT_NAME IntersectorClosestHit +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (closest_hit, T) +NBL_CONCEPT_BEGIN(1) +#define closest_hit NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::object_handle_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::interaction_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((closest_hit.foundHit()), ::nbl::hlsl::is_same_v, bool)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((closest_hit.getObjectID()), ::nbl::hlsl::is_same_v, typename T::object_handle_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((closest_hit.getPosition()), ::nbl::hlsl::is_same_v, typename T::vector3_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((closest_hit.getInteraction()), ::nbl::hlsl::is_same_v, typename T::interaction_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((closest_hit.getGeometricNormal()), ::nbl::hlsl::is_same_v, typename T::vector3_type)) +); +#undef closest_hit +#include + +#define NBL_CONCEPT_NAME Intersector +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (intersect, T) +#define NBL_CONCEPT_PARAM_1 (ray, typename T::ray_type) +#define NBL_CONCEPT_PARAM_2 (scene, typename T::scene_type) +#define NBL_CONCEPT_PARAM_3 (objectID, typename T::object_handle_type) +NBL_CONCEPT_BEGIN(4) +#define intersect NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define scene NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define objectID NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::scene_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::object_handle_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::closest_hit_type)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(IntersectorClosestHit, typename T::closest_hit_type)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Ray, typename T::ray_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((intersect.traceClosestHit(scene, ray)), ::nbl::hlsl::is_same_v, typename T::closest_hit_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((intersect.traceShadowRay(scene, ray, objectID)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) +); +#undef objectID +#undef scene +#undef ray +#undef intersect +#include + +#define NBL_CONCEPT_NAME BxdfNode +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (node, T) +NBL_CONCEPT_BEGIN(1) +#define intersect NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((intersect.getNEEProb()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) +); +#undef intersect +#include + +#define NBL_CONCEPT_NAME MaterialSystem +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (matsys, T) +#define NBL_CONCEPT_PARAM_1 (_sample, typename T::sample_type) +#define NBL_CONCEPT_PARAM_2 (matid, typename T::material_id_type) +#define NBL_CONCEPT_PARAM_3 (aniso_inter, typename T::anisotropic_interaction_type) +#define NBL_CONCEPT_PARAM_4 (iso_inter, typename T::isotropic_interaction_type) +#define NBL_CONCEPT_PARAM_5 (cache_, typename T::cache_type) +#define NBL_CONCEPT_PARAM_6 (iso_cache, typename T::isocache_type) +#define NBL_CONCEPT_PARAM_7 (u, typename T::vector3_type) +#define NBL_CONCEPT_PARAM_8 (cie_y, typename T::measure_type) +NBL_CONCEPT_BEGIN(9) +#define matsys NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define _sample NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define matid NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define aniso_inter NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define iso_inter NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +#define cache_ NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_5 +#define iso_cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_6 +#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_7 +#define cie_y NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_8 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::material_id_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::sample_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::measure_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::anisotropic_interaction_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::cache_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::bxdfnode_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::create_params_t)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(BxdfNode, typename T::bxdfnode_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.eval(matid, _sample, aniso_inter)), ::nbl::hlsl::is_same_v, typename T::measure_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.generate(matid, aniso_inter, u, cache_)), ::nbl::hlsl::is_same_v, typename T::sample_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.pdf(matid, _sample, aniso_inter)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.quotient_and_pdf(matid, _sample, aniso_inter, cache_)), ::nbl::hlsl::is_same_v, typename T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.getBxDFNode(matid, aniso_inter)), ::nbl::hlsl::is_same_v, typename T::bxdfnode_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.hasEmission(matid)), ::nbl::hlsl::is_same_v, bool)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.setMonochromeEta(matid, cie_y)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.getEmission(matid, aniso_inter)), ::nbl::hlsl::is_same_v, typename T::measure_type)) +); +#undef cie_y +#undef u +#undef iso_cache +#undef cache_ +#undef iso_inter +#undef aniso_inter +#undef matid +#undef _sample +#undef matsys +#include + +namespace impl +{ +struct DummyMaterialSystem {}; +} + +#define NBL_CONCEPT_NAME NextEventEstimatorSampleQuotientReturn +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (sqr, T) +NBL_CONCEPT_BEGIN(1) +#define sqr NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::sample_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::object_handle_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sqr.getSample()), ::nbl::hlsl::is_same_v, typename T::sample_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sqr.getQuotientPdf()), ::nbl::hlsl::is_same_v, typename T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sqr.getT()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sqr.getLightObjectID()), ::nbl::hlsl::is_same_v, typename T::object_handle_type)) +); +#undef sqr +#include + +#define NBL_CONCEPT_NAME NextEventEstimator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (nee, T) +#define NBL_CONCEPT_PARAM_1 (ray, typename T::ray_type) +#define NBL_CONCEPT_PARAM_2 (id, typename T::light_id_type) +#define NBL_CONCEPT_PARAM_3 (v, typename T::vector3_type) +#define NBL_CONCEPT_PARAM_4 (matSys, impl::DummyMaterialSystem) +#define NBL_CONCEPT_PARAM_5 (interaction, typename T::interaction_type) +#define NBL_CONCEPT_PARAM_6 (depth, uint16_t) +#define NBL_CONCEPT_PARAM_7 (scene, typename T::scene_type) +NBL_CONCEPT_BEGIN(8) +#define nee NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define id NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define v NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define matSys NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +#define interaction NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_5 +#define depth NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_6 +#define scene NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_7 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::scene_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::light_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::light_id_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::spectral_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::sample_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::interaction_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::sample_quotient_return_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::tolerance_method_type)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(NextEventEstimatorSampleQuotientReturn, typename T::sample_quotient_return_type)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Ray, typename T::ray_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((nee.deferred_pdf(scene, id, ray)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((nee.template generate_and_quotient_and_pdf(scene, matSys, v/*origin*/, interaction, v/*xi*/, depth)), ::nbl::hlsl::is_same_v, typename T::sample_quotient_return_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((nee.get_env_light_id()), ::nbl::hlsl::is_same_v, typename T::light_id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((nee.get_environment_radiance(ray)), ::nbl::hlsl::is_same_v, typename T::spectral_type)) +); +#undef scene +#undef depth +#undef interaction +#undef matSys +#undef v +#undef id +#undef ray +#undef nee +#include + +#define NBL_CONCEPT_NAME Accumulator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (acc, T) +#define NBL_CONCEPT_PARAM_1 (sampleCount, uint32_t) +#define NBL_CONCEPT_PARAM_2 (_sample, typename T::input_sample_type) +NBL_CONCEPT_BEGIN(3) +#define acc NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define sampleCount NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define _sample NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::input_sample_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((acc.addSample(sampleCount, _sample)), ::nbl::hlsl::is_same_v, void)) +); +#undef _sample +#undef sampleCount +#undef acc +#include + +#define NBL_CONCEPT_NAME MaterialLightID +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (id, T) +NBL_CONCEPT_BEGIN(1) +#define id NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::light_id_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::material_id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((id.getLightID()), ::nbl::hlsl::is_same_v, typename T::light_id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((id.getMaterialID()), ::nbl::hlsl::is_same_v, typename T::material_id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((id.isLight()), ::nbl::hlsl::is_same_v, bool)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((id.canContinuePath()), ::nbl::hlsl::is_same_v, bool)) +); +#undef id +#include + +namespace impl +{ +struct DummyRay {}; +struct DummyIntersect {}; +} + +#define NBL_CONCEPT_NAME Scene +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (scene, T) +#define NBL_CONCEPT_PARAM_1 (id, typename T::object_handle_type) +#define NBL_CONCEPT_PARAM_2 (ray, impl::DummyRay) +NBL_CONCEPT_BEGIN(3) +#define scene NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define id NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::object_handle_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::mat_light_id_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::interaction_type)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(MaterialLightID, typename T::mat_light_id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((scene.getMatLightIDs(id)), ::nbl::hlsl::is_same_v, typename T::mat_light_id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((scene.template getIntersection(id, ray)), ::nbl::hlsl::is_same_v, impl::DummyIntersect)) +); +#undef ray +#undef id +#undef scene +#include + +} +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl b/include/nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl new file mode 100644 index 0000000000..e4174a7c12 --- /dev/null +++ b/include/nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl @@ -0,0 +1,43 @@ +#ifndef _NBL_BUILTIN_HLSL_DEFAULT_ACCUMULATOR_INCLUDED_ +#define _NBL_BUILTIN_HLSL_DEFAULT_ACCUMULATOR_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ + +template) +struct DefaultAccumulator +{ + using input_sample_type = OutputTypeVec; + using output_storage_type = OutputTypeVec; + using this_t = DefaultAccumulator; + using scalar_type = typename vector_traits::scalar_type; + + static this_t create() + { + this_t retval; + retval.accumulation = promote(0.0f); + + return retval; + } + + void addSample(uint32_t sampleCount, input_sample_type _sample) + { + scalar_type rcpSampleSize = scalar_type(1.0) / _static_cast(sampleCount); + accumulation += (_sample - accumulation) * rcpSampleSize; + } + + output_storage_type accumulation; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl b/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl new file mode 100644 index 0000000000..9667275f4e --- /dev/null +++ b/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl @@ -0,0 +1,46 @@ +#ifndef _NBL_BUILTIN_HLSL_PATH_TRACING_GAUSSIAN_FILTER_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PATH_TRACING_GAUSSIAN_FILTER_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ + +template) +struct GaussianFilter +{ + using this_t = GaussianFilter; + using scalar_type = T; + + using vector2_type = vector; + + static this_t create(const scalar_type gaussianFilterCutoff, const scalar_type stddev) + { + this_t retval; + retval.truncation = hlsl::exp(-0.5 * gaussianFilterCutoff * gaussianFilterCutoff); + retval.boxMuller.stddev = stddev; + return retval; + } + + vector2_type sample(const vector2_type randVec) + { + vector2_type remappedRand = randVec; + remappedRand.x *= 1.0 - truncation; + remappedRand.x += truncation; + typename nbl::hlsl::sampling::BoxMullerTransform::cache_type cache; + return boxMuller.generate(remappedRand, cache); + } + + scalar_type truncation; + nbl::hlsl::sampling::BoxMullerTransform boxMuller; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl b/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl new file mode 100644 index 0000000000..98a81738cb --- /dev/null +++ b/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl @@ -0,0 +1,220 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_PATH_TRACING_UNIDIRECTIONAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PATH_TRACING_UNIDIRECTIONAL_INCLUDED_ + +#include +#include +#include +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ + +template && concepts::Ray && + concepts::Intersector && concepts::MaterialSystem && + concepts::NextEventEstimator && concepts::Accumulator && + concepts::Scene) +struct Unidirectional +{ + using this_t = Unidirectional; + using randgen_type = RandGen; + using intersector_type = Intersector; + using material_system_type = MaterialSystem; + using nee_type = NextEventEstimator; + using scene_type = Scene; + + using scalar_type = typename MaterialSystem::scalar_type; + using vector3_type = vector; + using monochrome_type = vector; + using measure_type = typename MaterialSystem::measure_type; + using spectral_type = typename NextEventEstimator::spectral_type; + using sample_type = typename NextEventEstimator::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using ray_type = Ray; + using object_handle_type = typename Intersector::object_handle_type; + using closest_hit_type = typename Intersector::closest_hit_type; + using bxdfnode_type = typename MaterialSystem::bxdfnode_type; + using anisotropic_interaction_type = typename MaterialSystem::anisotropic_interaction_type; + using cache_type = typename MaterialSystem::cache_type; + using quotient_pdf_type = typename NextEventEstimator::quotient_pdf_type; + using tolerance_method_type = typename NextEventEstimator::tolerance_method_type; + + scalar_type getLuma(NBL_CONST_REF_ARG(vector3_type) col) + { + return hlsl::dot(spectralTypeToLumaCoeffs, col); + } + + bool closestHitProgram(uint16_t depth, uint32_t _sample, NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(closest_hit_type) intersectData) + { + anisotropic_interaction_type interaction = intersectData.getInteraction(); + + // emissive + typename scene_type::mat_light_id_type matLightID = scene.getMatLightIDs(intersectData.getObjectID()); + const typename material_system_type::material_id_type matID = matLightID.getMaterialID(); + if (materialSystem.hasEmission(matID)) + { + measure_type emissive = materialSystem.getEmission(matID, interaction); + + const typename nee_type::light_id_type lightID = matLightID.getLightID(); + if (ray.shouldDoMIS() && matLightID.isLight()) + { + emissive *= ray.getPayloadThroughput(); + const scalar_type pdf = nee.deferred_pdf(scene, lightID, ray); + assert(!hlsl::isinf(pdf)); + emissive *= ray.foundEmissiveMIS(pdf * pdf); + } + ray.addPayloadContribution(emissive); + } + + if (!matLightID.canContinuePath()) + return false; + + bxdfnode_type bxdf = materialSystem.getBxDFNode(matID, interaction); + + vector3_type eps0 = randGen(depth * 2u, _sample); + vector3_type eps1 = randGen(depth * 2u + 1u, _sample); + + const vector3_type intersectP = intersectData.getPosition(); + + // sample lights + const scalar_type neeProbability = bxdf.getNEEProb(); + scalar_type rcpChoiceProb; + sampling::PartitionRandVariable partitionRandVariable; + partitionRandVariable.leftProb = neeProbability; + assert(neeProbability >= 0.0 && neeProbability <= 1.0); + if (!partitionRandVariable(eps0.z, rcpChoiceProb)) + { + typename nee_type::sample_quotient_return_type ret = nee.template generate_and_quotient_and_pdf( + scene, materialSystem, intersectP, interaction, + eps0, depth + ); + scalar_type t = ret.getT(); + sample_type nee_sample = ret.getSample(); + quotient_pdf_type neeContrib = ret.getQuotientPdf(); + + // While NEE or other generators are not supposed to pick up Delta lobes by accident, we need the MIS weights to add up to 1 for the non-delta lobes. + // So we need to weigh the Delta lobes as if the MIS weight is always 1, but other areas regularly. + // Meaning that eval's pdf should equal quotient's pdf , this way even the diffuse contributions coming from within a specular lobe get a MIS weight near 0 for NEE. + // This stops a discrepancy in MIS weights and NEE mistakenly trying to add non-delta lobe contributions with a MIS weight > 0 and creating energy from thin air. + if (neeContrib.pdf() > scalar_type(0.0)) + { + // TODO: we'll need an `eval_and_mis_weight` and `quotient_and_mis_weight` + const scalar_type bsdf_pdf = materialSystem.pdf(matID, nee_sample, interaction); + neeContrib._quotient *= materialSystem.eval(matID, nee_sample, interaction) * rcpChoiceProb; + if (neeContrib.pdf() < bit_cast(numeric_limits::infinity)) + { + const scalar_type otherGenOverLightAndChoice = bsdf_pdf * rcpChoiceProb / neeContrib.pdf(); + neeContrib._quotient /= 1.f + otherGenOverLightAndChoice * otherGenOverLightAndChoice; // balance heuristic + } + + const vector3_type origin = intersectP; + const vector3_type direction = nee_sample.getL().getDirection(); + ray_type nee_ray; + nee_ray.init(origin, direction); + nee_ray.template setInteraction(interaction); + nee_ray.setT(t); + tolerance_method_type::template adjust(nee_ray, intersectData.getGeometricNormal(), depth); + if (getLuma(neeContrib.quotient()) > lumaContributionThreshold) + ray.addPayloadContribution(neeContrib.quotient() * intersector_type::traceShadowRay(scene, nee_ray, ret.getLightObjectID())); + } + } + + // sample BSDF + scalar_type bxdfPdf; + vector3_type bxdfSample; + vector3_type throughput = ray.getPayloadThroughput(); + { + cache_type _cache; + sample_type bsdf_sample = materialSystem.generate(matID, interaction, eps1, _cache); + + if (!bsdf_sample.isValid()) + return false; + + // the value of the bsdf divided by the probability of the sample being generated + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(matID, bsdf_sample, interaction, _cache); + throughput *= bsdf_quotient_pdf.quotient(); + bxdfPdf = bsdf_quotient_pdf.pdf(); + bxdfSample = bsdf_sample.getL().getDirection(); + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bxdfPdf > bxdfPdfThreshold && getLuma(throughput) > lumaThroughputThreshold) + { + scalar_type otherTechniqueHeuristic = neeProbability / bxdfPdf; // numerically stable, don't touch + assert(!hlsl::isinf(otherTechniqueHeuristic)); + ray.updateThroughputAndMISWeights(throughput, otherTechniqueHeuristic * otherTechniqueHeuristic); + + // trace new ray + vector3_type origin = intersectP; + vector3_type direction = bxdfSample; + ray.init(origin, direction); + ray.template setInteraction(interaction); + tolerance_method_type::template adjust(ray, intersectData.getGeometricNormal(), depth); + + return true; + } + + return false; + } + + void missProgram(NBL_REF_ARG(ray_type) ray) + { + measure_type finalContribution = nee.get_environment_radiance(ray); + typename nee_type::light_id_type env_light_id = nee.get_env_light_id(); + const scalar_type pdf = nee.deferred_pdf(scene, env_light_id, ray); + finalContribution *= ray.getPayloadThroughput(); + if (pdf > scalar_type(0.0)) + finalContribution *= ray.foundEmissiveMIS(pdf * pdf); + ray.addPayloadContribution(finalContribution); + } + + // Li + void sampleMeasure(NBL_REF_ARG(ray_type) ray, uint32_t sampleIndex, uint32_t maxDepth, NBL_REF_ARG(Accumulator) accumulator) + { + // bounces + // note do 1-based indexing because we expect first dimension was consumed to generate the ray + bool continuePath = true; + for (uint16_t d = 1; (d <= maxDepth) && continuePath; d++) + { + ray.setT(numeric_limits::max); + closest_hit_type intersection = intersector_type::traceClosestHit(scene, ray); + + continuePath = intersection.foundHit(); + if (continuePath) + continuePath = closestHitProgram(d, sampleIndex, ray, intersection); + } + if (!continuePath) + missProgram(ray); + + const uint32_t sampleCount = sampleIndex + 1; + accumulator.addSample(sampleCount, ray.getPayloadAccumulatiion()); + + // TODO: russian roulette early exit? + } + + randgen_type randGen; + material_system_type materialSystem; + nee_type nee; + scene_type scene; + + scalar_type bxdfPdfThreshold; + scalar_type lumaContributionThreshold; // OETF smallest perceptible value + spectral_type spectralTypeToLumaCoeffs; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 9413bcee98..a52c9302d3 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -3,7 +3,6 @@ #include #include #include -#include #include namespace nbl @@ -13,76 +12,81 @@ namespace hlsl namespace rwmc { -template) -struct CascadeAccumulator +template && concepts::UnsignedIntegralScalar) +struct DefaultCascades { - struct CascadeEntry + using layer_type = CascadeLayerType; + using sample_count_type = SampleCountType; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CascadeCount = CascadeCountValue; + + sample_count_type cascadeSampleCounter[CascadeCount]; + CascadeLayerType data[CascadeCount]; + + void clear(uint32_t cascadeIx) { - uint32_t cascadeSampleCounter[CascadeCount]; - CascadeLayerType data[CascadeCount]; + cascadeSampleCounter[cascadeIx] = sample_count_type(0u); + data[cascadeIx] = promote(0.0f); + } - void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint32_t lowerCascadeIndex, float lowerCascadeLevelWeight, float higherCascadeLevelWeight, uint32_t sampleCount) + void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint16_t lowerCascadeIndex, SSplattingParameters::scalar_t lowerCascadeLevelWeight, SSplattingParameters::scalar_t higherCascadeLevelWeight, uint32_t sampleCount) + { + const SSplattingParameters::scalar_t reciprocalSampleCount = SSplattingParameters::scalar_t(1.0f) / SSplattingParameters::scalar_t(sampleCount); + + sample_count_type lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex]; + data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sample_count_type(sampleCount); + + uint16_t higherCascadeIndex = lowerCascadeIndex + uint16_t(1u); + if (higherCascadeIndex < CascadeCount) { - const float reciprocalSampleCount = 1.0f / float(sampleCount); - - uint32_t lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex]; - data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[lowerCascadeIndex] = sampleCount; - - uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; - if (higherCascadeIndex < CascadeCount) - { - uint32_t higherCascadeSampleCount = cascadeSampleCounter[higherCascadeIndex]; - data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[higherCascadeIndex] = sampleCount; - } + sample_count_type higherCascadeSampleCount = cascadeSampleCounter[higherCascadeIndex]; + data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[higherCascadeIndex] = sample_count_type(sampleCount); } - }; - - using cascade_layer_scalar_type = typename vector_traits::scalar_type; - using this_t = CascadeAccumulator; - using input_sample_type = CascadeLayerType; - using output_storage_type = CascadeEntry; - using initialization_data = SplattingParameters; - output_storage_type accumulation; + } +}; + +template +struct CascadeAccumulator +{ + using scalar_t = typename SSplattingParameters::scalar_t; + using input_sample_type = typename CascadesType::layer_type; + using this_t = CascadeAccumulator; + using cascades_type = CascadesType; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CascadeCount = cascades_type::CascadeCount; + NBL_CONSTEXPR_STATIC_INLINE scalar_t LastCascade = scalar_t(CascadeCount - 1u); + cascades_type accumulation; - SplattingParameters splattingParameters; + SSplattingParameters splattingParameters; - static this_t create(NBL_CONST_REF_ARG(SplattingParameters) settings) + static this_t create(NBL_CONST_REF_ARG(SPackedSplattingParameters) settings) { this_t retval; - for (int i = 0; i < CascadeCount; ++i) - { - retval.accumulation.data[i] = promote(0.0f); - retval.accumulation.cascadeSampleCounter[i] = 0u; - } - retval.splattingParameters = settings; + for (uint32_t i = 0u; i < CascadeCount; ++i) + retval.accumulation.clear(i); + retval.splattingParameters = settings.unpack(); return retval; } - - cascade_layer_scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) - { - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); - } // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const cascade_layer_scalar_type luma = getLuma(_sample); - const cascade_layer_scalar_type log2Luma = log2(luma); - const cascade_layer_scalar_type cascade = log2Luma * splattingParameters.rcpLog2Base - splattingParameters.baseRootOfStart; - const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); + const scalar_t luma = splattingParameters.calcLuma(_sample); + const scalar_t log2Luma = log2(luma); + const scalar_t cascade = log2Luma * splattingParameters.RcpLog2Base - splattingParameters.Log2BaseRootOfStart; + const scalar_t clampedCascade = clamp(cascade, scalar_t(0), LastCascade); + const scalar_t clampedCascadeFloor = floor(clampedCascade); // c<=0 -> 0, c>=Count-1 -> Count-1 - uint32_t lowerCascadeIndex = floor(cascade); + uint16_t lowerCascadeIndex = uint16_t(clampedCascadeFloor); // 0 whenever clamped or `cascade` is integer (when `clampedCascade` is integer) - cascade_layer_scalar_type higherCascadeWeight = clampedCascade - floor(clampedCascade); + scalar_t higherCascadeWeight = clampedCascade - clampedCascadeFloor; // never 0 thanks to magic of `1-fract(x)` - cascade_layer_scalar_type lowerCascadeWeight = cascade_layer_scalar_type(1) - higherCascadeWeight; + scalar_t lowerCascadeWeight = scalar_t(1) - higherCascadeWeight; // handle super bright sample case - if (cascade > CascadeCount - 1) - lowerCascadeWeight = splattingParameters.lastCascadeLuma / luma; + if (cascade > LastCascade) + lowerCascadeWeight = exp2(splattingParameters.BrightSampleLumaBias - log2Luma); accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } @@ -94,4 +98,4 @@ struct CascadeAccumulator } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl index 6ef687c506..afd5bf0573 100644 --- a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl @@ -1,45 +1,61 @@ -#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace rwmc -{ - -struct ResolveParameters -{ - uint32_t lastCascadeIndex; - float initialEmin; // a minimum image brightness that we always consider reliable - float reciprocalBase; - float reciprocalN; - float reciprocalKappa; - float colorReliabilityFactor; - float NOverKappa; -}; - -inline ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) -{ - ResolveParameters retval; - retval.lastCascadeIndex = cascadeSize - 1u; - retval.initialEmin = minReliableLuma; - retval.reciprocalBase = 1.f / base; - const float N = float(sampleCount); - retval.reciprocalN = 1.f / N; - retval.reciprocalKappa = 1.f / kappa; - // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have - // allow up to ~ more energy in one sample to lessen bias in some cases - retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; - retval.NOverKappa = N * retval.reciprocalKappa; - - return retval; -} - -} -} -} - -#endif \ No newline at end of file +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct SResolveParameters +{ + using scalar_t = float32_t; + + struct SCreateParams + { + scalar_t minReliableLuma; + scalar_t kappa; + scalar_t start; + scalar_t base; + uint32_t sampleCount; + }; + + static SResolveParameters create(SCreateParams params) + { + SResolveParameters retval; + retval.initialEmin = params.minReliableLuma; + retval.reciprocalBase = 1.f / params.base; + const scalar_t N = scalar_t(params.sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / params.kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = params.base + (1.f - params.base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; + } + + template + scalar_t calcLuma(NBL_CONST_REF_ARG(SampleType) col) + { + return hlsl::dot(hlsl::transpose(Colorspace::ToXYZ())[1], col); + } + + scalar_t initialEmin; // a minimum image brightness that we always consider reliable + scalar_t reciprocalBase; + scalar_t reciprocalN; + scalar_t reciprocalKappa; + scalar_t colorReliabilityFactor; + scalar_t NOverKappa; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index a3a3520415..736d2e0dfe 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/tgmath.hlsl" +#include namespace nbl { @@ -11,28 +12,54 @@ namespace hlsl namespace rwmc { -struct SplattingParameters +struct SSplattingParameters { - using scalar_t = float; + using scalar_t = float32_t; + scalar_t RcpLog2Base; + scalar_t Log2BaseRootOfStart; + scalar_t BrightSampleLumaBias; - static SplattingParameters create(const scalar_t base, const scalar_t start, const uint32_t cascadeCount) + template + scalar_t calcLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) { - SplattingParameters retval; - const scalar_t log2Base = hlsl::log2(base); - const scalar_t log2Start = hlsl::log2(start); - retval.lastCascadeLuma = hlsl::exp2(log2Start + log2Base * (cascadeCount - 1)); - retval.rcpLog2Base = scalar_t(1.0) / log2Base; - retval.baseRootOfStart = log2Start * retval.rcpLog2Base; + return hlsl::dot(hlsl::transpose(Colorspace::ToXYZ())[1], col); + } +}; + +struct SPackedSplattingParameters +{ + // float16_t rcpLog2Base; 0 + // float16_t log2BaseRootOfStart; 1 + // pack as Half2x16 + int32_t PackedRcpLog2BaseAndLog2BaseRoot; + float32_t BrightSampleLumaBias; + + static SPackedSplattingParameters create(float32_t base, float32_t start, uint32_t cascadeCount) + { + const float32_t rcpLog2Base = 1.0f / hlsl::log2(base); + const float32_t log2BaseRootOfStart = hlsl::log2(start) * rcpLog2Base; + const float32_t brightSampleLumaBias = (log2BaseRootOfStart + float32_t(cascadeCount - 1u)) / rcpLog2Base; + float32_t2 packLogs = float32_t2(rcpLog2Base, log2BaseRootOfStart); + + SPackedSplattingParameters retval; + retval.PackedRcpLog2BaseAndLog2BaseRoot = hlsl::packHalf2x16(packLogs); + retval.BrightSampleLumaBias = brightSampleLumaBias; return retval; } - scalar_t lastCascadeLuma; - scalar_t baseRootOfStart; - scalar_t rcpLog2Base; + SSplattingParameters unpack() + { + SSplattingParameters retval; + const float32_t2 unpackedRcpLog2BaseAndLog2BaseRoot = hlsl::unpackHalf2x16(PackedRcpLog2BaseAndLog2BaseRoot); + retval.RcpLog2Base = unpackedRcpLog2BaseAndLog2BaseRoot[0]; + retval.Log2BaseRootOfStart = unpackedRcpLog2BaseAndLog2BaseRoot[1]; + retval.BrightSampleLumaBias = BrightSampleLumaBias; + return retval; + } }; } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index 906cad512b..348e26bdf3 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -1,163 +1,189 @@ -#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include -#include -#include -#include -#include - -namespace nbl -{ -namespace hlsl -{ -namespace rwmc -{ - // declare concept -#define NBL_CONCEPT_NAME ResolveAccessorBase -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) -#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(VectorScalarType)(Dims) -// not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (a,T) -#define NBL_CONCEPT_PARAM_1 (scalar,VectorScalarType) -// start concept - NBL_CONCEPT_BEGIN(2) -// need to be defined AFTER the concept begins -#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vector(scalar, scalar, scalar))))) -); -#undef a -#undef scalar -#include - -/* ResolveAccessor is required to: -* - satisfy `LoadableImage` concept requirements -* - implement function called `calcLuma` which calculates luma from a 3 component pixel value -*/ - -template -NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; - -template && ResolveAccessor) -struct Resolver -{ - using output_type = OutputColorTypeVec; - using scalar_t = typename vector_traits::scalar_type; - - struct CascadeSample - { - float32_t3 centerValue; - float normalizedCenterLuma; - float normalizedNeighbourhoodAverageLuma; - }; - - static Resolver create(NBL_REF_ARG(ResolveParameters) resolveParameters) - { - Resolver retval; - retval.params = resolveParameters; - - return retval; - } - - output_type operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) - { - using scalar_t = typename vector_traits::scalar_type; - - scalar_t reciprocalBaseI = 1.f; - CascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); - - output_type accumulation = output_type(0.0f, 0.0f, 0.0f); - scalar_t Emin = params.initialEmin; - - scalar_t prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; - for (int16_t i = 0u; i <= params.lastCascadeIndex; i++) - { - const bool notFirstCascade = i != 0; - const bool notLastCascade = i != params.lastCascadeIndex; - - CascadeSample next; - if (notLastCascade) - { - reciprocalBaseI *= params.reciprocalBase; - next = __sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI); - } - - scalar_t reliability = 1.f; - // sample counting-based reliability estimation - if (params.reciprocalKappa <= 1.f) - { - scalar_t localReliability = curr.normalizedCenterLuma; - // reliability in 3x3 pixel block (see robustness) - scalar_t globalReliability = curr.normalizedNeighbourhoodAverageLuma; - if (notFirstCascade) - { - localReliability += prevNormalizedCenterLuma; - globalReliability += prevNormalizedNeighbourhoodAverageLuma; - } - if (notLastCascade) - { - localReliability += next.normalizedCenterLuma; - globalReliability += next.normalizedNeighbourhoodAverageLuma; - } - // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) - reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; - { - const scalar_t accumLuma = acc.calcLuma(accumulation); - if (accumLuma > Emin) - Emin = accumLuma; - - const scalar_t colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; - - reliability += colorReliability; - reliability *= params.NOverKappa; - reliability -= params.reciprocalKappa; - reliability = clamp(reliability * 0.5f, 0.f, 1.f); - } - } - accumulation += curr.centerValue * reliability; - - prevNormalizedCenterLuma = curr.normalizedCenterLuma; - prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; - curr = next; - } - - return accumulation; - } - - ResolveParameters params; - - // pseudo private stuff: - - CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, scalar_t reciprocalBaseI) - { - output_type neighbourhood[9]; - neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex).xyz; - neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex).xyz; - neighbourhood[2] = acc.template get(coord + int16_t2(1, -1), cascadeIndex).xyz; - neighbourhood[3] = acc.template get(coord + int16_t2(-1, 0), cascadeIndex).xyz; - neighbourhood[4] = acc.template get(coord + int16_t2(0, 0), cascadeIndex).xyz; - neighbourhood[5] = acc.template get(coord + int16_t2(1, 0), cascadeIndex).xyz; - neighbourhood[6] = acc.template get(coord + int16_t2(-1, 1), cascadeIndex).xyz; - neighbourhood[7] = acc.template get(coord + int16_t2(0, 1), cascadeIndex).xyz; - neighbourhood[8] = acc.template get(coord + int16_t2(1, 1), cascadeIndex).xyz; - - // numerical robustness - float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + - ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); - - CascadeSample retval; - retval.centerValue = neighbourhood[4]; - retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = acc.calcLuma(neighbourhood[4]) * reciprocalBaseI; - retval.normalizedNeighbourhoodAverageLuma = (acc.calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; - return retval; - } -}; - -} -} -} - -#endif \ No newline at end of file +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ +// declare concept +#define NBL_CONCEPT_NAME ResolveLumaParamsBase +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(SampleType) +#define NBL_CONCEPT_PARAM_0 (a,T) +NBL_CONCEPT_BEGIN(1) +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_t)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(concepts::FloatingPointScalar, typename T::scalar_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)( + (a.template calcLuma(::nbl::hlsl::experimental::declval())), + ::nbl::hlsl::is_same_v, + typename T::scalar_t + )) +); +#undef a +#include + +template +NBL_BOOL_CONCEPT ResolveLumaParams = ResolveLumaParamsBase; + +template +NBL_BOOL_CONCEPT ResolveAccessor = concepts::accessors::MipmappedLoadableImage; + +template) +struct SResolveAccessorAdaptor +{ + using output_scalar_t = OutputScalar; + NBL_CONSTEXPR_STATIC_INLINE int32_t Components = 3; + using output_t = vector; + NBL_CONSTEXPR_STATIC_INLINE int32_t image_dimension = 2; + + template + void get(NBL_REF_ARG(output_t) value, vector uv, uint16_t layer, uint16_t level) + { + typename AccessorType::output_t sampled; + accessor.template get(sampled, uv, layer, level); + value = sampled.xyz; + } + + AccessorType accessor; +}; + +template && + ResolveLumaParams +) +struct SResolver +{ + using output_t = typename CascadeAccessor::output_t; + using output_scalar_t = typename vector_traits::scalar_type; + using scalar_t = typename SResolveParameters::scalar_t; + NBL_CONSTEXPR_STATIC_INLINE uint16_t last_cascade = uint16_t(CascadeCount - 1u); + + struct SCascadeSample + { + output_t centerValue; + scalar_t normalizedCenterLuma; + scalar_t normalizedNeighbourhoodAverageLuma; + }; + + static SResolver create(NBL_REF_ARG(SResolveParameters) resolveParameters) + { + SResolver retval; + retval.params = resolveParameters; + + return retval; + } + + output_t operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) + { + scalar_t reciprocalBaseI = 1.f; + SCascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); + + output_t accumulation = promote(0.0f); + scalar_t Emin = params.initialEmin; + + scalar_t prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + NBL_UNROLL + for (uint16_t i = 0u; i <= last_cascade; i++) + { + const bool notFirstCascade = i != 0; + const bool notLastCascade = i != last_cascade; + + SCascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = __sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI); + } + + scalar_t reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + scalar_t localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + scalar_t globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + } + // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) + reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; + { + const scalar_t accumLuma = params.template calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const scalar_t colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; + } + + SResolveParameters params; + + // pseudo private stuff: + + SCascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, scalar_t reciprocalBaseI) + { + output_t sampleValue; + scalar_t excl_hood_luma_sum = 0.f; + + acc.template get(sampleValue, coord + int16_t2(-1, -1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(0, -1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(1, -1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(-1, 0), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + + SCascadeSample retval; + acc.template get(retval.centerValue, coord + int16_t2(0, 0), cascadeIndex, 0u); + const scalar_t centerLuma = params.template calcLuma(retval.centerValue); + acc.template get(sampleValue, coord + int16_t2(1, 0), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(-1, 1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(0, 1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + acc.template get(sampleValue, coord + int16_t2(1, 1), cascadeIndex, 0u); + excl_hood_luma_sum += params.template calcLuma(sampleValue); + + retval.normalizedCenterLuma = centerLuma * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (excl_hood_luma_sum + centerLuma) * reciprocalBaseI / 9.f; + return retval; + } +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/basic.hlsl b/include/nbl/builtin/hlsl/sampling/basic.hlsl index 9c575a22ce..c405275e55 100644 --- a/include/nbl/builtin/hlsl/sampling/basic.hlsl +++ b/include/nbl/builtin/hlsl/sampling/basic.hlsl @@ -18,29 +18,29 @@ namespace sampling template) struct PartitionRandVariable { - using floating_point_type = T; - using uint_type = unsigned_integer_of_size_t; + using floating_point_type = T; + using uint_type = unsigned_integer_of_size_t; - bool operator()(NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) - { - const floating_point_type NextULPAfterUnity = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); - const bool pickRight = xi >= leftProb * NextULPAfterUnity; + bool operator()(NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) + { + const floating_point_type NextULPAfterUnity = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); + const bool pickRight = xi >= leftProb * NextULPAfterUnity; - // This is all 100% correct taking into account the above NextULPAfterUnity - xi -= pickRight ? leftProb : floating_point_type(0.0); + // This is all 100% correct taking into account the above NextULPAfterUnity + xi -= pickRight ? leftProb : floating_point_type(0.0); - rcpChoiceProb = floating_point_type(1.0) / (pickRight ? (floating_point_type(1.0) - leftProb) : leftProb); - xi *= rcpChoiceProb; + rcpChoiceProb = floating_point_type(1.0) / (pickRight ? (floating_point_type(1.0) - leftProb) : leftProb); + xi *= rcpChoiceProb; - return pickRight; - } + return pickRight; + } - floating_point_type leftProb; + floating_point_type leftProb; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index a74869990f..74b93c831d 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -24,42 +24,88 @@ struct Bilinear using vector3_type = vector; using vector4_type = vector; + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + static Bilinear create(const vector4_type bilinearCoeffs) { Bilinear retval; retval.bilinearCoeffs = bilinearCoeffs; - retval.twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); + retval.bilinearCoeffDiffs = vector2_type(bilinearCoeffs[2]-bilinearCoeffs[0], bilinearCoeffs[3]-bilinearCoeffs[1]); + vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); + retval.fourOverTwiceAreasUnderXCurveSum = scalar_type(4.0) / (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]); + retval.lineary = Linear::create(twiceAreasUnderXCurve); return retval; } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) { - vector2_type u; - Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(_u.y); + typename Linear::cache_type linearCache; + + vector2_type p; + p.y = lineary.generate(u.y, linearCache); - const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(_u.x); + p.x = linearx.generate(u.x, linearCache); - rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); + cache.pdf = backwardPdf(p); + return p; + } + + domain_type generateInverse(const codomain_type p, NBL_REF_ARG(cache_type) cache) + { + typename Linear::cache_type linearCache; + + vector2_type u; + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + Linear linearx = Linear::create(ySliceEndPoints); + u.x = linearx.generateInverse(p.x, linearCache); + u.y = lineary.generateInverse(p.y, linearCache); + cache.pdf = backwardPdf(p); return u; } - scalar_type pdf(const vector2_type u) + density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + density_type backwardPdf(const codomain_type p) + { + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + return nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], p.x) * fourOverTwiceAreasUnderXCurveSum; + } + + weight_type backwardWeight(const codomain_type p) { - return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); + return backwardPdf(p); } // unit square: x0y0 x1y0 // x0y1 x1y1 vector4_type bilinearCoeffs; // (x0y0, x0y1, x1y0, x1y1) - vector2_type twiceAreasUnderXCurve; + vector2_type bilinearCoeffDiffs; + scalar_type fourOverTwiceAreasUnderXCurveSum; + Linear lineary; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 9474642f4c..1ee96a3f75 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/math/functions.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/sampling/value_and_pdf.hlsl" namespace nbl { @@ -19,20 +20,70 @@ template) struct BoxMullerTransform { using scalar_type = T; - using vector2_type = vector; + using vector2_type = vector; - vector2_type operator()(const vector2_type xi) + // InvertibleSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) { scalar_type sinPhi, cosPhi; - math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); - return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; + math::sincos(scalar_type(2.0) * numbers::pi * u.y - numbers::pi, sinPhi, cosPhi); + const codomain_type outPos = vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(scalar_type(-2.0) * nbl::hlsl::log(u.x)) * stddev; + cache.pdf = backwardPdf(outPos); + return outPos; + } + + density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + vector2_type separateForwardPdf(const cache_type cache, const codomain_type outPos) + { + return separateBackwardPdf(outPos); + } + + weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + density_type backwardPdf(const codomain_type outPos) + { + const vector2_type marginals = separateBackwardPdf(outPos); + return marginals.x * marginals.y; + } + + vector2_type separateBackwardPdf(const codomain_type outPos) + { + const scalar_type stddev2 = stddev * stddev; + const scalar_type normalization = scalar_type(1.0) / (stddev * nbl::hlsl::sqrt(scalar_type(2.0) * numbers::pi)); + const vector2_type outPos2 = outPos * outPos; + return vector2_type( + normalization * nbl::hlsl::exp(scalar_type(-0.5) * outPos2.x / stddev2), + normalization * nbl::hlsl::exp(scalar_type(-0.5) * outPos2.y / stddev2) + ); + } + + weight_type backwardWeight(const codomain_type outPos) + { + return backwardPdf(outPos); } T stddev; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 841fc9ff2d..abede02ed6 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -17,34 +17,123 @@ namespace sampling { template -vector concentricMapping(const vector _u) +struct ConcentricMapping { - //map [0;1]^2 to [-1;1]^2 - vector u = 2.0f * _u - hlsl::promote >(1.0); - - vector p; - if (hlsl::all >(glsl::equal(u, hlsl::promote >(0.0)))) - p = hlsl::promote >(0.0); - else - { - T r; - T theta; - if (abs(u.x) > abs(u.y)) { - r = u.x; - theta = 0.25 * numbers::pi * (u.y / u.x); - } else { - r = u.y; - theta = 0.5 * numbers::pi - 0.25 * numbers::pi * (u.x / u.y); - } - - p = r * vector(cos(theta), sin(theta)); - } - - return p; -} - -} -} -} + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + // TODO: should we cache `r`? + }; + + static codomain_type generate(const domain_type _u, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = numbers::inv_pi; + //map [0;1]^2 to [-1;1]^2 + domain_type u = 2.0f * _u - hlsl::promote >(1.0); + + vector p; + if (hlsl::all >(glsl::equal(u, hlsl::promote >(0.0)))) + p = hlsl::promote >(0.0); + else + { + T r; + T theta; + if (hlsl::abs(u.x) > hlsl::abs(u.y)) + { + r = u.x; + theta = 0.25 * numbers::pi * (u.y / u.x); + } + else + { + r = u.y; + theta = 0.5 * numbers::pi - 0.25 * numbers::pi * (u.x / u.y); + } + + p = r * vector(hlsl::cos(theta), hlsl::sin(theta)); + } + + return p; + } + + // Overload for BasicSampler + static codomain_type generate(domain_type _u) + { + cache_type dummy; + return generate(_u, dummy); + } + + static domain_type generateInverse(const codomain_type p, NBL_REF_ARG(cache_type) cache) + { + T theta = hlsl::atan2(p.y, p.x); // -pi -> pi + T r = hlsl::sqrt(p.x * p.x + p.y * p.y); + const T PiOver4 = T(0.25) * numbers::pi; + + vector u; + // TODO: should reduce branching somehow? + if (hlsl::abs(theta) < PiOver4 || hlsl::abs(theta) > 3 * PiOver4) + { + r = ieee754::copySign(r, p.x); + u.x = r; + if (p.x < 0) + { + if (p.y < 0) + { + u.y = (numbers::pi + theta) * r / PiOver4; + } + else + { + u.y = (theta - numbers::pi)*r / PiOver4; + } + } + else + { + u.y = (theta * r) / PiOver4; + } + } + else + { + r = ieee754::copySign(r, p.y); + u.y = r; + if (p.y < 0) + { + u.x = -(T(0.5) * numbers::pi + theta) * r / PiOver4; + } + else + { + u.x = (T(0.5) * numbers::pi - theta) * r / PiOver4; + } + } + + return (u + hlsl::promote >(1.0)) * T(0.5); + } + + static domain_type generateInverse(const codomain_type p) + { + cache_type dummy; + return generateInverse(p, dummy); + } + + // The PDF of Shirley mapping is constant (1/PI on the unit disk) + static density_type forwardPdf(cache_type cache) { return numbers::inv_pi; } + static density_type backwardPdf(codomain_type v) { return numbers::inv_pi; } + + static weight_type forwardWeight(cache_type cache) { return forwardPdf(cache); } + static weight_type backwardWeight(codomain_type v) { return backwardPdf(v); } +}; + +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/concepts.hlsl b/include/nbl/builtin/hlsl/sampling/concepts.hlsl new file mode 100644 index 0000000000..537d87bcd8 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/concepts.hlsl @@ -0,0 +1,297 @@ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace concepts +{ + +// ============================================================================ +// SampleWithPDF +// +// Checks that a sample type bundles a value with its PDF. +// +// Required methods: +// value() - the sampled value +// pdf() - the probability density +// +// Satisfied by: codomain_and_pdf, domain_and_pdf, quotient_and_pdf +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME SampleWithPDF +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (s, T) +NBL_CONCEPT_BEGIN(1) +#define s NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR)(s.pdf())) + ((NBL_CONCEPT_REQ_EXPR)(s.value()))); +#undef s +#include +// clang-format on + +// ============================================================================ +// SampleWithRcpPDF +// +// Checks that a sample type bundles a value with its reciprocal PDF. +// +// Required methods: +// value() - the sampled value +// rcpPdf() - the reciprocal probability density +// +// Satisfied by: codomain_and_rcpPdf, domain_and_rcpPdf +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME SampleWithRcpPDF +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (s, T) +NBL_CONCEPT_BEGIN(1) +#define s NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR)(s.rcpPdf())) + ((NBL_CONCEPT_REQ_EXPR)(s.value()))); +#undef s +#include +// clang-format on + +// ============================================================================ +// SampleWithDensity +// +// A sample type that bundles a value with either its PDF or reciprocal PDF. +// This is the disjunction of SampleWithPDF and SampleWithRcpPDF. +// ============================================================================ +template +NBL_BOOL_CONCEPT SampleWithDensity = SampleWithPDF || SampleWithRcpPDF; + +// ============================================================================ +// BasicSampler +// +// The simplest sampler: maps domain -> codomain. +// +// Required types: +// domain_type - the input space (e.g. float for 1D, float2 for 2D) +// codomain_type - the output space (e.g. float3 for directions) +// +// Required methods: +// codomain_type generate(domain_type u) +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME BasicSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (_sampler, T) +#define NBL_CONCEPT_PARAM_1 (u, typename T::domain_type) +NBL_CONCEPT_BEGIN(2) +#define _sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::domain_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::codomain_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE) ((_sampler.generate(u)), ::nbl::hlsl::is_same_v, typename T::codomain_type))); +#undef u +#undef _sampler +#include +// clang-format on + +// ============================================================================ +// TractableSampler +// +// A sampler whose density can be computed analytically in the forward +// (sampling) direction. generate returns a codomain_type value and writes +// intermediates to a cache_type out-param for later pdf evaluation. +// +// The cache_type out-param stores intermediates computed during generate +// (e.g. DG1 in Cook-Torrance, or simply the pdf for simple samplers) for +// reuse by forwardPdf without redundant recomputation. +// +// For constant-pdf samplers, forwardPdf(cache) == __pdf() (cache ignored). +// For variable-pdf samplers (e.g. Linear), forwardPdf(cache) returns the +// pre-computed pdf rather than re-evaluating __pdf(x) from the sample value. +// For complex samplers (e.g. Cook-Torrance), cache carries DG1/Fresnel and +// forwardPdf computes the pdf from those stored intermediates. +// +// Required types: +// domain_type, codomain_type, density_type, cache_type +// +// Required methods: +// codomain_type generate(domain_type u, out cache_type cache) +// density_type forwardPdf(cache_type cache) +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME TractableSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (_sampler, T) +#define NBL_CONCEPT_PARAM_1 (u, typename T::domain_type) +#define NBL_CONCEPT_PARAM_2 (cache, typename T::cache_type) +NBL_CONCEPT_BEGIN(3) +#define _sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::domain_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::codomain_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::density_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::cache_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE) ((_sampler.generate(u, cache)), ::nbl::hlsl::is_same_v, typename T::codomain_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE) ((_sampler.forwardPdf(cache)), ::nbl::hlsl::is_same_v, typename T::density_type))); +#undef cache +#undef u +#undef _sampler +#include +// clang-format on + +// ============================================================================ +// ResamplableSampler +// +// A sampler with forward and backward importance weights, enabling use in +// Multiple Importance Sampling (MIS) and Resampled Importance Sampling (RIS). +// +// Note: resampling does not require tractability - the weights need not be +// normalized probability densities, so this concept is satisfied by +// intractable samplers as well. +// +// Unlike TractableSampler, generate returns bare codomain_type (not sample_type) +// and writes a cache_type out-param for later reuse by forwardWeight. +// +// Required types: +// domain_type - the input space +// codomain_type - the output space +// cache_type - stores intermediates from generate for forward weight reuse +// weight_type - the type of the importance weight +// +// Required methods: +// codomain_type generate(domain_type u, out cache_type cache) +// weight_type forwardWeight(cache_type cache) - forward weight for MIS +// weight_type backwardWeight(codomain_type v) - backward weight for RIS +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME ResamplableSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (_sampler, T) +#define NBL_CONCEPT_PARAM_1 (u, typename T::domain_type) +#define NBL_CONCEPT_PARAM_2 (v, typename T::codomain_type) +#define NBL_CONCEPT_PARAM_3 (cache, typename T::cache_type) +NBL_CONCEPT_BEGIN(4) +#define _sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define v NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::domain_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::codomain_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::cache_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::weight_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.generate(u, cache)), ::nbl::hlsl::is_same_v, typename T::codomain_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.forwardWeight(cache)), ::nbl::hlsl::is_same_v, typename T::weight_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.backwardWeight(v)), ::nbl::hlsl::is_same_v, typename T::weight_type))); +#undef cache +#undef v +#undef u +#undef _sampler +#include +// clang-format on + +// ============================================================================ +// InvertibleSampler +// +// Extends TractableSampler with the ability to evaluate the PDF given +// a codomain value (i.e. without knowing the original domain input). +// The reverse mapping could be implemented via bisection search and is +// not necessarily bijective - input/output pairs need not match. +// +// Also exposes forward and backward importance weights for use in MIS and RIS. +// For an invertible sampler these are just the forward and backward PDFs, +// but the names signal the intended use at call sites. +// +// Required types (in addition to TractableSampler): +// weight_type - the type of the importance weight +// +// Required methods (in addition to TractableSampler): +// density_type backwardPdf(codomain_type v) - evaluate pdf at codomain value v +// weight_type forwardWeight(cache_type cache) - weight for MIS, reuses generate cache +// weight_type backwardWeight(codomain_type v) - weight for RIS, evaluated at v +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME InvertibleSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (_sampler, T) +#define NBL_CONCEPT_PARAM_1 (u, typename T::domain_type) +#define NBL_CONCEPT_PARAM_2 (v, typename T::codomain_type) +#define NBL_CONCEPT_PARAM_3 (cache, typename T::cache_type) +NBL_CONCEPT_BEGIN(4) +#define _sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define v NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(TractableSampler, T)) + ((NBL_CONCEPT_REQ_TYPE)(T::weight_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.backwardPdf(v)), ::nbl::hlsl::is_same_v, typename T::density_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.forwardWeight(cache)), ::nbl::hlsl::is_same_v, typename T::weight_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.backwardWeight(v)), ::nbl::hlsl::is_same_v, typename T::weight_type))); +#undef cache +#undef v +#undef u +#undef _sampler +#include +// clang-format on + +// ============================================================================ +// BijectiveSampler +// +// The mapping domain <-> codomain is bijective (1:1), so it can be +// inverted. Extends InvertibleSampler with generateInverse. +// +// Because the mapping is bijective, the absolute value of the determinant +// of the Jacobian matrix of the inverse equals the reciprocal of the +// absolute value of the determinant of the Jacobian matrix of the forward +// mapping (the Jacobian is an NxM matrix, not a scalar): +// backwardPdf(v) == 1.0 / forwardPdf(cache) (where v == generate(u, cache).value()) +// +// Required methods (in addition to InvertibleSampler): +// domain_type generateInverse(codomain_type v, out cache_type cache) +// ============================================================================ + +// clang-format off +#define NBL_CONCEPT_NAME BijectiveSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (_sampler, T) +#define NBL_CONCEPT_PARAM_1 (v, typename T::codomain_type) +#define NBL_CONCEPT_PARAM_2 (cache, typename T::cache_type) +NBL_CONCEPT_BEGIN(3) +#define _sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define v NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(InvertibleSampler, T)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sampler.generateInverse(v, cache)), ::nbl::hlsl::is_same_v, typename T::domain_type))); +#undef cache +#undef v +#undef _sampler +#include +// clang-format on + +} // namespace concepts +} // namespace sampling +} // namespace hlsl +} // namespace nbl + +#endif // _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_INCLUDED_ diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index ddbb961300..85dd962397 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -19,72 +19,184 @@ namespace sampling template) struct ProjectedHemisphere { - using vector_t2 = vector; - using vector_t3 = vector; - - static vector_t3 generate(const vector_t2 _sample) - { - vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); - T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); - return vector_t3(p.x, p.y, z); - } - - static T pdf(const T L_z) - { - return L_z * numbers::inv_pi; - } - - template > - static sampling::quotient_and_pdf quotient_and_pdf(const T L) - { - return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L)); - } - - template > - static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) - { - return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); - } + using vector_t2 = vector; + using vector_t3 = vector; + + // BijectiveSampler concept types + using scalar_type = T; + using domain_type = vector_t2; + using codomain_type = vector_t3; + using density_type = T; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + static codomain_type __generate(const domain_type _sample) + { + vector_t2 p = ConcentricMapping::generate(_sample * T(0.99999) + T(0.000005)); + T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); + return vector_t3(p.x, p.y, z); + } + + static codomain_type generate(const domain_type _sample, NBL_REF_ARG(cache_type) cache) + { + const codomain_type L = __generate(_sample); + cache.pdf = __pdf(L.z); + return L; + } + + static domain_type __generateInverse(const codomain_type L) + { + return ConcentricMapping::generateInverse(L.xy); + } + + static domain_type generateInverse(const codomain_type L, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(L.z); + return __generateInverse(L); + } + + static T __pdf(const T L_z) + { + return L_z * numbers::inv_pi; + } + + static scalar_type pdf(const T L_z) + { + return __pdf(L_z); + } + + static density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + static weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + static density_type backwardPdf(const codomain_type L) + { + return __pdf(L.z); + } + + static weight_type backwardWeight(const codomain_type L) + { + return backwardPdf(L); + } + + template > + static quotient_and_pdf quotientAndPdf(const T L) + { + return quotient_and_pdf::create(hlsl::promote(1.0), __pdf(L)); + } + + template > + static quotient_and_pdf quotientAndPdf(const vector_t3 L) + { + return quotient_and_pdf::create(hlsl::promote(1.0), __pdf(L.z)); + } }; template) struct ProjectedSphere { - using vector_t2 = vector; - using vector_t3 = vector; - using hemisphere_t = ProjectedHemisphere; - - static vector_t3 generate(NBL_REF_ARG(vector_t3) _sample) - { - vector_t3 retval = hemisphere_t::generate(_sample.xy); - const bool chooseLower = _sample.z > T(0.5); - retval.z = chooseLower ? (-retval.z) : retval.z; - if (chooseLower) - _sample.z -= T(0.5); - _sample.z *= T(2.0); - return retval; - } - - static T pdf(T L_z) - { - return T(0.5) * hemisphere_t::pdf(L_z); - } - - template > - static sampling::quotient_and_pdf quotient_and_pdf(T L) - { - return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L)); - } - - template > - static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) - { - return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); - } + using vector_t2 = vector; + using vector_t3 = vector; + using hemisphere_t = ProjectedHemisphere; + + // BijectiveSampler concept types + using scalar_type = T; + using domain_type = vector_t3; + using codomain_type = vector_t3; + using density_type = T; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + static codomain_type __generate(NBL_REF_ARG(domain_type) _sample) + { + vector_t3 retval = hemisphere_t::__generate(_sample.xy); + const bool chooseLower = _sample.z > T(0.5); + retval.z = chooseLower ? (-retval.z) : retval.z; + if (chooseLower) + _sample.z -= T(0.5); + _sample.z *= T(2.0); + return retval; + } + + static codomain_type generate(NBL_REF_ARG(domain_type) _sample, NBL_REF_ARG(cache_type) cache) + { + const codomain_type L = __generate(_sample); + cache.pdf = __pdf(L.z); + return L; + } + + static domain_type __generateInverse(const codomain_type L) + { + // NOTE: incomplete information to recover exact z component; we only know which hemisphere L came from, + // so we return a canonical value (0.0 for upper, 1.0 for lower) that round-trips correctly through __generate + return vector_t3(hemisphere_t::__generateInverse(L), hlsl::mix(T(1.0), T(0.0), L.z > T(0.0))); + } + + static domain_type generateInverse(const codomain_type L, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(L.z); + return __generateInverse(L); + } + + static T __pdf(T L_z) + { + return T(0.5) * hemisphere_t::__pdf(hlsl::abs(L_z)); + } + + static scalar_type pdf(T L_z) + { + return __pdf(L_z); + } + + static density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + static weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + static density_type backwardPdf(const codomain_type L) + { + return __pdf(L.z); + } + + static weight_type backwardWeight(const codomain_type L) + { + return backwardPdf(L); + } + + template > + static quotient_and_pdf quotientAndPdf(T L) + { + return quotient_and_pdf::create(hlsl::promote(1.0), __pdf(L)); + } + + template > + static quotient_and_pdf quotientAndPdf(const vector_t3 L) + { + return quotient_and_pdf::create(hlsl::promote(1.0), __pdf(L.z)); + } }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index 6c3cf1fad9..1de23eec5d 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -21,30 +21,80 @@ struct Linear using scalar_type = T; using vector2_type = vector; - static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end) + // BijectiveSampler concept types + using domain_type = scalar_type; + using codomain_type = scalar_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end), assumed to be at x=0 and x=1 { Linear retval; retval.linearCoeffStart = linearCoeffs[0]; - retval.rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); + retval.linearCoeffDiff = linearCoeffs[1] - linearCoeffs[0]; + retval.rcpCoeffSum = scalar_type(1.0) / (linearCoeffs[0] + linearCoeffs[1]); + retval.rcpDiff = -scalar_type(1.0) / retval.linearCoeffDiff; vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; retval.squaredCoeffStart = squaredCoeffs[0]; retval.squaredCoeffDiff = squaredCoeffs[1] - squaredCoeffs[0]; return retval; } - scalar_type generate(const scalar_type u) + density_type __pdf(const codomain_type x) + { + if (x < scalar_type(0.0) || x > scalar_type(1.0)) + return scalar_type(0.0); + return scalar_type(2.0) * (linearCoeffStart + x * linearCoeffDiff) * rcpCoeffSum; + } + + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) + { + const codomain_type x = hlsl::mix(u, (linearCoeffStart - sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, abs(rcpDiff) < hlsl::numeric_limits::max); + cache.pdf = __pdf(x); + return x; + } + + domain_type generateInverse(const codomain_type x, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(x); + return x * (scalar_type(2.0) * linearCoeffStart + linearCoeffDiff * x) * rcpCoeffSum; + } + + density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + density_type backwardPdf(const codomain_type x) + { + return __pdf(x); + } + + weight_type backwardWeight(const codomain_type x) { - return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); + return backwardPdf(x); } - scalar_type linearCoeffStart; + scalar_type linearCoeffStart; + scalar_type linearCoeffDiff; + scalar_type rcpCoeffSum; scalar_type rcpDiff; scalar_type squaredCoeffStart; scalar_type squaredCoeffDiff; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index e60fe28423..2639bc7610 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -26,72 +26,73 @@ struct ProjectedSphericalTriangle using vector3_type = vector; using vector4_type = vector; - static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + // InvertibleSampler concept types + using domain_type = vector2_type; + using codomain_type = vector3_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type { - ProjectedSphericalTriangle retval; - retval.tri = tri; - return retval; - } + density_type pdf; + }; - vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) + // NOTE: produces a degenerate (all-zero) bilinear patch when the receiver normal faces away + // from all three triangle vertices, resulting in NaN PDFs (0 * inf). Callers must ensure + // at least one vertex has positive projection onto the receiver normal. + Bilinear computeBilinearPatch() { const scalar_type minimumProjSolidAngle = 0.0; - matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); + matrix m = matrix(sphtri.tri_vertices[0], sphtri.tri_vertices[1], sphtri.tri_vertices[2]); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(receiverWasBSDF, hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); - return bxdfPdfAtVertex.yyxz; + return Bilinear::create(bxdfPdfAtVertex.yyxz); } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) { - vector2_type u; - // pre-warp according to proj solid angle approximation - vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); - Bilinear bilinear = Bilinear::create(patch); - u = bilinear.generate(rcpPdf, _u); - - // now warp the points onto a spherical triangle - const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - rcpPdf *= solidAngle; - + Bilinear bilinear = computeBilinearPatch(); + typename Bilinear::cache_type bilinearCache; + const vector2_type warped = bilinear.generate(u, bilinearCache); + typename SphericalTriangle::cache_type sphtriCache; + const vector3_type L = sphtri.generate(warped, sphtriCache); + // combined weight: sphtri pdf (1/solidAngle) * bilinear pdf at u + cache.pdf = sphtri.forwardPdf(sphtriCache) * bilinear.forwardPdf(bilinearCache); return L; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) + density_type forwardPdf(const cache_type cache) { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); + return cache.pdf; } - scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) + weight_type forwardWeight(const cache_type cache) { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.pdf(u); + return forwardPdf(cache); } - scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) + density_type backwardPdf(const vector3_type L) { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, L); + const density_type pdf = sphtri.backwardPdf(L); + typename SphericalTriangle::cache_type dummyCache; + const vector2_type u = sphtri.generateInverse(L, dummyCache); + Bilinear bilinear = computeBilinearPatch(); + return pdf * bilinear.backwardPdf(u); + } - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.pdf(u); + weight_type backwardWeight(const vector3_type L) + { + return backwardPdf(L); } - shapes::SphericalTriangle tri; sampling::SphericalTriangle sphtri; + vector3_type receiverNormal; + bool receiverWasBSDF; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/quotient_and_pdf.hlsl b/include/nbl/builtin/hlsl/sampling/quotient_and_pdf.hlsl index 26a62ea617..056f5a91c7 100644 --- a/include/nbl/builtin/hlsl/sampling/quotient_and_pdf.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quotient_and_pdf.hlsl @@ -25,26 +25,29 @@ struct quotient_and_pdf static this_t create(const Q _quotient, const P _pdf) { this_t retval; - retval.quotient = _quotient; - retval.pdf = _pdf; + retval._quotient = _quotient; + retval._pdf = _pdf; return retval; } static this_t create(const scalar_q _quotient, const P _pdf) { this_t retval; - retval.quotient = hlsl::promote(_quotient); - retval.pdf = _pdf; + retval._quotient = hlsl::promote(_quotient); + retval._pdf = _pdf; return retval; } + Q quotient() { return _quotient; } + P pdf() { return _pdf; } + Q value() { - return quotient*pdf; + return _quotient * _pdf; } - Q quotient; - P pdf; + Q _quotient; + P _pdf; }; } diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index f9e3d2f7ae..afa805150b 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace nbl { @@ -25,66 +25,110 @@ struct SphericalRectangle using vector3_type = vector; using vector4_type = vector; - static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect) + // InvertibleSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type { - SphericalRectangle retval; - retval.rect = rect; - return retval; - } + density_type pdf; + }; - vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S) + NBL_CONSTEXPR_STATIC_INLINE scalar_type ClampEps = 1e-5; + + static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect, const vector3_type observer) { - const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); - const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); - const vector4_type cosGamma = vector4_type( + SphericalRectangle retval; + + retval.r0 = hlsl::mul(rect.basis, rect.origin - observer); + const vector4_type denorm_n_z = vector4_type(-retval.r0.y, retval.r0.x + rect.extents.x, retval.r0.y + rect.extents.y, -retval.r0.x); + const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(retval.r0.z * retval.r0.z) + denorm_n_z * denorm_n_z); + retval.cosGamma = vector4_type( -n_z[0] * n_z[1], -n_z[1] * n_z[2], -n_z[2] * n_z[3], -n_z[3] * n_z[0] ); - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); - angle_adder.addCosine(cosGamma[1]); + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(retval.cosGamma[0]); + angle_adder.addCosine(retval.cosGamma[1]); scalar_type p = angle_adder.getSumofArccos(); - angle_adder = math::sincos_accumulator::create(cosGamma[2]); - angle_adder.addCosine(cosGamma[3]); + angle_adder = math::sincos_accumulator::create(retval.cosGamma[2]); + angle_adder.addCosine(retval.cosGamma[3]); scalar_type q = angle_adder.getSumofArccos(); const scalar_type k = scalar_type(2.0) * numbers::pi - q; - const scalar_type b0 = n_z[0]; - const scalar_type b1 = n_z[2]; - S = p + q - scalar_type(2.0) * numbers::pi; + retval.solidAngle = p + q - scalar_type(2.0) * numbers::pi; + + // flip z axis if r0.z > 0 + retval.r0 = hlsl::promote(-hlsl::abs(retval.r0.z)); + retval.r1 = retval.r0 + vector3_type(rect.extents.x, rect.extents.y, 0); - const scalar_type CLAMP_EPS = 1e-5; + retval.b0 = n_z[0]; + retval.b1 = n_z[2]; + return retval; + } - // flip z axis if rect.r0.z > 0 - rect.r0.z = ieee754::flipSignIfRHSNegative(rect.r0.z, -rect.r0.z); - vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) + { + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + scalar_type q = angle_adder.getSumofArccos(); + const scalar_type k = scalar_type(2.0) * numbers::pi - q; - const scalar_type au = uv.x * S + k; + const scalar_type au = u.x * solidAngle + k; const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); - scalar_type xu = -(cu * rect.r0.z) / hlsl::sqrt(scalar_type(1.0) - cu * cu); - xu = hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs - const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z; + scalar_type xu = -(cu * r0.z) / hlsl::sqrt(scalar_type(1.0) - cu * cu); + xu = hlsl::clamp(xu, r0.x, r1.x); // avoid Infs + const scalar_type d_2 = xu * xu + r0.z * r0.z; const scalar_type d = hlsl::sqrt(d_2); - const scalar_type h0 = rect.r0.y / hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); + const scalar_type h0 = r0.y / hlsl::sqrt(d_2 + r0.y * r0.y); const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); - const scalar_type hv = h0 + uv.y * (h1 - h0); + const scalar_type hv = h0 + u.y * (h1 - h0); const scalar_type hv2 = hv * hv; - const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - CLAMP_EPS); + const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - ClampEps); + + cache.pdf = scalar_type(1.0) / solidAngle; + + return vector2_type((xu - r0.x), (yv - r0.y)); + } + + density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } - return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); + weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + density_type backwardPdf(const codomain_type L) + { + return scalar_type(1.0) / solidAngle; + } + + weight_type backwardWeight(const codomain_type L) + { + return backwardPdf(L); } - shapes::SphericalRectangle rect; + scalar_type solidAngle; + vector4_type cosGamma; + scalar_type b0; + scalar_type b1; + vector3_type r0; + vector3_type r1; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 5770403cd2..84c8dc207e 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -21,102 +21,142 @@ namespace sampling template struct SphericalTriangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - - static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) - { - SphericalTriangle retval; - retval.tri = tri; - return retval; - } - - // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) - { - scalar_type negSinSubSolidAngle,negCosSubSolidAngle; - math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); - - const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0]; - const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0]; - - // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful - scalar_type u_ = q - cos_vertices[0]; - scalar_type v_ = p + sin_vertices[0] * cos_c; - - // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors - vector3_type C_s = tri.vertex0; - if (csc_b < numeric_limits::max) - { - const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); - if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); - } - - vector3_type retval = tri.vertex1; - const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertex1); - const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); - if (csc_b_s < numeric_limits::max) - { - const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); - if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); - } - return retval; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - - rcpPdf = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - - return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) - { - pdf = 1.0 / solidAngle; - - const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertex1); - const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); - const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertex0); - - const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c; - const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); - - const scalar_type cosC_ = sin_vertices[0] * sinB_* cos_c - cos_vertices[0] * cosB_; - const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); - - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); - angle_adder.addAngle(cosB_, sinB_); - angle_adder.addAngle(cosC_, sinC_); - const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi) * pdf; - const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; - - const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); - const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : cos_c)); - - return vector2_type(u,v); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - - return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - } - - shapes::SphericalTriangle tri; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using uint_type = unsigned_integer_of_size_t; + + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector3_type; + using density_type = scalar_type; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + { + SphericalTriangle retval; + vector3_type cos_vertices, sin_vertices; + shapes::SphericalTriangle tri_mut = tri; + retval.solidAngle = tri_mut.solidAngle(cos_vertices, sin_vertices); + retval.cosA = cos_vertices[0]; + retval.sinA = sin_vertices[0]; + retval.tri_vertices[0] = tri.vertices[0]; + retval.tri_vertices[1] = tri.vertices[1]; + retval.tri_vertices[2] = tri.vertices[2]; + retval.triCosC = tri.cos_sides[2]; + retval.triCscB = tri.csc_sides[1]; + retval.triCscC = tri.csc_sides[2]; + return retval; + } + + codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) + { + scalar_type negSinSubSolidAngle, negCosSubSolidAngle; + math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); + + const scalar_type p = negCosSubSolidAngle * sinA - negSinSubSolidAngle * cosA; + const scalar_type q = -negSinSubSolidAngle * sinA - negCosSubSolidAngle * cosA; + + // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful + scalar_type u_ = q - cosA; + scalar_type v_ = p + sinA * triCosC; + + // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors + vector3_type C_s = tri_vertices[0]; + if (triCscB < numeric_limits::max) + { + const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cosA - v_) / ((v_ * p + u_ * q) * sinA); + if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) + C_s += math::quaternion::slerp_delta(tri_vertices[0], tri_vertices[2] * triCscB, cosAngleAlongAC); + } + + vector3_type retval = tri_vertices[1]; + const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri_vertices[1]); + const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); + if (csc_b_s < numeric_limits::max) + { + const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(scalar_type(1.0) + cosBC_s * u.y - u.y, scalar_type(-1.0), scalar_type(1.0)); + if (nbl::hlsl::abs(cosAngleAlongBC_s) < scalar_type(1.0)) + retval += math::quaternion::slerp_delta(tri_vertices[1], C_s * csc_b_s, cosAngleAlongBC_s); + } + + cache.pdf = scalar_type(1.0) / solidAngle; + + return retval; + } + + domain_type _generateInverse(const codomain_type L) + { + const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri_vertices[1]); + const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); + const scalar_type cos_b_ = nbl::hlsl::dot(L, tri_vertices[0]); + + const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * triCosC) * csc_a_ * triCscC; + const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); + + const scalar_type cosC_ = sinA * sinB_ * triCosC - cosA * cosB_; + const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); + + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosA, sinA); + angle_adder.addAngle(cosB_, sinB_); + angle_adder.addAngle(cosC_, sinC_); + const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi) * (scalar_type(1.0) / solidAngle); + const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; + + const scalar_type sinBC_s_product = sinB_ * sinC_; + + // 1 ULP below 1.0, ensures (1.0 - cosBC_s) is strictly positive in float + const scalar_type one_below_one = bit_cast(bit_cast(scalar_type(1)) - uint_type(1)); + const scalar_type cosBC_s = sinBC_s_product > numeric_limits::min ? (cosA + cosB_ * cosC_) / sinBC_s_product : triCosC; + const scalar_type v_denom = scalar_type(1.0) - (cosBC_s < one_below_one ? cosBC_s : triCosC); + const scalar_type v = (scalar_type(1.0) - cosAngleAlongBC_s) / v_denom; + + return vector2_type(u, v); + } + + domain_type generateInverse(const codomain_type L, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = scalar_type(1.0) / solidAngle; + return _generateInverse(L); + } + + density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + density_type backwardPdf(const codomain_type L) + { + return scalar_type(1.0) / solidAngle; + } + + weight_type backwardWeight(const codomain_type L) + { + return backwardPdf(L); + } + + scalar_type solidAngle; + scalar_type cosA; + scalar_type sinA; + + vector3_type tri_vertices[3]; + scalar_type triCosC; + scalar_type triCscB; + scalar_type triCscC; }; -} -} -} +} // namespace sampling +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl index 5fc3bc7a0b..605d7e9370 100644 --- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl @@ -20,57 +20,161 @@ namespace sampling template) struct UniformHemisphere { - using vector_t2 = vector; - using vector_t3 = vector; - - static vector_t3 generate(const vector_t2 _sample) - { - T z = _sample.x; - T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); - T phi = T(2.0) * numbers::pi * _sample.y; - return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); - } - - static T pdf() - { - return T(1.0) / (T(2.0) * numbers::pi); - } - - template > - static quotient_and_pdf quotient_and_pdf() - { - return quotient_and_pdf::create(hlsl::promote(1.0), pdf()); - } + using vector_t2 = vector; + using vector_t3 = vector; + + // BijectiveSampler concept types + using scalar_type = T; + using domain_type = vector_t2; + using codomain_type = vector_t3; + using density_type = T; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + static codomain_type __generate(const domain_type _sample) + { + T z = _sample.x; + T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); + T phi = T(2.0) * numbers::pi * _sample.y; + return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); + } + + static codomain_type generate(const domain_type _sample, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(); + return __generate(_sample); + } + + static domain_type __generateInverse(const codomain_type _sample) + { + T phi = hlsl::atan2(_sample.y, _sample.x); + const T twopi = T(2.0) * numbers::pi; + phi += hlsl::mix(T(0.0), twopi, phi < T(0.0)); + return vector_t2(_sample.z, phi / twopi); + } + + static domain_type generateInverse(const codomain_type _sample, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(); + return __generateInverse(_sample); + } + + static scalar_type __pdf() + { + return T(1.0) / (T(2.0) * numbers::pi); + } + + static density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + static weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + static density_type backwardPdf(const vector_t3 _sample) + { + return __pdf(); + } + + static weight_type backwardWeight(const codomain_type sample) + { + return backwardPdf(sample); + } + + template > + static quotient_and_pdf quotientAndPdf() + { + return quotient_and_pdf::create(hlsl::promote(1.0), __pdf()); + } }; template) struct UniformSphere { - using vector_t2 = vector; - using vector_t3 = vector; - - static vector_t3 generate(const vector_t2 _sample) - { - T z = T(1.0) - T(2.0) * _sample.x; - T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); - T phi = T(2.0) * numbers::pi * _sample.y; - return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); - } - - static T pdf() - { - return T(1.0) / (T(4.0) * numbers::pi); - } - - template > - static quotient_and_pdf quotient_and_pdf() - { - return quotient_and_pdf::create(hlsl::promote(1.0), pdf()); - } + using vector_t2 = vector; + using vector_t3 = vector; + + // BijectiveSampler concept types + using scalar_type = T; + using domain_type = vector_t2; + using codomain_type = vector_t3; + using density_type = T; + using weight_type = density_type; + + struct cache_type + { + density_type pdf; + }; + + static codomain_type __generate(const domain_type _sample) + { + T z = T(1.0) - T(2.0) * _sample.x; + T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); + T phi = T(2.0) * numbers::pi * _sample.y; + return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); + } + + static codomain_type generate(const domain_type _sample, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(); + return __generate(_sample); + } + + static domain_type __generateInverse(const codomain_type _sample) + { + T phi = hlsl::atan2(_sample.y, _sample.x); + const T twopi = T(2.0) * numbers::pi; + phi += hlsl::mix(T(0.0), twopi, phi < T(0.0)); + return vector_t2((T(1.0) - _sample.z) * T(0.5), phi / twopi); + } + + static domain_type generateInverse(const codomain_type _sample, NBL_REF_ARG(cache_type) cache) + { + cache.pdf = __pdf(); + return __generateInverse(_sample); + } + + static T __pdf() + { + return T(1.0) / (T(4.0) * numbers::pi); + } + + static density_type forwardPdf(const cache_type cache) + { + return cache.pdf; + } + + static weight_type forwardWeight(const cache_type cache) + { + return forwardPdf(cache); + } + + static density_type backwardPdf(const vector_t3 _sample) + { + return __pdf(); + } + + static weight_type backwardWeight(const codomain_type sample) + { + return backwardPdf(sample); + } + + template > + static quotient_and_pdf quotientAndPdf() + { + return quotient_and_pdf::create(hlsl::promote(1.0), __pdf()); + } }; -} +} // namespace sampling -} -} +} // namespace hlsl +} // namespace nbl #endif diff --git a/include/nbl/builtin/hlsl/sampling/value_and_pdf.hlsl b/include/nbl/builtin/hlsl/sampling/value_and_pdf.hlsl new file mode 100644 index 0000000000..a037c0e3d8 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/value_and_pdf.hlsl @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_VALUE_AND_PDF_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_VALUE_AND_PDF_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct value_and_rcpPdf +{ + using this_t = value_and_rcpPdf; + + static this_t create(const V _value, const P _rcpPdf) + { + this_t retval; + retval._value = _value; + retval._rcpPdf = _rcpPdf; + return retval; + } + + V value() { return _value; } + P rcpPdf() { return _rcpPdf; } + + V _value; + P _rcpPdf; +}; + +template +struct value_and_pdf +{ + using this_t = value_and_pdf; + + static this_t create(const V _value, const P _pdf) + { + this_t retval; + retval._value = _value; + retval._pdf = _pdf; + return retval; + } + + V value() { return _value; } + P pdf() { return _pdf; } + + V _value; + P _pdf; +}; + +// Returned by TractableSampler::generate, codomain sample bundled with its rcpPdf +template +using codomain_and_rcpPdf = value_and_rcpPdf; + +// Returned by TractableSampler::generate, codomain sample bundled with its pdf +template +using codomain_and_pdf = value_and_pdf; + +// Returned by BijectiveSampler::invertGenerate, domain value bundled with its rcpPdf +template +using domain_and_rcpPdf = value_and_rcpPdf; + +// Returned by BijectiveSampler::invertGenerate, domain value bundled with its pdf +template +using domain_and_pdf = value_and_pdf; + +} // namespace sampling +} // namespace hlsl +} // namespace nbl + +#endif diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 11442bef7c..9743049a60 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -17,32 +17,91 @@ namespace hlsl namespace shapes { +// What are we likely to do with a Spherical Rectangle? +// 1) Initialize it multiple times from different observers +// 2) Sample it repeatedly + +// How are we likely to get a spherical rect? +// 1) from OBB matrix (with a model space z-axis scale thats irrelevant - but should be forced to 1.f to not mess with distance) +// 2) in a compressed form + +// So, to bring multiple world-space observers into Spherical Rectangle's own space, we need the basis matrix. +// The matrix should be a matrix where the last column is the translation, a 3x3 matrix with a pre-transform translation (worldSpace rectangle origin to be subtracted). + +// You can compute it from an OBB matrix (as given by/to imguizmo to position a [0,1]^2 rectangle mesh where Z+ is the front face. + +/* +matrix check = mul(modelSpace,tranpose(modelSpace)); +// orthogonality (don't need to check the other 3 lower half numbers, cause MM^T is symmetric) +assert(check[0][1]==0.f); +assert(check[0][2]==0.f); +assert(check[1][2]==0.f); +// the scales are squared +const vector2_type scalesSq = vector2_type(check[0][0],check[1][1]); +const vector2_type scalesRcp = rsqrt(scalesSq); +// only rotation, scale needs to be thrown away +basis = tranpose(modelSpace); +// right now `mul(basis,fromObserver)` will apply extent scales on the dot product +// need to remove that +basis[0] *= scalesRcp[0]; +basis[1] *= scalesRcp[1]; +// but also back it up so we know the size of the original rectangle +extents = promote(vector2_type>(1.f)/scalesRcp; +if (dontAssertZScaleIsOne) + basis[2] *= rsqrt(check[2][2]); +else +{ + assert(check[2][2]==1.f); +} +*/ + +// Now, can apply translation: +// 1) post-rotation so a it automatically gets added during a affine pseudo-mul of a 3x4, so pseudo_mul(basis,observer) +// 2) pre-rotation so you keep a worldspace rectangle origin and subtract it before, e.g. mul(basis,worldSpaceOrigin-observer) - this one is possibly better due to next point + +// So we need to store: +// 1) first two COLUMNS of the original OBB matrix (rows of 3x3 basis matrix with the scale still in there), thats kinda your right and up vectors +// 2) pre-rotation translation / the world-space translation of the rectangle +// Theoretically you could get away with not storing one of the up vector components but its not always the same component you can reconstruct (plane orthogonal to up isn't always the XY plane). +// Could compress up vector as a rotation of the default vector orthogonal to right as given by the frisvad-basis function around the right vector plus a scale +// but that becomes a very expensive decompression step involving a quaternion with uniform scale. + +template +struct CompressedSphericalRectangle +{ + using vector3_type = vector; + + vector3_type origin; + vector3_type right; + vector3_type up; +}; + template struct SphericalRectangle { using scalar_type = Scalar; + using vector2_type = vector; using vector3_type = vector; - using vector4_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) + static SphericalRectangle create(NBL_CONST_REF_ARG(CompressedSphericalRectangle) compressed) { SphericalRectangle retval; - retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); + retval.origin = compressed.origin; + retval.extents = vector2_type(hlsl::length(compressed.right), hlsl::length(compressed.up)); + retval.basis[0] = compressed.right / retval.extents[0]; + retval.basis[1] = compressed.up / retval.extents[1]; + assert(hlsl::dot(retval.basis[0], retval.basis[1]) > scalar_type(0.0)); + retval.basis[2] = hlsl::normalize(hlsl::cross(retval.basis[0], retval.basis[1])); return retval; } - static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, vector3_type B, const vector3_type N) + scalar_type solidAngle(const vector3_type observer) { - SphericalRectangle retval; - matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); - retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer); - return retval; - } + const vector3_type r0 = hlsl::mul(basis, origin - observer); - scalar_type solidAngleOfRectangle(const vector rectangleExtents) - { - const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); + using vector4_type = vector; + const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + extents.x, r0.y + extents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], @@ -57,7 +116,9 @@ struct SphericalRectangle return angle_adder.getSumofArccos() - scalar_type(2.0) * numbers::pi; } - vector3_type r0; + vector3_type origin; + vector2_type extents; + matrix3x3_type basis; }; } diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index f574b106ce..118f022640 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -25,38 +25,37 @@ struct SphericalTriangle using scalar_type = T; using vector3_type = vector; - static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin) + static SphericalTriangle create(const vector3_type vertices[3], const vector3_type origin) { SphericalTriangle retval; - retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); - retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin); - retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin); - retval.cos_sides = vector3_type(hlsl::dot(retval.vertex1, retval.vertex2), hlsl::dot(retval.vertex2, retval.vertex0), hlsl::dot(retval.vertex0, retval.vertex1)); - const vector3_type csc_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; - retval.csc_sides.x = hlsl::rsqrt(csc_sides2.x); - retval.csc_sides.y = hlsl::rsqrt(csc_sides2.y); - retval.csc_sides.z = hlsl::rsqrt(csc_sides2.z); + retval.vertices[0] = nbl::hlsl::normalize(vertices[0] - origin); + retval.vertices[1] = nbl::hlsl::normalize(vertices[1] - origin); + retval.vertices[2] = nbl::hlsl::normalize(vertices[2] - origin); + retval.cos_sides = vector3_type(hlsl::dot(retval.vertices[1], retval.vertices[2]), hlsl::dot(retval.vertices[2], retval.vertices[0]), hlsl::dot(retval.vertices[0], retval.vertices[1])); + const vector3_type sin_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; + retval.csc_sides = hlsl::rsqrt(sin_sides2); return retval; } + // checks if any angles are small enough to disregard bool pyramidAngles() { - return hlsl::any >(csc_sides >= (vector3_type)(numeric_limits::max)); + return hlsl::any >(csc_sides >= hlsl::promote(numeric_limits::max)); } - scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c) + vector3_type __getCosVertices() + { + // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) + return hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + } + + scalar_type solidAngle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices) { if (pyramidAngles()) return 0.f; - // these variables might eventually get optimized out - cos_a = cos_sides[0]; - cos_c = cos_sides[2]; - csc_b = csc_sides[1]; - csc_c = csc_sides[2]; - // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) + cos_vertices = __getCosVertices(); sin_vertices = hlsl::sqrt(hlsl::promote(1.0) - cos_vertices * cos_vertices); math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); @@ -65,39 +64,30 @@ struct SphericalTriangle return angle_adder.getSumofArccos() - numbers::pi; } - scalar_type solidAngleOfTriangle() + scalar_type solidAngle() { vector3_type dummy0,dummy1; - scalar_type dummy2,dummy3,dummy4,dummy5; - return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); + return solidAngle(dummy0,dummy1); } - scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + scalar_type projectedSolidAngle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { if (pyramidAngles()) return 0.f; - vector3_type awayFromEdgePlane0 = hlsl::cross(vertex1, vertex2) * csc_sides[0]; - vector3_type awayFromEdgePlane1 = hlsl::cross(vertex2, vertex0) * csc_sides[1]; - vector3_type awayFromEdgePlane2 = hlsl::cross(vertex0, vertex1) * csc_sides[2]; - - // useless here but could be useful somewhere else - cos_vertices[0] = hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2); - cos_vertices[1] = hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0); - cos_vertices[2] = hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); - // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + cos_vertices = __getCosVertices(); - matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); + matrix awayFromEdgePlane; + awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * csc_sides[0]; + awayFromEdgePlane[1] = hlsl::cross(vertices[2], vertices[0]) * csc_sides[1]; + awayFromEdgePlane[2] = hlsl::cross(vertices[0], vertices[1]) * csc_sides[2]; const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); - const vector3_type pyramidAngles = acos(cos_sides); - return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); + const vector3_type pyramidAngles = hlsl::acos(cos_sides); + return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); } - vector3_type vertex0; - vector3_type vertex1; - vector3_type vertex2; + vector3_type vertices[3]; vector3_type cos_sides; vector3_type csc_sides; }; diff --git a/include/nbl/builtin/hlsl/testing/approx_compare.hlsl b/include/nbl/builtin/hlsl/testing/approx_compare.hlsl new file mode 100644 index 0000000000..945e9a48cb --- /dev/null +++ b/include/nbl/builtin/hlsl/testing/approx_compare.hlsl @@ -0,0 +1,82 @@ +#ifndef _NBL_BUILTIN_HLSL_TESTING_APPROX_COMPARE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TESTING_APPROX_COMPARE_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace testing +{ +namespace impl +{ + +template +struct AbsoluteAndRelativeApproxCompareHelper; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeScalar) +struct AbsoluteAndRelativeApproxCompareHelper) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPoint) lhs, NBL_CONST_REF_ARG(FloatingPoint) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) + { + // Absolute check first: catches small-magnitude values where relative comparison breaks down + if (hlsl::abs(float64_t(lhs) - float64_t(rhs)) <= maxAbsoluteDifference) + return true; + + // Fall back to relative comparison for larger values + return RelativeApproxCompareHelper::__call(lhs, rhs, maxRelativeDifference); + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial) +struct AbsoluteAndRelativeApproxCompareHelper) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) + { + using traits = nbl::hlsl::vector_traits; + for (uint32_t i = 0; i < traits::Dimension; ++i) + { + if (!AbsoluteAndRelativeApproxCompareHelper::__call(lhs[i], rhs[i], maxAbsoluteDifference, maxRelativeDifference)) + return false; + } + + return true; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) +struct AbsoluteAndRelativeApproxCompareHelper && concepts::FloatingPointLikeScalar::scalar_type>) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointMatrix) lhs, NBL_CONST_REF_ARG(FloatingPointMatrix) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) + { + using traits = nbl::hlsl::matrix_traits; + for (uint32_t i = 0; i < traits::RowCount; ++i) + { + if (!AbsoluteAndRelativeApproxCompareHelper::__call(lhs[i], rhs[i], maxAbsoluteDifference, maxRelativeDifference)) + return false; + } + + return true; + } +}; + +} + +// Composite comparator that builds on top of relativeApproxCompare. +// Checks absolute difference first (handles small-magnitude values where +// relative comparison breaks down), then falls back to relative comparison. +template +bool approxCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) +{ + return impl::AbsoluteAndRelativeApproxCompareHelper::__call(lhs, rhs, maxAbsoluteDifference, maxRelativeDifference); +} + +} +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index d228be8ea4..da8a6c9fa1 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -280,6 +280,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/projected_spherical_ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted_spheres.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quotient_and_pdf.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/value_and_pdf.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concepts.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform_spheres.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") @@ -317,6 +319,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bxdf/transmission/oren_nayar. LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bxdf/transmission/smooth_dielectric.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bxdf/transmission/iridescent.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bxdf/transmission/delta_distribution.hlsl") +#path tracing +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/path_tracing/concepts.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/path_tracing/unidirectional.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/path_tracing/default_accumulator.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/path_tracing/gaussian_filter.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/path_tracing/basic_ray_gen.hlsl") #subgroup LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/subgroup/ballot.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/subgroup/basic.hlsl") @@ -386,6 +394,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/ResolveParameters.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") #testing LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/approx_compare.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/orientation_compare.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/vector_length_compare.hlsl")