diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h b/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h index f10ddf3d56..426e31f271 100644 --- a/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h +++ b/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h @@ -7,6 +7,7 @@ #include "transforms/builtins/ColorMatrixHelpers.h" #include "MatrixLib.h" +#include namespace OCIO_NAMESPACE { @@ -14,30 +15,6 @@ namespace OCIO_NAMESPACE namespace ACES2 { -inline f3 HSV_to_RGB(const f3 &HSV) -{ - const float C = HSV[2] * HSV[1]; - const float X = C * (1.f - std::abs(std::fmod(HSV[0] * 6.f, 2.f) - 1.f)); - const float m = HSV[2] - C; - - f3 RGB{}; - if (HSV[0] < 1.f/6.f) { - RGB = {C, X, 0.f}; - } else if (HSV[0] < 2./6.) { - RGB = {X, C, 0.f}; - } else if (HSV[0] < 3./6.) { - RGB = {0.f, C, X}; - } else if (HSV[0] < 4./6.) { - RGB = {0.f, X, C}; - } else if (HSV[0] < 5./6.) { - RGB = {X, 0.f, C}; - } else { - RGB = {C, 0.f, X}; - } - RGB = add_f_f3(m, RGB); - return RGB; -} - inline m33f RGBtoXYZ_f33(const Primaries &C) { return m33_from_ocio_matrix_array( diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h index aeb1524071..18f0123636 100644 --- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h +++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h @@ -7,44 +7,98 @@ #include "MatrixLib.h" #include "ColorLib.h" +#include namespace OCIO_NAMESPACE { namespace ACES2 { +constexpr float PI = 3.14159265358979f; -constexpr int TABLE_SIZE = 360; -constexpr int TABLE_ADDITION_ENTRIES = 2; -constexpr int TABLE_TOTAL_SIZE = TABLE_SIZE + TABLE_ADDITION_ENTRIES; -constexpr int GAMUT_TABLE_BASE_INDEX = 1; +constexpr float hue_limit = 360.0f; +//constexpr float hue_limit = 2.0f * PI; +inline float _wrap_to_hue_limit(float y) +{ + if ( y < 0.f) + { + y = y + hue_limit; + } + return y; +} + +inline float wrap_to_hue_limit(float hue) +{ + float y = std::fmod(hue, hue_limit); + return _wrap_to_hue_limit(y); +} +inline constexpr float to_degrees(const float v) { return v; } +inline float from_degrees(const float v) { return wrap_to_hue_limit(v); } +inline constexpr float to_radians(const float v) { return PI * v / 180.0f; }; +inline float _from_radians(const float v) { return _wrap_to_hue_limit(180.0f * v / PI); }; // v needs to be wrapped already +inline float from_radians(const float v) { return wrap_to_hue_limit(180.0f * v / PI); }; +/* +inline constexpr float to_degrees(const float v) { return 180.0f * v / PI; } +inline float from_degrees(const float v) { return wrap_to_hue_limit(PI * v / 180.0f); } +inline constexpr float to_radians(const float v) { return v; } +inline float _from_radians(const float v) { return _wrap_to_hue_limit(v); }; +inline float from_radians(const float v) { return wrap_to_hue_limit(v); }; +*/ + +struct TableBase +{ + static constexpr unsigned int _TABLE_ADDITION_ENTRIES = 2; + static constexpr unsigned int base_index = 1; + static constexpr unsigned int nominal_size = 360; + static constexpr unsigned int total_size = nominal_size + _TABLE_ADDITION_ENTRIES; + + static constexpr unsigned int lower_wrap_index = 0; + static constexpr unsigned int upper_wrap_index = base_index + nominal_size; + static constexpr unsigned int first_nominal_index = base_index; + static constexpr unsigned int last_nominal_index = upper_wrap_index - 1; + + inline float base_hue_for_position(unsigned int i_lo) const + { + if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr + return float(i_lo); + + const float result = i_lo * hue_limit / nominal_size; + return result; + } + + inline unsigned int hue_position_in_uniform_table(float wrapped_hue) const + { + if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr + return static_cast(wrapped_hue); + else + return static_cast(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps? + } + + inline unsigned int nominal_hue_position_in_uniform_table(float wrapped_hue) const + { + return first_nominal_index + hue_position_in_uniform_table(wrapped_hue); + } +}; -struct Table3D +struct Table3D : public TableBase, std::array { - static constexpr int base_index = GAMUT_TABLE_BASE_INDEX; - static constexpr int size = TABLE_SIZE; - static constexpr int total_size = TABLE_TOTAL_SIZE; - float table[TABLE_TOTAL_SIZE][3]; }; -struct Table1D +struct Table1D : public TableBase, std::array { - static constexpr int base_index = GAMUT_TABLE_BASE_INDEX; - static constexpr int size = TABLE_SIZE; - static constexpr int total_size = TABLE_TOTAL_SIZE; - float table[TABLE_TOTAL_SIZE]; }; struct JMhParams { - float F_L; - float z; - float A_w; + m33f MATRIX_RGB_to_CAM16_c; + m33f MATRIX_CAM16_c_to_RGB; + m33f MATRIX_cone_response_to_Aab; + m33f MATRIX_Aab_to_cone_response; + float F_L_n; // F_L normalised + float cz; + float inv_cz; // 1/cz float A_w_J; - f3 XYZ_w; - f3 D_RGB; - m33f MATRIX_RGB_to_CAM16; - m33f MATRIX_CAM16_to_RGB; + float inv_A_w_J; // 1/A_w_J }; struct ToneScaleParams @@ -57,41 +111,63 @@ struct ToneScaleParams float s_2; float u_2; float m_2; + float forward_limit; + float inverse_limit; + float log_peak; }; -struct ChromaCompressParams +struct SharedCompressionParameters { float limit_J_max; - float model_gamma; + float model_gamma_inv; + Table1D reach_m_table; +}; + +struct ResolvedSharedCompressionParameters +{ + float limit_J_max; + float model_gamma_inv; + float reachMaxM; +}; + +struct ChromaCompressParams +{ float sat; float sat_thr; float compr; - Table1D reach_m_table; float chroma_compress_scale; static constexpr float cusp_mid_blend = 1.3f; }; +struct HueDependantGamutParams +{ + float gamma_bottom_inv; + f2 JMcusp; + float gamma_top_inv; + float focusJ; + float analytical_threshold; +}; struct GamutCompressParams { - float limit_J_max; float mid_J; - float model_gamma; float focus_dist; - float lower_hull_gamma; - Table1D reach_m_table; + float lower_hull_gamma_inv; + std::array hue_linearity_search_range; + Table1D hue_table;; Table3D gamut_cusp_table; - Table1D upper_hull_gamma_table; }; // CAM constexpr float reference_luminance = 100.f; constexpr float L_A = 100.f; constexpr float Y_b = 20.f; -constexpr float ac_resp = 1.f; -constexpr float ra = 2.f * ac_resp; -constexpr float ba = 0.05f + (2.f - ra); constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround +constexpr float J_scale = 100.0f; +constexpr float cam_nl_Y_reference = 100.0f; +constexpr float cam_nl_offset = 0.2713f * cam_nl_Y_reference; +constexpr float cam_nl_scale = 4.0f * cam_nl_Y_reference; + // Chroma compression constexpr float chroma_compress = 2.4f; constexpr float chroma_compress_fact = 3.3f; @@ -100,11 +176,11 @@ constexpr float chroma_expand_fact = 0.69f; constexpr float chroma_expand_thr = 0.5f; // Gamut compression -constexpr float smooth_cusps = 0.12f; +constexpr float smooth_cusps = 0.12f; // C++ 14 required for constexpr std::max(0.000001f, 0.12f); constexpr float smooth_m = 0.27f; constexpr float cusp_mid_blend = 1.3f; constexpr float focus_gain_blend = 0.3f; -constexpr float focus_adjust_gain = 0.55f; +constexpr float focus_adjust_gain_inv = 1.0f / 0.55f; constexpr float focus_distance = 1.35f; constexpr float focus_distance_scaling = 1.75f; constexpr float compression_threshold = 0.75f; @@ -125,6 +201,11 @@ constexpr float gammaMaximum = 5.0f; constexpr float gammaSearchStep = 0.4f; constexpr float gammaAccuracy = 1e-5f; +constexpr int cuspCornerCount = 6; +constexpr int totalCornerCount = cuspCornerCount + 2; +constexpr int max_sorted_corners = 2 * cuspCornerCount; +constexpr float reach_cusp_tolerance = 1e-3f; +constexpr float display_cusp_tolerance = 1e-7f; } // namespace ACES2 diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h b/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h index 27a24df0c9..d552742ccb 100644 --- a/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h +++ b/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h @@ -6,6 +6,7 @@ #include "ops/matrix/MatrixOpData.h" +#include namespace OCIO_NAMESPACE { diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp index 80b8ba5c72..3e23f59a27 100644 --- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp +++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp @@ -3,6 +3,10 @@ #include "Transform.h" +#include +#include +#include +#include namespace OCIO_NAMESPACE { @@ -13,48 +17,47 @@ namespace ACES2 // // Table lookups // - -float wrap_to_360(float hue) +inline f3 lerp(const f3& lower, const f3& upper, const float t) { - float y = std::fmod(hue, 360.f); - if ( y < 0.f) - { - y = y + 360.f; - } - return y; + return { + lerpf(lower[0], upper[0], t), + lerpf(lower[1], upper[1], t), + lerpf(lower[2], upper[2], t) + }; } -float base_hue_for_position(int i_lo, int table_size) +inline f3 lerp(const float lower[3], const float upper[3], const float t) { - const float result = i_lo * 360.f / table_size; - return result; + return { + lerpf(lower[0], upper[0], t), + lerpf(lower[1], upper[1], t), + lerpf(lower[2], upper[2], t) + }; } -int hue_position_in_uniform_table(float hue, int table_size) +inline unsigned int midpoint(unsigned int a, unsigned int b) { - const float wrapped_hue = wrap_to_360(hue); - return int(wrapped_hue / 360.f * (float) table_size); + return (a + b) / 2; } -int next_position_in_table(int entry, int table_size) +inline float midpoint(float a, float b) { - return (entry + 1) % table_size; + return (a + b) / 2.f; } -int clamp_to_table_bounds(int entry, int table_size) +unsigned int lookup_hue_interval(float h, const Table1D &hues, const std::array & hue_linearity_search_range) { - return std::min(table_size - 1, std::max(0, entry)); -} + // Search the given Table for the interval containing the desired hue + // Returns the upper index of the interval -f2 cusp_from_table(float h, const Table3D >) -{ - int i_lo = 0; - int i_hi = gt.base_index + gt.size; // allowed as we have an extra entry in the table - int i = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size); + // We can narrow the search range based on the hues being almost uniform + unsigned int i = hues.nominal_hue_position_in_uniform_table(h); + unsigned int i_lo = std::max(int(hues.lower_wrap_index), int(i) + hue_linearity_search_range[0]); // Should be nominal not lower_wrap? + unsigned int i_hi = std::min(int(hues.upper_wrap_index), int(i) + hue_linearity_search_range[1]); while (i_lo + 1 < i_hi) { - if (h > gt.table[i][2]) + if (h > hues[i]) { i_lo = i; } @@ -62,165 +65,273 @@ f2 cusp_from_table(float h, const Table3D >) { i_hi = i; } - i = clamp_to_table_bounds((i_lo + i_hi) / 2, gt.total_size); + i = midpoint(i_lo, i_hi); } - i_hi = std::max(1, i_hi); - - const f3 lo { - gt.table[i_hi-1][0], - gt.table[i_hi-1][1], - gt.table[i_hi-1][2] - }; - - const f3 hi { - gt.table[i_hi][0], - gt.table[i_hi][1], - gt.table[i_hi][2] - }; - - const float t = (h - lo[2]) / (hi[2] - lo[2]); - const float cuspJ = lerpf(lo[0], hi[0], t); - const float cuspM = lerpf(lo[1], hi[1], t); + i_hi = std::max(1U, i_hi); // TODO: should not be needed if we initialise with the correct lo and hi - return { cuspJ, cuspM }; + return i_hi; } -float reach_m_from_table(float h, const ACES2::Table1D >) +inline float interpolation_weight(float h, float h_lo, float h_hi) { - const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size); - const int i_hi = clamp_to_table_bounds(next_position_in_table(i_lo, gt.size), gt.total_size); - - const float t = (h - i_lo) / (i_hi - i_lo); - return lerpf(gt.table[i_lo], gt.table[i_hi], t); + return (h - h_lo) / (h_hi - h_lo); } -float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D >) +inline float interpolation_weight(float h, unsigned int h_lo, unsigned int h_hi) { - const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size); - const int i_hi = clamp_to_table_bounds(next_position_in_table(i_lo, gt.size), gt.total_size); + return (h - h_lo) / (h_hi - h_lo); +} - const float base_hue = (float) (i_lo - gt.base_index); +inline f3 cusp_from_table(unsigned int i_hi, float t, const Table3D >) +{ + return lerp(gt[i_hi-1], gt[i_hi], t); +} - const float t = wrap_to_360(h) - base_hue; +float reach_m_from_table(float h, const ACES2::Table1D &rt) +{ + const unsigned int base = rt.hue_position_in_uniform_table(h); + const float t = h - base; // NOTE assumes uniform 1 degree 360 spacing + const unsigned int i_lo = base + rt.first_nominal_index; + const unsigned int i_hi = i_lo + 1; // NOTE assumes uniform 1 degree 360 spacing - return lerpf(gt.table[i_lo], gt.table[i_hi], t); + return lerpf(rt[i_lo], rt[i_hi], t); } // // CAM // -// Post adaptation non linear response compression -float panlrc_forward(float v, float F_L) +inline float _post_adaptation_cone_response_compression_fwd(float Rc) +{ + const float F_L_Y = powf(Rc, 0.42f); + const float Ra = (F_L_Y) / (cam_nl_offset + F_L_Y); + return Ra; +} + +inline float _post_adaptation_cone_response_compression_inv(float Ra) { - const float F_L_v = powf(F_L * std::abs(v) / reference_luminance, 0.42f); + const float F_L_Y = (cam_nl_offset * Ra) / (1.0f - Ra); // TODO: what happens when Ra >= 1.0 + const float Rc = powf(F_L_Y, 1.f / 0.42f); + return Rc; +} + + +float post_adaptation_cone_response_compression_fwd(float v) +{ + const float abs_v = std::abs(v); + const float Ra = _post_adaptation_cone_response_compression_fwd(abs_v); // Note that std::copysign(1.f, 0.f) returns 1 but the CTL copysign(1.,0.) returns 0. // TODO: Should we change the behaviour? - return (400.f * std::copysign(1.f, v) * F_L_v) / (27.13f + F_L_v); + return std::copysign(Ra, v); } -float panlrc_inverse(float v, float F_L) +float post_adaptation_cone_response_compression_inv(float v) { - return std::copysign(1.f, v) * reference_luminance / F_L * powf((27.13f * std::abs(v) / (400.f - std::abs(v))), 1.f / 0.42f); + const float abs_v = std::abs(v); + const float Rc = _post_adaptation_cone_response_compression_inv(abs_v); + return std::copysign(Rc, v); } -// Optimization used during initialization -float Y_to_J(float Y, const JMhParams ¶ms) +inline float Achromatic_n_to_J(float A, const float cz) { - float F_L_Y = powf(params.F_L * std::abs(Y) / reference_luminance, 0.42f); - return std::copysign(1.f, Y) * reference_luminance * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / params.A_w_J, surround[1] * params.z); + return J_scale * powf(A, cz); } -f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p) +inline float J_to_Achromatic_n(float J, const float inv_cz) { - const float red = RGB[0]; - const float grn = RGB[1]; - const float blu = RGB[2]; + return powf(J * (1.0f / J_scale), inv_cz); +} - const float red_m = red * p.MATRIX_RGB_to_CAM16[0] + grn * p.MATRIX_RGB_to_CAM16[1] + blu * p.MATRIX_RGB_to_CAM16[2]; - const float grn_m = red * p.MATRIX_RGB_to_CAM16[3] + grn * p.MATRIX_RGB_to_CAM16[4] + blu * p.MATRIX_RGB_to_CAM16[5]; - const float blu_m = red * p.MATRIX_RGB_to_CAM16[6] + grn * p.MATRIX_RGB_to_CAM16[7] + blu * p.MATRIX_RGB_to_CAM16[8]; +// Optimization for achromatic values - const float red_a = panlrc_forward(red_m * p.D_RGB[0], p.F_L); - const float grn_a = panlrc_forward(grn_m * p.D_RGB[1], p.F_L); - const float blu_a = panlrc_forward(blu_m * p.D_RGB[2], p.F_L); +inline float _A_to_Y(float A, const JMhParams &p) +{ + const float Ra = p.A_w_J * A; + const float Y = _post_adaptation_cone_response_compression_inv(Ra) / p.F_L_n; + return Y; +} - const float A = 2.f * red_a + grn_a + 0.05f * blu_a; - const float a = red_a - 12.f * grn_a / 11.f + blu_a / 11.f; - const float b = (red_a + grn_a - 2.f * blu_a) / 9.f; +inline float _J_to_Y(float abs_J, const JMhParams &p) +{ + return _A_to_Y(J_to_Achromatic_n(abs_J, p.inv_cz), p); +} - const float J = 100.f * powf(A / p.A_w, surround[1] * p.z); +inline float _Y_to_J(float abs_Y, const JMhParams &p) +{ + const float Ra = _post_adaptation_cone_response_compression_fwd(abs_Y * p.F_L_n); + const float J = Achromatic_n_to_J(Ra * p.inv_A_w_J, p.cz); + return J; +} + +float Y_to_J(float Y, const JMhParams &p) +{ + const float abs_Y = std::abs(Y); + const float J = _Y_to_J(abs_Y, p); + return std::copysign(J, Y); +} + +f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p) +{ + const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c); + + const f3 rgb_a = { + post_adaptation_cone_response_compression_fwd(rgb_m[0]), + post_adaptation_cone_response_compression_fwd(rgb_m[1]), + post_adaptation_cone_response_compression_fwd(rgb_m[2]) + }; - const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(a * a + b * b); + const f3 Aab = mult_f3_f33(rgb_a, p.MATRIX_cone_response_to_Aab); + return Aab; +} - const float PI = 3.14159265358979f; - const float h_rad = std::atan2(b, a); - float h = std::fmod(h_rad * 180.f / PI, 360.f); - if (h < 0.f) +f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p) +{ + if (Aab[0] <= 0.f) { - h += 360.f; + return {0.f, 0.f, 0.f}; } + const float J = Achromatic_n_to_J(Aab[0], p.cz); + const float M = sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]); + const float h_rad = std::atan2(Aab[2], Aab[1]); + float h = _from_radians(h_rad); // Call to unwrapped hue version due to atan2 limits return {J, M, h}; } -f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p) +f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p) +{ + const f3 Aab = RGB_to_Aab(RGB, p); + const f3 JMh = Aab_to_JMh(Aab, p); + return JMh; +} + +f3 JMh_to_Aab(const f3 &JMh, const float &cos_hr, const float &sin_hr, const JMhParams &p) { const float J = JMh[0]; const float M = JMh[1]; - const float h = JMh[2]; - const float PI = 3.14159265358979f; - const float h_rad = h * PI / 180.f; + const float A = J_to_Achromatic_n(J, p.inv_cz); + const float a = M * cos_hr; + const float b = M * sin_hr; + return {A, a, b}; +} + +f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p) +{ + const float h = JMh[2]; + const float h_rad = to_radians(h); + const float cos_hr = cos(h_rad); + const float sin_hr = sin(h_rad); - const float scale = M / (43.f * surround[2]); - const float A = p.A_w * powf(J / 100.f, 1.f / (surround[1] * p.z)); - const float a = scale * cos(h_rad); - const float b = scale * sin(h_rad); + return JMh_to_Aab(JMh, cos_hr, sin_hr, p); +} - const float red_a = (460.f * A + 451.f * a + 288.f * b) / 1403.f; - const float grn_a = (460.f * A - 891.f * a - 261.f * b) / 1403.f; - const float blu_a = (460.f * A - 220.f * a - 6300.f * b) / 1403.f; +f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p) +{ + const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response); - float red_m = panlrc_inverse(red_a, p.F_L) / p.D_RGB[0]; - float grn_m = panlrc_inverse(grn_a, p.F_L) / p.D_RGB[1]; - float blu_m = panlrc_inverse(blu_a, p.F_L) / p.D_RGB[2]; + const f3 rgb_m = { + post_adaptation_cone_response_compression_inv(rgb_a[0]), + post_adaptation_cone_response_compression_inv(rgb_a[1]), + post_adaptation_cone_response_compression_inv(rgb_a[2]) + }; - const float red = red_m * p.MATRIX_CAM16_to_RGB[0] + grn_m * p.MATRIX_CAM16_to_RGB[1] + blu_m * p.MATRIX_CAM16_to_RGB[2]; - const float grn = red_m * p.MATRIX_CAM16_to_RGB[3] + grn_m * p.MATRIX_CAM16_to_RGB[4] + blu_m * p.MATRIX_CAM16_to_RGB[5]; - const float blu = red_m * p.MATRIX_CAM16_to_RGB[6] + grn_m * p.MATRIX_CAM16_to_RGB[7] + blu_m * p.MATRIX_CAM16_to_RGB[8]; + const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB); + return rgb; +} - return {red, grn, blu}; +f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p) +{ + const f3 Aab = JMh_to_Aab(JMh, p); + const f3 rgb = Aab_to_RGB(Aab, p); + return rgb; } // // Tonescale / Chroma compress // -float chroma_compress_norm(float h, float chroma_compress_scale) -{ - const float PI = 3.14159265358979f; - const float h_rad = h / 180.f * PI; - const float a = cos(h_rad); - const float b = sin(h_rad); - const float cos_hr2 = a * a - b * b; - const float sin_hr2 = 2.0f * a * b; - const float cos_hr3 = 4.0f * a * a * a - 3.0f * a; - const float sin_hr3 = 3.0f * b - 4.0f * b * b * b; +/* +//TODO: move to header +// https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026 +// https://gcc.godbolt.org/#compilers:!((compiler:g6,options:%27-xc+-O3+-Wall+-fverbose-asm+-march%3Dhaswell+-mno-avx%27,sourcez:MQSwdgxgNgrgJgUwAQB4QFt3gC4CdwB0AFgHwBQoksiqAztnDseWQPStLZEi1I9JQQAa2QBZAIa5x2AOS0ANEgBGMbJwDuCcSLicA9kgh70ABxBRk2A0oTZsCXIb2ICbDtHFgA5khhgAZnq42H7SCFAAnkiIECCIvFa%2BtMjoegBu4ia0rLREMP4muuD0Wrp6/kipaURQWWT%2BUHrSSES0MOgA%2BlkdtMkAjAAUHR3ofQBMABxIaQCUSADeSEvLK6tr62vs00gAvEgA2kgAIkgAwkgAPkgAQkgAgkgAumQrw6OTSLn5y3sjnV/%2BBoILq0AZpRTgpAdUSiDoAZQAEgBVABiKIAMgBRAZjRQAZkUAAZFH0ZjMANxLLaHc4nK63B7PV4jcZTNroXhLX6YDriOBwEFgxQAimbDjs3h7Q5HADU51OMrpNxlDzuMtuTOWAI2XKhPKqCBqgoBwvatFFqy2B2W5yWStl8sVTypHAsai4yCMpnMDiQ4jSejifsq6WUUVwCBgtHAPgBLy1Zp1uz1nT5At6Awlwry/gp8aWEZCuDArx5EDS2F6HX8eLGmbNeYAvmR6o1mq12iCesk8UMWR9Zgt81D%2B2yc8m/iN0oa4DATILZpT1lslLgmnAIOJ6Ehwgh0AgwJWkAS%2BvokLjCcO3qzPonuan%2BQvs/k8yttet71O0oaoMac6aOVFLZuC8IgWnEKAKgAWhIAQ9HUcDIOHCUNk/NNu3rDln1zclh0LGBiw2Sdy0rWhq1rTDzVw5sWy2Wh0AgixHCMGhowALwQRQVDUKMYEYqJaEaTRHD8CxeicRAoPY5B%2BHEJATDXJQLHQVsmjUDtOm6Ig0z7d4pkHeYr1HW8OQnHltMfLIhWmV8E1Mj8Uw6CyBSsrMJVsgtbAIkt1mIisqxrOt3OoltQH8RAKmGO4ADUAA1hlU9t2TGABWAA2Lt/QAD101K0psodmT06YhN1P48o6Td6CyCqslZMEPJHYrqhAUCzPQCqECyvBxAgbB/FZJ8kFJSlgNasDWSM5qb3a3lLNBNIhIhECiAtZYtjTTgiGQUrJpWfDCI0rtegQXs0lZNakw2sAijAQQwEsbbPh7aYHGjPQwEUdRuAgMD%2BD0ExsAwCCkECRwYti4ctgGMADHULd7Fu%2BhcBgPqQA%2BhQ/Ruv0oCgJByi2vc/QjQmkAAFluLAwAwdoZjIZtgAPRh/BbNx2HZrY4D0GAlIQMguZ5iwWnZLp02SOtr0mXQ0jgOZFiTBWtjSZNDluK5GSmm8/AisqeW1hB/HABAXNBS6FY2TmPpkNR1CCXAIi%2B5APsiJJkDTOEjkUB7v0cKxUe4bxSa8SQlHELxkCUEAj3UKOwM8JAUQABT9flNY%2BAFsFMT8DSNKz9f8RRiIRwoF1li0tg9En%2BFhkNqlqOA04mXR30/KrsG6QpMxzTOTEag6fL8ysBX8NKyaGHl0NoOAwTgbCyRC2iOY5vhD1rMgcGFzsEDMWtuwQCXRxAJAsrligQHCg2RwhhKitZI/uFH3XOj8ExeqEbgOm3kBR4GLLFBPpcOoth4iggDBwnhdCwzAFBeIeBUZA2/PcOKAhbC8CjJ8f0bswz2CQDHLg3M1APWNjGYMVQKDhGSI3e%2B38yZPx6DmIEn8d51j/lCGE8JkRoixAMPoRJ8SKDGPPchN1z5UJMo/NCj4v4UQfmTf%2BjVJYTGobWOhAIgSNCYSAPoaVMKj0LuwxEqIMTYl4UgYkx4BHzxWFsOE8MTCk1KtgW2O5lIHkrGI9kKjJECmkUFdoeiWggFrH3LyhFB7RkGtGCinjBGAOsRwOEcJMRjFrg3eJ/dHJdR6n1TRMioBEjNjYpJcidzfhLPgmAbUo5OG9GJM8VQkb2D5PjCoclBD2CkHjEwWTcC6FwLWeQWVMDyEvDRDeR1fHdh7D0ISPQGK4xYggXSd9j4zEMisLKs1nK5JYfI3CGytnoV8b/PZeFQklnCVoyYPQgksKbKzReKdMiI1BmudASAJBSFkLwCGLQ7AmAAFzsHoG/acuAGjwQIF6VgABHGACB6DozANkPotY%2BgAE4%2BhkzSqwIg8EoJWCku0KCHV0pQTxfgNiH1sD8VYKisYDKyZ9EJMAeljLmWJXUiLboDEvnLI%2BCfQqywjDIrUIokyABFPiug9hZX2cKjGYrjKNCOHxPGex2RSr5PKl0gTVUgz2AMJAUFFAmuPniJAMpj4AHZ/4pKtVlfKdMVgiu3OK7g%2Br1WORzr%2BVy7QtWz0ldKxqdF2ieuTEas1ZqsqngdRah1KVLU2v/oSJNWV7XH1oQ6p1w5XVKuKuycN3jBQqrVYoD1aqQ2ugMIa41pr63H1TQ6jNWUs3HxzS6xVTUbyNGTIWtVOqkBjQjXW0d0bY3mrTYmh11qkDOoVaK7tHxuCzTURYY0Yay0mU9USGNVaTIjqjQ2rKTbj4TvTWm%2BNmap1pvyjOuduau3ivZLNSeoJGjlpAOXDg6EsjRANlob5hgoCeC8HIT4DChYAyBlgDiuAzlFguWWfyZFAqYXuUAA%3D%3D)),filterAsm:(binary:!t,commentOnly:!t,directives:!t,intel:!t,labels:!t),version:3 +// Alternate https://github.com/vectorclass/version2/blob/master/vectorf128.h#L1048 + +#if OCIO_USE_SSE2 || OCIO_USE_AVX +inline float hsum_ps_sse1(__m128 v) { // v = [ D C | B A ] + __m128 shuf = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1)); // shuf = [ C D | A B ] + __m128 sums = _mm_add_ps(v, shuf); // sums = [ D+C C+D | B+A A+B ] + shuf = _mm_movehl_ps(shuf, sums); // shuf = [ C D | D+C C+D ] + sums = _mm_add_ss(sums, shuf); // sums = [ D+C C+D | B+A A+B+C+D ] + return _mm_cvtss_f32(sums); // A+B+C+D +} +#endif + +#ifdef OCIO_USE_AVX +inline float hsum256_ps_avx(__m256 v) { // v = [ H G | F E | D C | B A ] + __m128 vlow = _mm256_castps256_ps128(v); // vlow = [ D C | B A ] + __m128 vhigh = _mm256_extractf128_ps(v, 1); // vhigh = [ H G | F E ] + __m128 v128 = _mm_add_ps(vlow, vhigh); // v128 = [ H+D G+C | F+B E+A ] + return hsum_ps_sse1(v128); +} +#endif +#endif +*/ - const float M = 11.34072f * a + - 16.46899f * cos_hr2 + - 7.88380f * cos_hr3 + - 14.66441f * b + - -6.37224f * sin_hr2 + - 9.19364f * sin_hr3 + - 77.12896f; +float chroma_compress_norm(float cos_hr1, float sin_hr1, float chroma_compress_scale) +{ + const float cos_hr2 = 2.0f * cos_hr1 * cos_hr1 - 1.0f; + const float sin_hr2 = 2.0f * cos_hr1 * sin_hr1; + const float cos_hr3 = 4.0f * cos_hr1 * cos_hr1 * cos_hr1 - 3.0f * cos_hr1; + const float sin_hr3 = 3.0f * sin_hr1 - 4.0f * sin_hr1 * sin_hr1 * sin_hr1; + + constexpr unsigned int AVX_ALIGNMENT = 32; + alignas(AVX_ALIGNMENT) const float trig_angles_hr[8] = { + cos_hr1, cos_hr2, cos_hr3, 0.0f, + sin_hr1, sin_hr2, sin_hr3, 1.0f + }; + alignas(AVX_ALIGNMENT) static constexpr float weights[8] = { // TODO: investigate reordering of the entries so we are summing equal magnitude values first? + 11.34072f, 16.46899f, 7.88380f, 0.0f, + 14.66441f, -6.37224f, 9.19364f, 77.12896f + }; - return M * chroma_compress_scale; + /* + // TODO: benchmark this across multiple platforms to justify the multiple code paths. +#if OCIO_USE_SSE2 +#if OCIO_USE_AVX + __m256 trigs = _mm256_load_ps(trig_angles_hr); + __m256 trig_weights = _mm256_load_ps(weights); + __m256 t1 = _mm256_mul_ps(trigs, trig_weights); + const float M = hsum256_ps_avx(t1); +#else + __m128 cosines = _mm_load_ps(trig_angles_hr); + __m128 sines = _mm_load_ps(&trig_angles_hr[4]); + __m128 cosine_weights = _mm_load_ps(weights); + __m128 sine_weights = _mm_load_ps(&weights[4]); + + __m128 t1 = _mm_mul_ps(cosines, cosine_weights); + __m128 t2 = _mm_mul_ps(sines, sine_weights); + __m128 t3 = _mm_add_ps(t1, t2); // TODO use fmadd_ps_sse2 from Lut1DOpCPU_SSE2.cpp ? + const float M = hsum_ps_sse1(t3); +#endif +#else + */ + const float M = weights[0] * trig_angles_hr[0] + + weights[1] * trig_angles_hr[1] + + weights[2] * trig_angles_hr[2] + + weights[4] * trig_angles_hr[4] + + weights[5] * trig_angles_hr[5] + + weights[6] * trig_angles_hr[6] + + weights[7]; + /* +#endif + */ + + return M * chroma_compress_scale; // TODO: is it worth prescaling the above weights? } -float toe_fwd( float x, float limit, float k1_in, float k2_in) +inline float toe_fwd( float x, float limit, float k1_in, float k2_in) { if (x > limit) { @@ -230,10 +341,13 @@ float toe_fwd( float x, float limit, float k1_in, float k2_in) const float k2 = std::max(k2_in, 0.001f); const float k1 = sqrt(k1_in * k1_in + k2 * k2); const float k3 = (limit + k1) / (limit + k2); - return 0.5f * (k3 * x - k1 + sqrt((k3 * x - k1) * (k3 * x - k1) + 4.f * k2 * k3 * x)); + + const float minus_b = k3 * x - k1; + const float minus_ac = k2 * k3 * x; // a is 1.0 + return 0.5f * (minus_b + sqrt(minus_b * minus_b + 4.f * minus_ac)); // a is 1.0, mins_b squared == b^2 } -float toe_inv( float x, float limit, float k1_in, float k2_in) +inline float toe_inv( float x, float limit, float k1_in, float k2_in) { if (x > limit) { @@ -246,33 +360,74 @@ float toe_inv( float x, float limit, float k1_in, float k2_in) return (x * x + k1 * x) / (k3 * (x + k2)); } -f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc) +template +inline float aces_tonescale(const float Y_in, const ToneScaleParams &pt) { - const float J = JMh[0]; - const float M = JMh[1]; - const float h = JMh[2]; + if (inverse) + { + const float Y_ts_norm = Y_in / reference_luminance; // TODO + const float Z = std::max(0.f, std::min(pt.inverse_limit, Y_ts_norm)); // TODO: could eliminate max in the context of the full tonescale + const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f; + const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f); // TODO: investigate precomputing reciprocal m_2 and a negative power? may swap a division for a multiply? powf(pt.m_2_recip * f, (-1.f / pt.g)) + return Y; + } + const float f = pt.m_2 * powf(Y_in / (Y_in + pt.s_2), pt.g); + const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r; // max prevents -ve values being output also handles division by zero possibility // TODO: can we eliminate the n_r scaling? should it be reference_luminance + return Y_ts; +} + +template +inline float tonescale(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls +{ // Tonescale applied in Y (convert to and from J) - const float A = p.A_w_J * powf(std::abs(J) / 100.f, 1.f / (surround[1] * p.z)); - const float Y = std::copysign(1.f, J) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f; + const float J_abs = std::abs(J); + const float Y_in = _J_to_Y(J_abs, p); + const float Y_out = aces_tonescale(Y_in, pt); + const float J_out = _Y_to_J(Y_out, p); + return std::copysign(J_out, J); +} - const float f = pt.m_2 * powf(std::max(0.f, Y) / (Y + pt.s_2), pt.g); - const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r; +float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt) +{ + return tonescale(J, p, pt); +} + +float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt) +{ + return tonescale(J, p, pt); +} - const float F_L_Y = powf(p.F_L * std::abs(Y_ts) / 100.f, 0.42f); - const float J_ts = std::copysign(1.f, Y_ts) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z); - // ChromaCompress +template +inline float tonescale_A_to_J(const float A, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls +{ + const float Y_in = _A_to_Y(A, p); + const float Y_out = aces_tonescale(Y_in, pt); + const float J_out = _Y_to_J(Y_out, p); + return std::copysign(J_out, A); +} + +float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt) +{ + return tonescale_A_to_J(A, p, pt); +} + +f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const float Mnorm, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc) +{ + const float J = JMh[0]; + const float M = JMh[1]; + const float h = JMh[2]; + float M_cp = M; if (M != 0.0) { - const float nJ = J_ts / pc.limit_J_max; + const float nJ = J_ts / pr.limit_J_max; const float snJ = std::max(0.f, 1.f - nJ); - const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale); - const float limit = powf(nJ, pc.model_gamma) * reach_m_from_table(h, pc.reach_m_table) / Mnorm; + const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm; - M_cp = M * powf(J_ts / J, pc.model_gamma); + M_cp = M * powf(J_ts / J, pr.model_gamma_inv); M_cp = M_cp / Mnorm; M_cp = limit - toe_fwd(limit - M_cp, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr)); M_cp = toe_fwd(M_cp, limit, nJ * pc.compr, snJ); @@ -282,49 +437,45 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc return {J_ts, M_cp, h}; } -f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc) +f3 chroma_compress_inv(const f3 &JMh, const float J, const float Mnorm, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc) { const float J_ts = JMh[0]; const float M_cp = JMh[1]; const float h = JMh[2]; - - // Inverse Tonescale applied in Y (convert to and from J) - const float A = p.A_w_J * powf(std::abs(J_ts) / 100.f, 1.f / (surround[1] * p.z)); - const float Y_ts = std::copysign(1.f, J_ts) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f; - - const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts)); - const float ht = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f; - const float Y = pt.s_2 / (powf((pt.m_2 / ht), (1.f / pt.g)) - 1.f) * pt.n_r; - - const float F_L_Y = powf(p.F_L * std::abs(Y) / 100.f, 0.42f); - const float J = std::copysign(1.f, Y) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z); - - // Inverse ChromaCompress float M = M_cp; if (M_cp != 0.0) { - const float nJ = J_ts / pc.limit_J_max; + const float nJ = J_ts / pr.limit_J_max; const float snJ = std::max(0.f, 1.f - nJ); - const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale); - const float limit = powf(nJ, pc.model_gamma) * reach_m_from_table(h, pc.reach_m_table) / Mnorm; + const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm; M = M_cp / Mnorm; M = toe_inv(M, limit, nJ * pc.compr, snJ); M = limit - toe_inv(limit - M, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr)); M = M * Mnorm; - M = M * powf(J_ts / J, -pc.model_gamma); + M = M * powf(J_ts / J, -pr.model_gamma_inv); } return {J, M, h}; } -JMhParams init_JMhParams(const Primaries &P) +inline float model_gamma(void) { - JMhParams p; + // c * z nonlinearity + return surround[1] * (1.48f + sqrt(Y_b / reference_luminance)); +} + +JMhParams init_JMhParams(const Primaries &prims) +{ + const m33f base_cone_response_to_Aab = { + 2.0f, 1.0f, 1.0f / 20.0f, + 1.0f, -12.0f / 11.0f, 1.0f / 11.0f, + 1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f + }; const m33f MATRIX_16 = XYZtoRGB_f33(CAM16::primaries); - const m33f RGB_to_XYZ = RGBtoXYZ_f33(P); + const m33f RGB_to_XYZ = RGBtoXYZ_f33(prims); const f3 XYZ_w = mult_f3_f33(f3_from_f(reference_luminance), RGB_to_XYZ); const float Y_W = XYZ_w[1]; @@ -332,16 +483,18 @@ JMhParams init_JMhParams(const Primaries &P) const f3 RGB_w = mult_f3_f33(XYZ_w, MATRIX_16); // Viewing condition dependent parameters - const float K = 1.f / (5.f * L_A + 1.f); - const float K4 = powf(K, 4.f); - const float N = Y_b / Y_W; + constexpr float K = 1.f / (5.f * L_A + 1.f); + constexpr float K4 = K * K * K * K; const float F_L = 0.2f * K4 * (5.f * L_A) + 0.1f * powf((1.f - K4), 2.f) * powf(5.f * L_A, 1.f/3.f); - const float z = 1.48f + sqrt(N); + + const float F_L_n = F_L / reference_luminance; + const float cz = model_gamma(); + const float inv_cz = 1.0f / cz; const f3 D_RGB = { - Y_W / RGB_w[0], - Y_W / RGB_w[1], - Y_W / RGB_w[2] + F_L_n * Y_W / RGB_w[0], + F_L_n * Y_W / RGB_w[1], + F_L_n * Y_W / RGB_w[2] }; const f3 RGB_WC { @@ -351,77 +504,349 @@ JMhParams init_JMhParams(const Primaries &P) }; const f3 RGB_AW = { - panlrc_forward(RGB_WC[0], F_L), - panlrc_forward(RGB_WC[1], F_L), - panlrc_forward(RGB_WC[2], F_L) + post_adaptation_cone_response_compression_fwd(RGB_WC[0]), + post_adaptation_cone_response_compression_fwd(RGB_WC[1]), + post_adaptation_cone_response_compression_fwd(RGB_WC[2]) }; - const float A_w = ra * RGB_AW[0] + RGB_AW[1] + ba * RGB_AW[2]; + const m33f cone_response_to_Aab = mult_f33_f33(scale_f33(Identity_M33, f3_from_f(cam_nl_scale)), base_cone_response_to_Aab); // TODO: this scale maybe ill conditioning the matrix + const float A_w = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2]; + const float A_w_J = _post_adaptation_cone_response_compression_fwd(F_L); + const float inv_A_w_J = 1.0f / A_w_J; - const float F_L_W = powf(F_L, 0.42f); - const float A_w_J = (400.f * F_L_W) / (27.13f + F_L_W); + // TODO: evaluate the condition number of the below matrices and consider extracting a power of 2 scale out of them may improve noise behaviour + // power of 2 to make cost of extra scaling limited vs generalised multiply/divide ? - p.XYZ_w = XYZ_w; - p.F_L = F_L; - p.z = z; - p.D_RGB = D_RGB; - p.A_w = A_w; - p.A_w_J = A_w_J; + // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation. + const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(reference_luminance))); + const m33f MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16); + const m33f MATRIX_CAM16_c_to_RGB = invert_f33(MATRIX_RGB_to_CAM16_c); - p.MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(P, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f))); - p.MATRIX_CAM16_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16); + const m33f MATRIX_cone_response_to_Aab = { + cone_response_to_Aab[0] / A_w, cone_response_to_Aab[1] / A_w, cone_response_to_Aab[2] / A_w, + cone_response_to_Aab[3] * 43.f * surround[2], cone_response_to_Aab[4] * 43.f * surround[2], cone_response_to_Aab[5] * 43.f * surround[2], + cone_response_to_Aab[6] * 43.f * surround[2], cone_response_to_Aab[7] * 43.f * surround[2], cone_response_to_Aab[8] * 43.f * surround[2], + }; + const m33f MATRIX_Aab_to_cone_response = invert_f33(MATRIX_cone_response_to_Aab); + + JMhParams p = { + MATRIX_RGB_to_CAM16_c, + MATRIX_CAM16_c_to_RGB, + MATRIX_cone_response_to_Aab, + MATRIX_Aab_to_cone_response, + F_L_n, + cz, + inv_cz, + A_w_J, + inv_A_w_J + }; + return p; +} - return p; +inline f3 generate_unit_cube_cusp_corners(const unsigned int corner) +{ + // Generation order R, Y, G, C, B, M to ensure hues rotate in correct order + return {float(static_cast(((corner + 1) % cuspCornerCount) < 3)), + float(static_cast(((corner + 5) % cuspCornerCount) < 3)), + float(static_cast(((corner + 3) % cuspCornerCount) < 3)) + }; } -Table3D make_gamut_table(const Primaries &P, float peakLuminance) +void build_limiting_cusp_corners_tables(std::array& RGB_corners, std::array& JMh_corners, + const JMhParams ¶ms, const float peakLuminance) { - const JMhParams params = init_JMhParams(P); + // We calculate the RGB and JMh values for the limiting gamut cusp corners + // They are then arranged into a cycle with the lowest JMh value at [1] to allow for hue wrapping + std::array temp_RGB_corners; + std::array temp_JMh_corners; + unsigned int min_index = 0; + for (unsigned int i = 0; i != cuspCornerCount; ++i) + { + temp_RGB_corners[i] = mult_f_f3(peakLuminance / reference_luminance, generate_unit_cube_cusp_corners(i)); + temp_JMh_corners[i] = RGB_to_JMh(temp_RGB_corners[i], params); + if (temp_JMh_corners[i][2] < temp_JMh_corners[min_index][2]) + min_index = i; + } - Table3D gamutCuspTableUnsorted{}; - for (int i = 0; i < gamutCuspTableUnsorted.size; i++) + // Rotate entries placing lowest at [1] (not [0]) + for (unsigned int i = 0; i != cuspCornerCount; ++i) { - const float hNorm = (float) i / gamutCuspTableUnsorted.size; - const f3 HSV = {hNorm, 1., 1.}; - const f3 RGB = HSV_to_RGB(HSV); - const f3 scaledRGB = mult_f_f3(peakLuminance / reference_luminance, RGB); - const f3 JMh = RGB_to_JMh(scaledRGB, params); + RGB_corners[i + 1] = temp_RGB_corners[(i + min_index) % cuspCornerCount]; + JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount]; + } - gamutCuspTableUnsorted.table[i][0] = JMh[0]; - gamutCuspTableUnsorted.table[i][1] = JMh[1]; - gamutCuspTableUnsorted.table[i][2] = JMh[2]; + // Copy end elements to create a cycle + RGB_corners[0] = RGB_corners[cuspCornerCount]; + RGB_corners[cuspCornerCount + 1] = RGB_corners[1]; + JMh_corners[0] = JMh_corners[cuspCornerCount]; + JMh_corners[cuspCornerCount + 1] = JMh_corners[1]; + + // Wrap the hues, to maintain monotonicity these entries will fall outside [0.0, hue_limit) + JMh_corners[0][2] = JMh_corners[0][2] - hue_limit; + JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit; +} + +void find_reach_corners_table(std::array& JMh_corners, const JMhParams ¶ms, const float limitJ, const float maximum_source) +{ + // We need to find the value of JMh that corresponds to limitJ for each corner + // This is done by scaling the unit corners converting to JMh until the J value is near the limitJ + // As an optimisation we use the equivalent Achromatic value to search for the J value and avoid the + // non-linear transform during the search + // Strictly speaking we should only need to find the R, G and B "corners" as the reach is unbounded and + // as such does not form a cube, but is formed by the transformed 3 lower planes of the cube and + // the plane at J = limitJ + std::array temp_JMh_corners; + const float limitA = J_to_Achromatic_n(limitJ, params.inv_cz); + + unsigned int min_index = 0; + for (unsigned int i = 0; i != cuspCornerCount; ++i) + { + const f3 rgb_vector = generate_unit_cube_cusp_corners(i); + + float lower = 0.0f; + float upper = maximum_source; + while ((upper - lower) > reach_cusp_tolerance) + { + float test = midpoint(lower, upper); + const f3 test_corner = mult_f_f3(test, rgb_vector); + const float A = RGB_to_Aab(test_corner, params)[0]; + if (A < limitA) + { + lower = test; + } + else + { + upper = test; + } + if (A == limitA) + break; + } + temp_JMh_corners[i] = RGB_to_JMh(mult_f_f3(upper, rgb_vector), params); + + if (temp_JMh_corners[i][2] < temp_JMh_corners[min_index][2]) + min_index = i; } - int minhIndex = 0; - for (int i = 0; i < gamutCuspTableUnsorted.size; i++) + // Rotate entries placing lowest at [1] (not [0]) // TODO: could use std::rotate_copy or even the ranges vs in C++20 + for (unsigned int i = 0; i != cuspCornerCount; ++i) { - if ( gamutCuspTableUnsorted.table[i][2] < gamutCuspTableUnsorted.table[minhIndex][2]) - minhIndex = i; + JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount]; } - Table3D gamutCuspTable{}; - for (int i = 0; i < gamutCuspTableUnsorted.size; i++) + // Copy end elements to create a cycle + JMh_corners[0] = JMh_corners[cuspCornerCount]; + JMh_corners[cuspCornerCount + 1] = JMh_corners[1]; + + // Wrap the hues, to maintain monotonicity these entries will fall outside [0.0, hue_limit) + JMh_corners[0][2] = JMh_corners[0][2] - hue_limit; + JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit; +} + +unsigned int extract_sorted_cube_hues(std::array& sorted_hues, + const std::array& reach_JMh, const std::array& display_JMh) +{ + // Basic merge of 2 sorted arrays, extracting the unique hues. + // Return the count of the unique hues + //TODO: use STL for this and similar functions + unsigned int idx = 0; + unsigned int reach_idx = 1; + unsigned int display_idx = 1; + while ((reach_idx < (cuspCornerCount + 1)) || (display_idx < (cuspCornerCount + 1))) { - gamutCuspTable.table[i + gamutCuspTable.base_index][0] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][0]; - gamutCuspTable.table[i + gamutCuspTable.base_index][1] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][1]; - gamutCuspTable.table[i + gamutCuspTable.base_index][2] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][2]; + const float reach_hue = reach_JMh[reach_idx][2]; + const float display_hue = display_JMh[display_idx][2]; + if (reach_hue == display_hue) + { + sorted_hues[idx] = reach_hue; + ++reach_idx; + ++display_idx; // When equal consume both + } + else + { + if (reach_hue < display_hue) + { + sorted_hues[idx] = reach_hue; + ++reach_idx; + } + else + { + sorted_hues[idx] = display_hue; + ++display_idx; + } + } + ++idx; } + return idx; +} - // Copy last populated entry to first empty spot - gamutCuspTable.table[0][0] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][0]; - gamutCuspTable.table[0][1] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][1]; - gamutCuspTable.table[0][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][2]; +void build_hue_sample_interval(const unsigned int samples, const float lower, const float upper, Table1D &hue_table, const unsigned int base) +{ + const float delta = (upper - lower) / float(samples); + for (unsigned int i = 0; i != samples; ++i) + { + hue_table[base + i] = lower + float(i) * delta; + } +} - // Copy first populated entry to last empty spot - gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][0] = gamutCuspTable.table[gamutCuspTable.base_index][0]; - gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][1] = gamutCuspTable.table[gamutCuspTable.base_index][1]; - gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2]; +void build_hue_table(Table1D &hue_table, const std::array& sorted_hues, const unsigned int unique_hues) +{ + const float ideal_spacing = hue_table.nominal_size / hue_limit; + std::array samples_count = {}; + unsigned int last_idx = std::numeric_limits::max(); + unsigned int min_index = sorted_hues[0] == 0.0f ? 0 : 1; // Ensure we can always sample at 0.0 hue + for (unsigned int hue_idx = 0; hue_idx != unique_hues; ++hue_idx) + { + // BUG: "hue_table.size - 1" will fail if we have multiple hues mapping near the top of the table + unsigned int nominal_idx = std::min(std::max(static_cast(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.nominal_size - 1); + if (last_idx == nominal_idx) + { + // Last two hues should sample at same index, need to adjust them + // Adjust previous sample down if we can + if (hue_idx > 1 && samples_count[hue_idx - 2] != (samples_count[hue_idx - 1] - 1)) + { + samples_count[hue_idx - 1] = samples_count[hue_idx - 1] - 1; + } + else + { + nominal_idx = nominal_idx + 1; + } + } + samples_count[hue_idx] = std::min(nominal_idx, hue_table.nominal_size - 1U); + last_idx = min_index = nominal_idx; + } - // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0] - gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - 360.f; - gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + 360.f; + unsigned int total_samples = 0; + // Special cases for ends + unsigned int i = 0; + build_hue_sample_interval(samples_count[i], 0.0f, sorted_hues[i], hue_table, total_samples + 1); + total_samples += samples_count[i]; + for (++i; i != unique_hues; ++i) + { + const unsigned int samples = samples_count[i] - samples_count[i - 1]; + build_hue_sample_interval(samples, sorted_hues[i - 1], sorted_hues[i], hue_table, total_samples + 1); + total_samples += samples; + } + // BUG: could break if we are unlucky with samples all being used up by this point + build_hue_sample_interval(hue_table.nominal_size - total_samples, sorted_hues[i - 1], hue_limit, hue_table, total_samples + 1); - return gamutCuspTable; + hue_table[hue_table.lower_wrap_index] = hue_table[hue_table.last_nominal_index] - hue_limit; + hue_table[hue_table.upper_wrap_index] = hue_table[hue_table.first_nominal_index] + hue_limit; +} + +std::array find_display_cusp_for_hue(float hue, const std::array& RGB_corners, const std::array& JMh_corners, + const JMhParams ¶ms, std::array & previous) +{ + // This works by finding the required line segment between two of the XYZ cusp corners, then binary searching + // along the line calculating the JMh of points along the line till we find the required value. + // All values on the line segments are valid cusp locations. + + unsigned int upper_corner = 1; + for (unsigned int i = upper_corner; i != totalCornerCount; ++i) // TODO: binary search? + { + if (JMh_corners[i][2] > hue) + { + upper_corner = i; + break; + } + } + const unsigned int lower_corner = upper_corner - 1; + + // hue should now be within [lower_corner, upper_corner), handle exact match + if (JMh_corners[lower_corner][2] == hue) + { + return {JMh_corners[lower_corner][0], JMh_corners[lower_corner][1]}; + } + + // search by lerping between RGB corners for the hue + const f3 cusp_lower = RGB_corners[lower_corner]; + const f3 cusp_upper = RGB_corners[upper_corner]; + f3 sample; + + float sample_t; + float lower_t = (upper_corner == previous[0]) ? previous[1] : 0.0f; // If we are still on the same segment start from where we left off + float upper_t = 1.0f; + + f3 JMh; + + // There is an edge case where we need to search towards the range when across the [0.0f, hue_limit) boundary + // each edge needs the directions swapped. This is handled by comparing against the appropriate corner to make + // sure we are still in the expected range between the lower and upper corner hue limits + while ((upper_t - lower_t) > display_cusp_tolerance) + { + sample_t = midpoint(lower_t, upper_t); + sample = lerp(cusp_lower, cusp_upper, sample_t); + JMh = RGB_to_JMh(sample, params); + if (JMh[2] < JMh_corners[lower_corner][2]) + { + upper_t = sample_t; + } + else if (JMh[2] >= JMh_corners[upper_corner][2]) + { + lower_t = sample_t; + } + else if (JMh[2] > hue) + { + upper_t = sample_t; + } + else + { + lower_t = sample_t; + } + } + + // Use the midpoint of the final interval for the actual samples + sample_t = midpoint(lower_t, upper_t); + sample = lerp(cusp_lower, cusp_upper, sample_t); + JMh = RGB_to_JMh(sample, params); + + previous[0] = float(upper_corner); + previous[1] = sample_t; + + return {JMh[0], JMh[1]}; +} + +Table3D build_cusp_table(const Table1D& hue_table, const std::array& RGB_corners, const std::array& JMh_corners, const JMhParams ¶ms) +{ + std::array previous = {0.0f, 0.0f}; + Table3D output_table; + for (unsigned int i = output_table.first_nominal_index; i != output_table.upper_wrap_index; ++i) + { + const float hue = hue_table[i]; + const std::array JM = find_display_cusp_for_hue(hue, RGB_corners, JMh_corners, params, previous); + output_table[i][0] = JM[0]; + output_table[i][1] = JM[1] * (1.f + smooth_m * smooth_cusps);; + output_table[i][2] = hue; + } + + // Copy extra entries to ease the code to handle hues wrapping around + output_table[output_table.lower_wrap_index][0] = output_table[output_table.last_nominal_index][0]; + output_table[output_table.lower_wrap_index][1] = output_table[output_table.last_nominal_index][1]; + output_table[output_table.lower_wrap_index][2] = hue_table[hue_table.lower_wrap_index]; + output_table[output_table.upper_wrap_index][0] = output_table[output_table.first_nominal_index][0]; + output_table[output_table.upper_wrap_index][1] = output_table[output_table.first_nominal_index][1]; + output_table[output_table.upper_wrap_index][2] = hue_table[hue_table.upper_wrap_index]; + return output_table; +} + +Table3D make_uniform_hue_gamut_table(const JMhParams &reach_params, const JMhParams ¶ms, float peakLuminance, float forward_limit, const SharedCompressionParameters& sp, Table1D& hue_table) +{ + // The principal here is to sample the hues as uniformly as possible, whilst ensuring we sample the corners + // of the limiting gamut and the reach primaries at limit J Max + // + // The corners are calculated then the hues are extracted and merged to form a unique sorted hue list + // We then build the hue table from the list, those hues are then used to compute the JMh od the limiting + // gamut cusp. + + std::array reach_JMh_corners; + std::array limiting_RGB_corners; + std::array limiting_JMh_corners; + std::array sorted_hues; + + find_reach_corners_table(reach_JMh_corners, reach_params, sp.limit_J_max, forward_limit); + build_limiting_cusp_corners_tables(limiting_RGB_corners, limiting_JMh_corners, params, peakLuminance); + const unsigned int unique_hues = extract_sorted_cube_hues(sorted_hues, reach_JMh_corners, limiting_JMh_corners); + build_hue_table(hue_table, sorted_hues, unique_hues); + return build_cusp_table(hue_table, limiting_RGB_corners, limiting_JMh_corners, params); } bool any_below_zero(const f3 &rgb) @@ -429,22 +854,20 @@ bool any_below_zero(const f3 &rgb) return (rgb[0] < 0. || rgb[1] < 0. || rgb[2] < 0.); } -Table1D make_reach_m_table(const Primaries &P, float peakLuminance) +Table1D make_reach_m_table(const JMhParams ¶ms, const float limit_J_max) { - const JMhParams params = init_JMhParams(P); - const float limit_J_max = Y_to_J(peakLuminance, params); - Table1D gamutReachTable{}; - for (int i = 0; i < gamutReachTable.size; i++) { - const float hue = (float) i; + for (unsigned int i = 0; i < gamutReachTable.nominal_size; i++) { + const float hue = gamutReachTable.base_hue_for_position(i); - const float search_range = 50.f; - float low = 0.; + constexpr float search_range = 50.f; + constexpr float search_maximum = 1300.f; // TODO: magic limit + float low = 0.f; float high = low + search_range; bool outside = false; - while ((outside != true) & (high < 1300.f)) + while ((outside != true) & (high < search_maximum)) { const f3 searchJMh = {limit_J_max, high, hue}; const f3 newLimitRGB = JMh_to_RGB(searchJMh, params); @@ -456,7 +879,7 @@ Table1D make_reach_m_table(const Primaries &P, float peakLuminance) } } - while (high-low > 1e-2) + while (high-low > 1e-2) // TODO:tolerance as constexpr { const float sampleM = (high + low) / 2.f; const f3 searchJMh = {limit_J_max, sampleM, hue}; @@ -472,265 +895,293 @@ Table1D make_reach_m_table(const Primaries &P, float peakLuminance) } } - gamutReachTable.table[i] = high; + gamutReachTable[i + gamutReachTable.base_index] = high; } + gamutReachTable[gamutReachTable.lower_wrap_index] = gamutReachTable[gamutReachTable.last_nominal_index]; + gamutReachTable[gamutReachTable.upper_wrap_index] = gamutReachTable[gamutReachTable.first_nominal_index]; return gamutReachTable; } -bool outside_hull(const f3 &rgb) +inline bool outside_hull(const f3 &rgb, float maxRGBtestVal) { // limit value, once we cross this value, we are outside of the top gamut shell - const float maxRGBtestVal = 1.0; return rgb[0] > maxRGBtestVal || rgb[1] > maxRGBtestVal || rgb[2] > maxRGBtestVal; } -float get_focus_gain(float J, float cuspJ, float limit_J_max) +inline float get_focus_gain(float J, float analytical_threshold, float limit_J_max, float focus_dist) { - const float thr = lerpf(cuspJ, limit_J_max, focus_gain_blend); - - if (J > thr) + float gain = limit_J_max * focus_dist; + if (J > analytical_threshold) { - // Approximate inverse required above threshold - float gain = (limit_J_max - thr) / std::max(0.0001f, (limit_J_max - std::min(limit_J_max, J))); - return powf(log10(gain), 1.f / focus_adjust_gain) + 1.f; - } - else - { - // Analytic inverse possible below cusp - return 1.f; + // Approximate inverse required above threshold due to the introduction of J in the calculation + float gain_adjustment = log10((limit_J_max - analytical_threshold) / std::max(0.0001f, limit_J_max - J)); + //gain = powf(gain, focus_adjust_gain_inv) + 1.f; + gain_adjustment = gain_adjustment * gain_adjustment + 1.f; + gain = gain * gain_adjustment; } + return gain; } -float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain) +float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain) // TODO: eval placing maxJ as last patrameter? { - const float a = M / (focusJ * slope_gain); - float b = 0.f; - float c = 0.f; - float intersectJ = 0.f; + const float M_scaled = M / slope_gain; + const float a = M_scaled / focusJ; if (J < focusJ) { - b = 1.f - M / slope_gain; + const float b = 1.f - M_scaled; + const float c = -J; + const float det = b * b - 4.f * a * c; + const float root = sqrt(det); + return -2.f * c / (b + root); } else { - b = - (1.f + M / slope_gain + maxJ * M / (focusJ * slope_gain)); + const float b = -(1.f + M_scaled + maxJ * a); + const float c = maxJ * M_scaled + J; + const float det = b * b - 4.f * a * c; + const float root = sqrt(det); + return -2.f * c / (b - root); } - - if (J < focusJ) - { - c = -J; - } - else - { - c = maxJ * M / slope_gain + J; - } - - const float root = sqrt(b * b - 4.f * a * c); - - if (J < focusJ) - { - intersectJ = 2.f * c / (-b - root); - } - else - { - intersectJ = 2.f * c / (-b + root); - } - - return intersectJ; } -float smin(float a, float b, float s) +// Smooth minimum about the scaled reference, based upon a cubic polynomial +inline float smin_scaled(float a, float b, float scale_reference) { - const float h = std::max(s - std::abs(a - b), 0.f) / s; - return std::min(a, b) - h * h * h * s * (1.f / 6.f); + const float s_scaled = smooth_cusps * scale_reference; + const float h = std::max(s_scaled - std::abs(a - b), 0.0f) / s_scaled; + return std::min(a, b) - h * h * h * s_scaled * (1.f / 6.f); } -f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top, float gamma_bottom) +inline float compute_compression_vector_slope(const float intersectJ, const float focusJ, const float limitJmax, const float slope_gain) { - const float s = std::max(0.000001f, smooth_cusps); - const f2 JM_cusp = { - JM_cusp_in[0], - JM_cusp_in[1] * (1.f + smooth_m * s) - }; + const float direction_scaler = (intersectJ < focusJ) ? intersectJ : (limitJmax - intersectJ); // TODO < vs <= + return direction_scaler * (intersectJ - focusJ) / (focusJ * slope_gain); +} - const float J_intersect_source = solve_J_intersect(JMh_s[0], JMh_s[1], J_focus, J_max, slope_gain); - const float J_intersect_cusp = solve_J_intersect(JM_cusp[0], JM_cusp[1], J_focus, J_max, slope_gain); +inline float estimate_line_and_boundary_intersection_M(const float J_axis_intersect, const float slope, const float inv_gamma, + const float J_max, const float M_max, const float J_intersection_reference) +{ + // Line defined by J = slope * x + J_axis_intersect + // Boundary defined by J = J_max * (x / M_max) ^ (1/inv_gamma) + // Approximate as we do not want to iteratively solve intersection of a straight line and an exponential - float slope = 0.f; - if (J_intersect_source < J_focus) - { - slope = J_intersect_source * (J_intersect_source - J_focus) / (J_focus * slope_gain); - } - else - { - slope = (J_max - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain); - } + // We calculate a shifted intersection from the original intersection using the inverse of the exponential + // and the provided reference + const float normalised_J = J_axis_intersect / J_intersection_reference; + const float shifted_intersection = J_intersection_reference * powf(normalised_J, inv_gamma); - const float M_boundary_lower = J_intersect_cusp * powf(J_intersect_source / J_intersect_cusp, 1.f / gamma_bottom) / (JM_cusp[0] / JM_cusp[1] - slope); - const float M_boundary_upper = JM_cusp[1] * (J_max - J_intersect_cusp) * powf((J_max - J_intersect_source) / (J_max - J_intersect_cusp), 1.f / gamma_top) / (slope * JM_cusp[1] + J_max - JM_cusp[0]); - const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], s); - const float J_boundary = J_intersect_source + slope * M_boundary; + // Now we find the M intersection of two lines + // line from origin to J,M Max l1(x) = J/M * x + // line from J Intersect' with slope l2(x) = slope * x + Intersect' - return {J_boundary, M_boundary, J_intersect_source}; + //return shifted_intersection / ((J_max / M_max) - slope); + return shifted_intersection * M_max / (J_max - slope * M_max); } -f3 get_reach_boundary( - float J, - float M, - float h, - const f2 &JMcusp, - float focusJ, - float limit_J_max, - float model_gamma, - float focus_dist, - const ACES2::Table1D & reach_m_table -) +float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source, const float slope, const float J_intersect_cusp) { - const float reachMaxM = reach_m_from_table(h, reach_m_table); - - const float slope_gain = limit_J_max * focus_dist * get_focus_gain(J, JMcusp[0], limit_J_max); - - const float intersectJ = solve_J_intersect(J, M, focusJ, limit_J_max, slope_gain); + const float M_boundary_lower = estimate_line_and_boundary_intersection_M(J_intersect_source, slope, gamma_bottom_inv, JM_cusp[0], JM_cusp[1], J_intersect_cusp); + + // The upper hull is flipped and thus 'zeroed' at J_max + // Also note we negate the slope + const float f_J_intersect_cusp = J_max - J_intersect_cusp; + const float f_J_intersect_source = J_max - J_intersect_source; + const float f_JM_cusp_J = J_max - JM_cusp[0]; + const float M_boundary_upper = + estimate_line_and_boundary_intersection_M(f_J_intersect_source, -slope, gamma_top_inv, f_JM_cusp_J, JM_cusp[1], f_J_intersect_cusp); + + // Smooth minimum between the two calculated values for the M component + const float M_boundary = smin_scaled(M_boundary_lower, M_boundary_upper, JM_cusp[1]); + return M_boundary; +} - float slope; - if (intersectJ < focusJ) - { - slope = intersectJ * (intersectJ - focusJ) / (focusJ * slope_gain); - } - else +template +inline float reinhard_remap(const float scale, const float nd) +{ + if (invert) { - slope = (limit_J_max - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain); + if (nd >= 1.0f) // TODO: given remap_M already tests against proportion do we need this asymptote test + return scale; + else + return scale * -(nd / (nd - 1.0f)); } - - const float boundary = limit_J_max * powf(intersectJ / limit_J_max, model_gamma) * reachMaxM / (limit_J_max - slope * reachMaxM); - return {J, boundary, h}; + return scale * nd / (1.0f + nd); } -float compression_function( - float v, - float thr, - float lim, - bool invert) +template +inline float remap_M(const float M, const float gamut_boundary_M, const float reach_boundary_M) { - float s = (lim - thr) * (1.f - thr) / (lim - 1.f); - float nd = (v - thr) / s; - - float vCompressed; - - if (invert) { - if (v < thr || lim <= 1.0001f || v > thr + s) { - vCompressed = v; - } else { - vCompressed = thr + s * (-nd / (nd - 1.f)); - } - } else { - if (v < thr || lim <= 1.0001f) { - vCompressed = v; - } else { - vCompressed = thr + s * nd / (1.f + nd); - } - } - - return vCompressed; + const float boundary_ratio = gamut_boundary_M / reach_boundary_M; + const float proportion = std::max(boundary_ratio, compression_threshold); + const float threshold = proportion * gamut_boundary_M; + + if (M <= threshold || proportion >= 1.0f) + return M; + + // Translate to place threshold at zero + const float m_offset = M - threshold; + const float gamut_offset = gamut_boundary_M - threshold; + const float reach_offset = reach_boundary_M - threshold; + + const float scale = reach_offset / ((reach_offset / gamut_offset) - 1.0f); + const float nd = m_offset / scale; + + // shift back to absolute + return threshold + reinhard_remap(scale, nd); } -f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, bool invert) +template +f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompressionParameters& sr, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp) { const float J = JMh[0]; const float M = JMh[1]; const float h = JMh[2]; + + const float slope_gain = get_focus_gain(Jx, hdp.analytical_threshold, sr.limit_J_max, p.focus_dist); + const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain); + const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain); + + const float J_intersect_cusp = solve_J_intersect(hdp.JMcusp[0], hdp.JMcusp[1], hdp.focusJ, sr.limit_J_max, slope_gain); + const float gamut_boundary_M = find_gamut_boundary_intersection(hdp.JMcusp, sr.limit_J_max, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope, J_intersect_cusp); - if (M < 0.0001f || J > p.limit_J_max) + if (gamut_boundary_M <= 0.0f) // TODO: when/why does this happen? { return {J, 0.f, h}; } - else - { - const f2 project_from = {J, M}; - const f2 JMcusp = cusp_from_table(h, p.gamut_cusp_table); - const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / p.limit_J_max))); - const float slope_gain = p.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], p.limit_J_max); - - const float gamma_top = hue_dependent_upper_hull_gamma(h, p.upper_hull_gamma_table); - const float gamma_bottom = p.lower_hull_gamma; - - const f3 boundaryReturn = find_gamut_boundary_intersection({J, M, h}, JMcusp, focusJ, p.limit_J_max, slope_gain, gamma_top, gamma_bottom); - const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]}; - const f2 project_to = {boundaryReturn[2], 0.f}; - - if (JMboundary[1] <= 0.0f) - { - return {J, 0.f, h}; - } - - const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, p.limit_J_max, p.model_gamma, p.focus_dist, p.reach_m_table); - const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]); - const float threshold = std::max(compression_threshold, 1.f / difference); + const float reachBoundaryM = estimate_line_and_boundary_intersection_M(J_intersect_source, gamut_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max); - float v = project_from[1] / JMboundary[1]; - v = compression_function(v, threshold, difference, invert); + const float remapped_M = remap_M(M, gamut_boundary_M, reachBoundaryM); - const f2 JMcompressed { - project_to[0] + v * (JMboundary[0] - project_to[0]), - project_to[1] + v * (JMboundary[1] - project_to[1]) - }; + const f3 JMcompressed { + J_intersect_source + remapped_M * gamut_slope, + remapped_M, + h + }; + return JMcompressed; +} - return {JMcompressed[0], JMcompressed[1], h}; - } +inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max) +{ + return lerpf(cusp_J, mid_J, std::min(1.f, cusp_mid_blend - (cusp_J / limit_J_max))); } -f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p) +HueDependantGamutParams init_HueDependantGamutParams(const float hue, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p) { - return compressGamut(JMh, JMh[0], p, false); + HueDependantGamutParams hdp; + hdp.gamma_bottom_inv = p.lower_hull_gamma_inv; + + const unsigned int i_hi = lookup_hue_interval(hue, p.hue_table, p.hue_linearity_search_range); + const float t = interpolation_weight(hue, p.hue_table[i_hi-1], p.hue_table[i_hi]); + const f3 cusp = cusp_from_table(i_hi, t, p.gamut_cusp_table); + + hdp.JMcusp = { cusp[0], cusp[1] }; + hdp.gamma_top_inv = { cusp[2] }; + hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max); + hdp.analytical_threshold = lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend); + return hdp; } -f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p) +f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p) { - const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); - float Jx = JMh[0]; + const float J = JMh[0]; + const float M = JMh[1]; + const float h = JMh[2]; - f3 unCompressedJMh; + if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed + { + return {0.0f, 0.f, h}; + } + if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero + // Above the expected maximum we explicitly map to 0 M + { + return {J, 0.f, h}; + } + const HueDependantGamutParams hdp = init_HueDependantGamutParams(h, sr, p); + + return compressGamut(JMh, JMh[0], sr, p, hdp); +} - // Analytic inverse below threshold - if (Jx <= lerpf(JMcusp[0], p.limit_J_max, focus_gain_blend)) +f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p) +{ + const float J = JMh[0]; + const float M = JMh[1]; + const float h = JMh[2]; + + if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed { - unCompressedJMh = compressGamut(JMh, Jx, p, true); + return {0.0f, 0.f, h}; } - // Approximation above threshold - else + if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero + // Above the expected maximum we explicitly map to 0 M { - Jx = compressGamut(JMh, Jx, p, true)[0]; - unCompressedJMh = compressGamut(JMh, Jx, p, true); + return {J, 0.f, h}; } + const HueDependantGamutParams hdp = init_HueDependantGamutParams(h, sr, p); - return unCompressedJMh; + float Jx = J; + if (Jx > hdp.analytical_threshold) + { + // Approximation above threshold + Jx = compressGamut(JMh, Jx, sr, p, hdp)[0]; + } + return compressGamut(JMh, Jx, sr, p, hdp); +} + +static constexpr unsigned int gamma_test_count = 5; +struct testData { + f3 testJMh; + float J_intersect_source; + float slope; + float J_intersect_cusp; +}; + +std::array generate_gamma_test_data(const f2 &JMcusp, float hue, float limit_J_max, float mid_J, float focus_dist) +{ + const std::array testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f}; + std::array data; + const float analytical_threshold = lerpf(JMcusp[0], limit_J_max, focus_gain_blend); + const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max); + + unsigned int testIndex = 0; + std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, focus_dist, testPositions, hue, analytical_threshold, focusJ, testIndex]() mutable { + const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]); + const float slope_gain = get_focus_gain(testJ, analytical_threshold, limit_J_max, focus_dist); + const float J_intersect_source = solve_J_intersect(testJ, JMcusp[1], focusJ, limit_J_max, slope_gain); + testData result = { + { testJ, JMcusp[1], hue }, + J_intersect_source, + compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain), + solve_J_intersect(JMcusp[0], JMcusp[1], focusJ, limit_J_max, slope_gain) + }; + testIndex++; + return result; + }); + return data; } bool evaluate_gamma_fit( const f2 &JMcusp, - const f3 testJMh[3], - float topGamma, + const std::array data, + float topGamma_inv, float peakLuminance, float limit_J_max, - float mid_J, - float focus_dist, - float lower_hull_gamma, + float lower_hull_gamma_inv, const JMhParams &limitJMhParams) { - const float focusJ = lerpf(JMcusp[0], mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / limit_J_max))); - - for (size_t testIndex = 0; testIndex < 3; testIndex++) + const float luminance_limit = peakLuminance / reference_luminance; + for (auto test_data: data) { - const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[testIndex][0], JMcusp[0], limit_J_max); - const f3 approxLimit = find_gamut_boundary_intersection(testJMh[testIndex], JMcusp, focusJ, limit_J_max, slope_gain, topGamma, lower_hull_gamma); - const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[testIndex][2]}; + const float approxLimit_M = find_gamut_boundary_intersection(JMcusp, limit_J_max, topGamma_inv, lower_hull_gamma_inv, + test_data.J_intersect_source, test_data.slope, test_data.J_intersect_cusp); + const float approxLimit_J = test_data.J_intersect_source + test_data.slope * approxLimit_M; + + const f3 approximate_JMh = {approxLimit_J, approxLimit_M, test_data.testJMh[2]}; const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams); - const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB); - if (!outside_hull(newLimitRGBScaled)) + if (!outside_hull(newLimitRGB, luminance_limit)) { return false; } @@ -739,47 +1190,38 @@ bool evaluate_gamma_fit( return true; } -Table1D make_upper_hull_gamma( - const Table3D &gamutCuspTable, +void make_upper_hull_gamma( + const Table1D &hue_table, + Table3D &gamutCuspTable, float peakLuminance, float limit_J_max, float mid_J, float focus_dist, - float lower_hull_gamma, + float lower_hull_gamma_inv, const JMhParams &limitJMhParams) { - const int test_count = 3; - const float testPositions[test_count] = {0.01f, 0.5f, 0.99f}; - - Table1D gammaTable{}; - Table1D gamutTopGamma{}; - - for (int i = 0; i < gammaTable.size; i++) + for (unsigned int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i) { - gammaTable.table[i] = -1.f; + const float hue = hue_table[i]; + const f2 JMcusp = { gamutCuspTable[i][0], gamutCuspTable[i][1] }; - const float hue = (float) i; - const f2 JMcusp = cusp_from_table(hue, gamutCuspTable); - - f3 testJMh[test_count]{}; - for (int testIndex = 0; testIndex < test_count; testIndex++) - { - const float testJ = JMcusp[0] + ((limit_J_max - JMcusp[0]) * testPositions[testIndex]); - testJMh[testIndex] = { - testJ, - JMcusp[1], - hue - }; - } + std::array data = generate_gamma_test_data(JMcusp, hue, limit_J_max, mid_J, focus_dist); + // TODO: calculate best fitting inverse gamma directly rather than reciprocating it in the loop + // TODO: adjacent found gamma values typically fall close to each other could initialise the search range + // with values near to speed up searching. Note that discrepancies do occur at/near corners of gamut const float search_range = gammaSearchStep; float low = gammaMinimum; float high = low + search_range; bool outside = false; - while (!(outside) && (high < 5.f)) + auto gamma_fit_predicate = [JMcusp, data, peakLuminance, limit_J_max, lower_hull_gamma_inv, limitJMhParams](float gamma) + { + return evaluate_gamma_fit(JMcusp, data, 1.0f / gamma, peakLuminance, limit_J_max, lower_hull_gamma_inv, limitJMhParams); + }; + while (!(outside) && (high < gammaMaximum)) { - const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, high, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma, limitJMhParams); + const bool gammaFound = gamma_fit_predicate(high); if (!gammaFound) { low = high; @@ -794,30 +1236,23 @@ Table1D make_upper_hull_gamma( float testGamma = -1.f; while ( (high-low) > gammaAccuracy) { - testGamma = (high + low) / 2.f; - const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, testGamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma, limitJMhParams); + testGamma = midpoint(high, low); + const bool gammaFound = gamma_fit_predicate(testGamma); if (gammaFound) { high = testGamma; - gammaTable.table[i] = high; } else { low = testGamma; } } - - // Duplicate gamma value to array, leaving empty entries at first and last position - gamutTopGamma.table[i+gamutTopGamma.base_index] = gammaTable.table[i]; + gamutCuspTable[i][2] = 1.0f / high; } - // Copy last populated entry to first empty spot - gamutTopGamma.table[0] = gammaTable.table[gammaTable.size-1]; - - // Copy first populated entry to last empty spot - gamutTopGamma.table[gamutTopGamma.total_size-1] = gammaTable.table[0]; - - return gamutTopGamma; + // Copy last populated entries to empty spot 'wrapping' entries + gamutCuspTable[gamutCuspTable.lower_wrap_index][2] = gamutCuspTable[gamutCuspTable.last_nominal_index][2]; + gamutCuspTable[gamutCuspTable.upper_wrap_index][2] = gamutCuspTable[gamutCuspTable.first_nominal_index][2]; } // Tonescale pre-calculations @@ -836,6 +1271,8 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance) const float r_hit_max = 896.f; // scene-referred value "hitting the roof" // Calculate output constants + // TODO: factor these calculations into well named functions + // TODO: ensure correct use of n_r vs n vs reference_luminance vs 100.f etc const float r_hit = r_hit_min + (r_hit_max - r_hit_min) * (log(n/n_r)/log(10000.f/100.f)); const float m_0 = (n / n_r); const float m_1 = 0.5f * (m_0 + sqrt(m_0 * (m_0 + 4.f * t_1))); @@ -846,9 +1283,12 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance) const float g_ip = 0.5f * (c_t + sqrt(c_t * (c_t + 4.f * t_1))); const float g_ipp2 = -(m_1 * powf((g_ip/m),(1.f/g))) / (powf(g_ip/m , 1.f/g)-1.f); const float w_2 = c / g_ipp2; - const float s_2 = w_2 * m_1; + const float s_2 = w_2 * m_1 * reference_luminance; const float u_2 = powf((r_hit/m_1)/((r_hit/m_1) + w_2), g); const float m_2 = m_1 / u_2; + const float inverse_limit = n / (u_2 * n_r); + const float forward_limit = 8.0f * r_hit; + const float log_peak = log10( n / n_r); ToneScaleParams TonescaleParams = { n, @@ -858,70 +1298,99 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance) c_t, s_2, u_2, - m_2 + m_2, + forward_limit, + inverse_limit, + log_peak }; return TonescaleParams; } -ChromaCompressParams init_ChromaCompressParams(float peakLuminance) +SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &reachParams) { - const ToneScaleParams tsParams = init_ToneScaleParams(peakLuminance); - const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries); + const float limit_J_max = Y_to_J(peakLuminance, inputJMhParams); + const float model_gamma_inv = 1.f / model_gamma(); + + SharedCompressionParameters params = { + limit_J_max, + model_gamma_inv, + make_reach_m_table(reachParams, limit_J_max) + }; + return params; +} - float limit_J_max = Y_to_J(peakLuminance, inputJMhParams); +ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p) +{ + ResolvedSharedCompressionParameters params = { + p.limit_J_max, + p.model_gamma_inv, + reach_m_from_table(hue, p.reach_m_table) + }; + return params; +} +ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams) +{ // Calculated chroma compress variables - const float log_peak = log10( tsParams.n / tsParams.n_r); - const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * log_peak; - const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * log_peak); + // TODO: name these magic constants + const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * tsParams.log_peak; + const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * tsParams.log_peak); const float sat_thr = chroma_expand_thr / tsParams.n; - const float model_gamma = 1.f / (surround[1] * (1.48f + sqrt(Y_b / L_A))); - - ChromaCompressParams params{}; - params.limit_J_max = limit_J_max; - params.model_gamma = model_gamma; - params.sat = sat; - params.sat_thr = sat_thr; - params.compr = compr; - params.chroma_compress_scale = powf(0.03379f * peakLuminance, 0.30596f) - 0.45135f; - params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance); + const float chroma_compress_scale = powf(0.03379f * peakLuminance, 0.30596f) - 0.45135f; + + ChromaCompressParams params = { + sat, + sat_thr, + compr, + chroma_compress_scale + }; return params; } -GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primaries &limitingPrimaries) +std::array determine_hue_linearity_search_range(const Table3D &gamutCuspTable) { - const ToneScaleParams tsParams = init_ToneScaleParams(peakLuminance); - const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries); + // This function searches through the hues looking for the largest deviations from a linear distribution + // We can then use this to initialise the binary search range to something smaller than the full one to + // reduce the number of lookups per hue lookup from ~ceil(log2(table size)) to ~ceil(log2(range)) + // during image rendering. + + // TODO: Padding values are a quick hack to ensure the range encloses the needed range, left as an exersize + // for the reader to fully reason if these values could be smaller, probably best done the closer the hue + // distribution comes to linear as the overhead becomes a potentially greater issue + constexpr int lower_padding = 0; + constexpr int upper_padding = 1; + std::array hue_linearity_search_range = {lower_padding, upper_padding}; + for (unsigned int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i) + { + const unsigned int pos = gamutCuspTable.nominal_hue_position_in_uniform_table(gamutCuspTable[i][2]); // TODO: compute from hue table less cache pressure? + const int delta = int(i) - int(pos); + hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding); + hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding); - float limit_J_max = Y_to_J(peakLuminance, inputJMhParams); - float mid_J = Y_to_J(tsParams.c_t * 100.f, inputJMhParams); + //std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable[i][0] << " " << gamutCuspTable[i][1] << " " << gamutCuspTable[i][2] << "\n"; + } + //std::cout << "search range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n"; + return hue_linearity_search_range; +} - // Calculated chroma compress variables - const float log_peak = log10( tsParams.n / tsParams.n_r); - const float model_gamma = 1.f / (surround[1] * (1.48f + sqrt(Y_b / L_A))); - const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * log_peak; - const float lower_hull_gamma = 1.14f + 0.07f * log_peak; +GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams, + const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams) +{ + float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams); - const JMhParams limitJMhParams = init_JMhParams(limitingPrimaries); + // Calculated chroma compress variables + const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * tsParams.log_peak; + const float lower_hull_gamma_inv = 1.0f / (1.14f + 0.07f * tsParams.log_peak); // TODO: name these magic constants - GamutCompressParams params{}; - params.limit_J_max = limit_J_max; + GamutCompressParams params; params.mid_J = mid_J; - params.model_gamma = model_gamma; params.focus_dist = focus_dist; - params.lower_hull_gamma = lower_hull_gamma; - params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance); - params.gamut_cusp_table = make_gamut_table(limitingPrimaries, peakLuminance); - params.upper_hull_gamma_table = make_upper_hull_gamma( - params.gamut_cusp_table, - peakLuminance, - limit_J_max, - mid_J, - focus_dist, - lower_hull_gamma, - limitJMhParams); - + params.lower_hull_gamma_inv = lower_hull_gamma_inv; + params.gamut_cusp_table = make_uniform_hue_gamut_table(reachParams, limitJMhParams, peakLuminance, tsParams.forward_limit, shParams, params.hue_table); + params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table); + make_upper_hull_gamma(params.hue_table, params.gamut_cusp_table, peakLuminance, shParams.limit_J_max, mid_J, focus_dist, + lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters return params; } diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h index ff3f969e5a..11adc13fdb 100644 --- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h +++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h @@ -14,17 +14,31 @@ namespace ACES2 JMhParams init_JMhParams(const Primaries &P); ToneScaleParams init_ToneScaleParams(float peakLuminance); -ChromaCompressParams init_ChromaCompressParams(float peakLuminance); -GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primaries &P); - +SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &reachParams); +ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p); +ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams); +GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams, + const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams); + +f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p); +f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p); f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p); +f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p); +f3 JMh_to_Aab(const f3 &JMh, const float &cos_hr, const float &sin_hr, const JMhParams &p); +f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p); f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p); -f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc); -f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc); +float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt); +float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt); +float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt); + +float chroma_compress_norm(float cos_hr, float sin_hr, float chroma_compress_scale); + +f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const float Mnorm, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc); +f3 chroma_compress_inv(const f3 &JMh, const float J, const float Mnorm, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc); -f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p); -f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p); +f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p); +f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p); } // namespace ACES2 diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp index e9e9774338..feb387db5f 100644 --- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp +++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp @@ -147,6 +147,7 @@ class Renderer_ACES_OutputTransform20 : public OpCPU ACES2::JMhParams m_pIn; ACES2::JMhParams m_pOut; ACES2::ToneScaleParams m_t; + ACES2::SharedCompressionParameters m_s; ACES2::ChromaCompressParams m_c; ACES2::GamutCompressParams m_g; }; @@ -184,6 +185,7 @@ class Renderer_ACES_TONESCALE_COMPRESS_20 : public OpCPU bool m_fwd; ACES2::JMhParams m_p; ACES2::ToneScaleParams m_t; + ACES2::SharedCompressionParameters m_s; ACES2::ChromaCompressParams m_c; }; @@ -201,6 +203,7 @@ class Renderer_ACES_GAMUT_COMPRESS_20 : public OpCPU protected: bool m_fwd; + ACES2::SharedCompressionParameters m_s; ACES2::GamutCompressParams m_g; }; @@ -1023,8 +1026,10 @@ Renderer_ACES_OutputTransform20::Renderer_ACES_OutputTransform20(ConstFixedFunct m_pIn = ACES2::init_JMhParams(ACES_AP0::primaries); m_pOut = ACES2::init_JMhParams(lim_primaries); m_t = ACES2::init_ToneScaleParams(peak_luminance); - m_c = ACES2::init_ChromaCompressParams(peak_luminance); - m_g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_pIn, reachGamut); + m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t); + m_g = ACES2::init_GamutCompressParams(peak_luminance, m_pIn, m_pOut, m_t, m_s, reachGamut); } void Renderer_ACES_OutputTransform20::apply(const void * inImg, void * outImg, long numPixels) const @@ -1047,10 +1052,21 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon for(long idx=0; idxgetPixelName()); + ss.newLine() << ss.floatDecl("hwrap") << " = " << pxl << ".b;"; + ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;"; + ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;"; + ss.newLine() << pxl << ".b = hwrap;"; +} + +void _Add_SinCos_Shader( + GpuShaderCreatorRcPtr & shaderCreator, + GpuShaderText & ss) +{ + const std::string pxl(shaderCreator->getPixelName()); + ss.newLine() << ss.floatDecl("h_rad") << " = " << pxl << ".b * " << 3.14159265358979f / 180.0f << ";"; + ss.newLine() << ss.floatDecl("cos_hr") << " = cos(h_rad);"; + ss.newLine() << ss.floatDecl("sin_hr") << " = sin(h_rad);"; +} + + +void _Add_RGB_to_Aab_Shader( GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, const ACES2::JMhParams & p) { const std::string pxl(shaderCreator->getPixelName()); - ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16[0], pxl + ".rgb") << ";"; - ss.newLine() << "lms = " << "lms * " << ss.float3Const(p.D_RGB[0], p.D_RGB[1], p.D_RGB[2]) << ";"; + ss.newLine() << "{"; + ss.indent(); + + ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";"; + + ss.newLine() << ss.float3Decl("F_L_v") << " = pow(abs(lms), " << ss.float3Const(0.42f) << ");"; + ss.newLine() << ss.float3Decl("rgb_a") << " = (sign(lms) * F_L_v) / ( " << ACES2::cam_nl_offset << " + F_L_v);"; + + ss.newLine() << "Aab = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";"; + + ss.dedent(); + ss.newLine() << "}"; +} + +void _Add_Aab_to_JMh_Shader( + GpuShaderCreatorRcPtr & shaderCreator, + GpuShaderText & ss, + const ACES2::JMhParams & p) +{ + const std::string pxl(shaderCreator->getPixelName()); - ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L << " * abs(lms) / 100.0, " << ss.float3Const(0.42f) << ");"; - ss.newLine() << ss.float3Decl("rgb_a") << " = (400.0 * sign(lms) * F_L_v) / (27.13 + F_L_v);"; + ss.newLine() << "{"; + ss.indent(); - ss.newLine() << ss.floatDecl("A") << " = 2.0 * rgb_a.r + rgb_a.g + 0.05 * rgb_a.b;"; - ss.newLine() << ss.floatDecl("a") << " = rgb_a.r - 12.0 * rgb_a.g / 11.0 + rgb_a.b / 11.0;"; - ss.newLine() << ss.floatDecl("b") << " = (rgb_a.r + rgb_a.g - 2.0 * rgb_a.b) / 9.0;"; + ss.newLine() << "if (Aab.r <= 0.0)"; + ss.newLine() << "{"; + ss.indent(); + ss.newLine() << "JMh.rgb = " << ss.float3Const(0.0) << ";"; + ss.dedent(); + ss.newLine() << "}"; - ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(A / " << p.A_w << ", " << ACES2::surround[1] << " * " << p.z << ");"; + ss.newLine() << "else"; + ss.newLine() << "{"; + ss.indent(); + ss.newLine() << ss.floatDecl("J") << " = " << ACES2::J_scale << " * pow(Aab.r, " << p.cz << ");"; - ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : 43.0 * " << ACES2::surround[2] << " * sqrt(a * a + b * b);"; + ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : sqrt(Aab.g * Aab.g + Aab.b * Aab.b);"; - ss.newLine() << ss.floatDecl("h") << " = (a == 0.0) ? 0.0 : " << ss.atan2("b", "a") << " * 180.0 / 3.14159265358979;"; + ss.newLine() << ss.floatDecl("h") << " = (Aab.g == 0.0) ? 0.0 : " << ss.atan2("Aab.b", "Aab.g") << " * " << 180.0 / 3.14159265358979 << ";"; ss.newLine() << "h = h - floor(h / 360.0) * 360.0;"; ss.newLine() << "h = (h < 0.0) ? h + 360.0 : h;"; - ss.newLine() << pxl << ".rgb = " << ss.float3Const("J", "M", "h") << ";"; + ss.newLine() << "JMh.rgb = " << ss.float3Const("J", "M", "h") << ";"; + ss.dedent(); + ss.newLine() << "}"; + + ss.dedent(); + ss.newLine() << "}"; } -void _Add_JMh_to_RGB_Shader( +void _Add_RGB_to_JMh_Shader( + GpuShaderCreatorRcPtr & shaderCreator, + GpuShaderText & ss, + const ACES2::JMhParams & p) +{ + const std::string pxl(shaderCreator->getPixelName()); + + ss.newLine() << ss.float3Decl("JMh") << ";"; // TODO: leaky abstraction should really be explicit functions + ss.newLine() << ss.float3Decl("Aab") << ";"; // TODO: leaky abstraction should really be explicit functions + + ss.newLine() << "{"; + ss.indent(); + + _Add_RGB_to_Aab_Shader(shaderCreator, ss, p); + _Add_Aab_to_JMh_Shader(shaderCreator, ss, p); + + ss.newLine() << pxl << ".rgb = JMh;"; + + ss.dedent(); + ss.newLine() << "}"; +} + + +void _Add_JMh_to_Aab_Shader( GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, const ACES2::JMhParams & p) { const std::string pxl(shaderCreator->getPixelName()); - ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * 3.14159265358979 / 180.0;"; + ss.newLine() << "{"; + ss.indent(); + + + ss.newLine() << "Aab.r = pow(JMh.r * " << 1.0f / ACES2::J_scale << ", " << p.inv_cz << ");"; + ss.newLine() << "Aab.g = JMh.g * cos_hr;"; + ss.newLine() << "Aab.b = JMh.g * sin_hr;"; + + ss.dedent(); + ss.newLine() << "}"; +} + +void _Add_Aab_to_RGB_Shader( + GpuShaderText & ss, + const ACES2::JMhParams & p) +{ + ss.newLine() << "{"; + ss.indent(); + + ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";"; + ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow( " << ACES2::cam_nl_offset << " * abs(rgb_a) / (1.0f - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");"; + ss.newLine() << "JMh.rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";"; - ss.newLine() << ss.floatDecl("scale") << " = " << pxl << ".g / (43.0 * " << ACES2::surround[2] << ");"; - ss.newLine() << ss.floatDecl("A") << " = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));"; - ss.newLine() << ss.floatDecl("a") << " = scale * cos(h);"; - ss.newLine() << ss.floatDecl("b") << " = scale * sin(h);"; + ss.dedent(); + ss.newLine() << "}"; +} - ss.newLine() << ss.float3Decl("rgb_a") << ";"; - ss.newLine() << "rgb_a.r = (460.0 * A + 451.0 * a + 288.0 *b) / 1403.0;"; - ss.newLine() << "rgb_a.g = (460.0 * A - 891.0 * a - 261.0 *b) / 1403.0;"; - ss.newLine() << "rgb_a.b = (460.0 * A - 220.0 * a - 6300.0 *b) / 1403.0;"; +void _Add_JMh_to_RGB_Shader( + GpuShaderCreatorRcPtr & shaderCreator, + GpuShaderText & ss, + const ACES2::JMhParams & p) +{ + const std::string pxl(shaderCreator->getPixelName()); - ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * 100.0 / " << p.F_L << " * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");"; - ss.newLine() << "lms = " << "lms / " << ss.float3Const(p.D_RGB[0], p.D_RGB[1], p.D_RGB[2]) << ";"; + ss.newLine() << ss.float3Decl("JMh") << " = " << pxl << ".rgb;"; + ss.newLine() << ss.float3Decl("Aab") << ";"; + _Add_JMh_to_Aab_Shader(shaderCreator, ss, p); + _Add_Aab_to_RGB_Shader(ss, p); - ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_to_RGB[0], "lms") << ";"; + ss.newLine() << pxl << ".rgb = JMh;"; } std::string _Add_Reach_table( @@ -430,12 +528,12 @@ std::string _Add_Reach_table( shaderCreator->addTexture( name.c_str(), GpuShaderText::getSamplerName(name).c_str(), - table.size, + table.total_size, 1, GpuShaderCreator::TEXTURE_RED_CHANNEL, dimensions, INTERP_NEAREST, - &(table.table[0])); + &(table[0])); if (dimensions == GpuShaderDesc::TEXTURE_1D) @@ -458,26 +556,22 @@ std::string _Add_Reach_table( ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("hwrap") << " = h;"; - ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;"; - ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;"; - - ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap);"; - ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);"; - ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;"; + ss.newLine() << ss.floatDecl("i_base") << " = floor(h);"; + ss.newLine() << ss.floatDecl("i_lo") << " = i_base + " << table.base_index << ";"; + ss.newLine() << ss.floatDecl("i_hi") << " = i_lo + 1;"; if (dimensions == GpuShaderDesc::TEXTURE_1D) { - ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, "(i_lo + 0.5) / 360.0") << ".r;"; - ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, "(i_hi + 0.5) / 360.0") << ".r;"; + ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, "(i_lo + 0.5) / " + std::to_string(table.total_size)) << ".r;"; + ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, "(i_hi + 0.5) / " + std::to_string(table.total_size)) << ".r;"; } else { - ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_lo + 0.5) / 360.0", "0.0")) << ".r;"; - ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / 360.0", "0.5")) << ".r;"; + ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_lo + 0.5) / " + std::to_string(table.total_size), "0.0")) << ".r;"; + ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / " + std::to_string(table.total_size), "0.5")) << ".r;"; } - ss.newLine() << ss.floatDecl("t") << " = (h - i_lo) / (i_hi - i_lo);"; + ss.newLine() << ss.floatDecl("t") << " = h - i_base;"; // Hardcoded single degree spacing ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";"; ss.dedent(); @@ -532,14 +626,101 @@ std::string _Add_Toe_func( return name; } +std::string _Add_Tonescale_func( + GpuShaderCreatorRcPtr & shaderCreator, + unsigned resourceIndex, + bool invert, + const ACES2::JMhParams & p, + const ACES2::ToneScaleParams & t) +{ + // Reserve name + std::ostringstream resName; + resName << shaderCreator->getResourcePrefix() + << std::string("_") + << std::string("tonescale") + << (invert ? std::string("_inv") : std::string("_fwd")) + << resourceIndex; + + // Note: Remove potentially problematic double underscores from GLSL resource names. + std::string name(resName.str()); + StringUtils::ReplaceInPlace(name, "__", "_"); + + GpuShaderText ss(shaderCreator->getLanguage()); + + ss.newLine() << ss.floatKeyword() << " " << name << "(float J)"; + ss.newLine() << "{"; + ss.indent(); + + // Tonescale applied in Y (convert to and from J) + // TODO: Investigate if we can receive negative J here at all. + // If not, abs(J) here and the sign(J) at the return may not be needed at all. + ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) * " << 1.0f / ACES2::J_scale << ", " << p.inv_cz << ");"; + ss.newLine() << ss.floatDecl("Y") << " = pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ");"; + + if (invert) + { + // Inverse Tonescale applied in Y (convert to and from J) + ss.newLine() << ss.floatDecl("Y_i") << " = Y / " << double(p.F_L_n) * ACES2::reference_luminance << ";"; + + ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.inverse_limit << ", Y_i));"; + ss.newLine() << ss.floatDecl("ht") << " = 0.5 * (Z + sqrt(Z * (" << 4.0 * t.t_1 << " + Z)));"; + ss.newLine() << ss.floatDecl("Yo") << " = " << double(p.F_L_n) * t.s_2 << " / (pow((" << t.m_2 << " / ht), (" << 1.0 / t.g << ")) - 1.0);"; + + ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(abs(Yo), 0.42);"; + } + else + { + // Tonescale applied in Y (convert to and from J) + ss.newLine() << ss.floatDecl("f") << " = " << t.m_2 << " * pow(Y / (Y + " << double(t.s_2) * p.F_L_n << "), " << t.g << ");"; + ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << "));"; + ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << double(p.F_L_n) * ACES2::reference_luminance << " * Y_ts, 0.42);"; + } + + ss.newLine() << ss.floatDecl("J_ts") << " = " << ACES2::J_scale << " * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) * " << p.inv_A_w_J << ", " << p.cz << ");"; + ss.newLine() << "return sign(J) * J_ts;"; + + ss.dedent(); + ss.newLine() << "}"; + + shaderCreator->addToHelperShaderCode(ss.string().c_str()); + + return name; +} + +void _Add_ChromaCompressionNorm_Shader( + GpuShaderText & ss, + const ACES2::ChromaCompressParams & c) +{ + // Mnorm + ss.newLine() << ss.floatDecl("Mnorm") << ";"; + ss.newLine() << "{"; + ss.indent(); + + // TODO: optimization: can bake weights into terms and convert dotprods to addition. /coz + ss.newLine() << ss.floatDecl("cos_hr2") << " = 2.0 * cos_hr * cos_hr - 1.0;"; + ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * cos_hr * sin_hr;"; + ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * cos_hr * cos_hr * cos_hr - 3.0 * cos_hr;"; + ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * sin_hr - 4.0 * sin_hr * sin_hr * sin_hr;"; + ss.newLine() << ss.float3Decl("cosines") << " = " << ss.float3Const("cos_hr", "cos_hr2", "cos_hr3") <<";"; + ss.newLine() << ss.float3Decl("cosine_weights") << " = " << ss.float3Const(11.34072 * c.chroma_compress_scale, + 16.46899 * c.chroma_compress_scale, + 7.88380 * c.chroma_compress_scale) <<";"; + ss.newLine() << ss.float3Decl("sines") << " = " << ss.float3Const("sin_hr", "sin_hr2", "sin_hr3") <<";"; + ss.newLine() << ss.float3Decl("sine_weights") << " = " << ss.float3Const(14.66441 * c.chroma_compress_scale, + -6.37224 * c.chroma_compress_scale, + 9.19364 * c.chroma_compress_scale) <<";"; + ss.newLine() << "Mnorm = dot(cosines, cosine_weights) + dot(sines, sine_weights) + " << 77.12896 * c.chroma_compress_scale<< ";"; + + ss.dedent(); + ss.newLine() << "}"; +} + void _Add_Tonescale_Compress_Fwd_Shader( GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, unsigned resourceIndex, - const ACES2::JMhParams & p, - const ACES2::ToneScaleParams & t, - const ACES2::ChromaCompressParams & c, - const std::string & reachName) + const ACES2::SharedCompressionParameters & s, + const ACES2::ChromaCompressParams & c) { std::string toeName = _Add_Toe_func(shaderCreator, resourceIndex, false); @@ -549,15 +730,6 @@ void _Add_Tonescale_Compress_Fwd_Shader( ss.newLine() << ss.floatDecl("M") << " = " << pxl << ".g;"; ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;"; - // Tonescale applied in Y (convert to and from J) - ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));"; - ss.newLine() << ss.floatDecl("Y") << " = sign(J) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;"; - - ss.newLine() << ss.floatDecl("f") << " = " << t.m_2 << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");"; - ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";"; - - ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y_ts) / 100.0, 0.42);"; - ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << ACES2::surround[1] << " * " << p.z << ");"; // ChromaCompress ss.newLine() << ss.floatDecl("M_cp") << " = M;"; @@ -566,31 +738,13 @@ void _Add_Tonescale_Compress_Fwd_Shader( ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << c.limit_J_max << ";"; + ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";"; ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);"; - // Mnorm - ss.newLine() << ss.floatDecl("Mnorm") << ";"; - ss.newLine() << "{"; - ss.indent(); - - ss.newLine() << ss.floatDecl("PI") << " = 3.14159265358979;"; - ss.newLine() << ss.floatDecl("h_rad") << " = h / 180.0 * PI;"; - ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);"; - ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);"; - ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;"; - ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;"; - ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;"; - ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;"; - ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;"; - ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";"; - - ss.dedent(); - ss.newLine() << "}"; + _Add_ChromaCompressionNorm_Shader(ss, c); - ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);"; - ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma << ") * reachM / Mnorm;"; - ss.newLine() << "M_cp = M * pow(J_ts / J, " << c.model_gamma << ");"; + ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;"; + ss.newLine() << "M_cp = M * pow(J_ts / J, " << s.model_gamma_inv << ");"; ss.newLine() << "M_cp = M_cp / Mnorm;"; ss.newLine() << "M_cp = limit - " << toeName << "(limit - M_cp, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));"; @@ -607,30 +761,17 @@ void _Add_Tonescale_Compress_Inv_Shader( GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, unsigned resourceIndex, - const ACES2::JMhParams & p, - const ACES2::ToneScaleParams & t, - const ACES2::ChromaCompressParams & c, - const std::string & reachName) + const ACES2::SharedCompressionParameters & s, + const ACES2::ChromaCompressParams & c) { std::string toeName = _Add_Toe_func(shaderCreator, resourceIndex, true); const std::string pxl(shaderCreator->getPixelName()); - + ss.newLine() << ss.floatDecl("J_ts") << " = " << pxl << ".r;"; ss.newLine() << ss.floatDecl("M_cp") << " = " << pxl << ".g;"; ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;"; - // Inverse Tonescale applied in Y (convert to and from J) - ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));"; - ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;"; - - ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));"; - ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;"; - ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);"; - - ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y * 100.0) / 100.0, 0.42);"; - ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << ACES2::surround[1] << " * " << p.z << ");"; - // ChromaCompress ss.newLine() << ss.floatDecl("M") << " = M_cp;"; @@ -638,36 +779,18 @@ void _Add_Tonescale_Compress_Inv_Shader( ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << c.limit_J_max << ";"; + ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";"; ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);"; - // Mnorm - ss.newLine() << ss.floatDecl("Mnorm") << ";"; - ss.newLine() << "{"; - ss.indent(); - - ss.newLine() << ss.floatDecl("PI") << " = 3.14159265358979;"; - ss.newLine() << ss.floatDecl("h_rad") << " = h / 180.0 * PI;"; - ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);"; - ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);"; - ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;"; - ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;"; - ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;"; - ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;"; - ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;"; - ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";"; - - ss.dedent(); - ss.newLine() << "}"; + _Add_ChromaCompressionNorm_Shader(ss, c); - ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);"; - ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma << ") * reachM / Mnorm;"; + ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;"; ss.newLine() << "M = M_cp / Mnorm;"; ss.newLine() << "M = " << toeName << "(M, limit, nJ * " << c.compr << ", snJ);"; ss.newLine() << "M = limit - " << toeName << "(limit - M, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));"; ss.newLine() << "M = M * Mnorm;"; - ss.newLine() << "M = M * pow(J_ts / J, " << -c.model_gamma << ");"; + ss.newLine() << "M = M * pow(J_ts / J, " << -s.model_gamma_inv << ");"; ss.dedent(); ss.newLine() << "}"; @@ -708,7 +831,7 @@ std::string _Add_Cusp_table( GpuShaderCreator::TEXTURE_RGB_CHANNEL, dimensions, INTERP_NEAREST, - &(g.gamut_cusp_table.table[0][0])); + &(g.gamut_cusp_table[0][0])); if (dimensions == GpuShaderDesc::TEXTURE_1D) { @@ -727,24 +850,20 @@ std::string _Add_Cusp_table( GpuShaderText ss(shaderCreator->getLanguage()); const std::string hues_array_name = name + "_hues_array"; - std::vector hues_array(g.gamut_cusp_table.total_size); - for (int i = 0; i < g.gamut_cusp_table.total_size; ++i) - { - hues_array[i] = g.gamut_cusp_table.table[i][2]; - } - ss.declareFloatArrayConst(hues_array_name, (int) hues_array.size(), hues_array.data()); + ss.declareFloatArrayConst(hues_array_name, (int) g.hue_table.total_size, g.hue_table.data()); - ss.newLine() << ss.float2Keyword() << " " << name << "_sample(float h)"; + ss.newLine() << ss.float3Keyword() << " " << name << "_sample(float h)"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("i_lo") << " = 0;"; - ss.newLine() << ss.floatDecl("i_hi") << " = " << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << ";"; + ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(h) + " << g.gamut_cusp_table.base_index << ";"; - ss.newLine() << ss.floatDecl("hwrap") << " = h;"; - ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;"; - ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;"; - ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(hwrap + " << g.gamut_cusp_table.base_index << ");"; + ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max(" + << ss.floatKeyword() << "(" << g.gamut_cusp_table.lower_wrap_index << "), " + << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[0] << ")));"; + ss.newLine() << ss.intDecl("i_hi") << " = " << ss.intKeyword() << "(min(" + << ss.floatKeyword() << "(" << g.gamut_cusp_table.upper_wrap_index << "), " + << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[1] << ")));"; ss.newLine() << "while (i_lo + 1 < i_hi)"; ss.newLine() << "{"; @@ -764,7 +883,7 @@ std::string _Add_Cusp_table( ss.newLine() << "i_hi = i;"; ss.dedent(); ss.newLine() << "}"; - ss.newLine() << "i = " << ss.intKeyword() << "((i_lo + i_hi) / 2.0);"; + ss.newLine() << "i = (i_lo + i_hi) / 2;"; ss.dedent(); ss.newLine() << "}"; @@ -780,98 +899,8 @@ std::string _Add_Cusp_table( ss.newLine() << ss.float3Decl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.gamut_cusp_table.total_size), "0.5")) << ".rgb;"; } - ss.newLine() << ss.floatDecl("t") << " = (h - lo.b) / (hi.b - lo.b);"; - ss.newLine() << ss.floatDecl("cuspJ") << " = " << ss.lerp("lo.r", "hi.r", "t") << ";"; - ss.newLine() << ss.floatDecl("cuspM") << " = " << ss.lerp("lo.g", "hi.g", "t") << ";"; - - ss.newLine() << "return " << ss.float2Const("cuspJ", "cuspM") << ";"; - - ss.dedent(); - ss.newLine() << "}"; - - shaderCreator->addToHelperShaderCode(ss.string().c_str()); - - return name; -} - -std::string _Add_Gamma_table( - GpuShaderCreatorRcPtr & shaderCreator, - unsigned resourceIndex, - const ACES2::GamutCompressParams & g) -{ - // Reserve name - std::ostringstream resName; - resName << shaderCreator->getResourcePrefix() - << std::string("_") - << std::string("upper_hull_gamma_table_") - << resourceIndex; - - // Note: Remove potentially problematic double underscores from GLSL resource names. - std::string name(resName.str()); - StringUtils::ReplaceInPlace(name, "__", "_"); - - // Register texture - GpuShaderDesc::TextureDimensions dimensions = GpuShaderDesc::TEXTURE_1D; - if (shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_1_0 - || shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_3_0 - || !shaderCreator->getAllowTexture1D()) - { - dimensions = GpuShaderDesc::TEXTURE_2D; - } - - shaderCreator->addTexture( - name.c_str(), - GpuShaderText::getSamplerName(name).c_str(), - g.gamut_cusp_table.total_size, - 1, - GpuShaderCreator::TEXTURE_RED_CHANNEL, - dimensions, - INTERP_NEAREST, - &(g.upper_hull_gamma_table.table[0])); - - - if (dimensions == GpuShaderDesc::TEXTURE_1D) - { - GpuShaderText ss(shaderCreator->getLanguage()); - ss.declareTex1D(name); - shaderCreator->addToDeclareShaderCode(ss.string().c_str()); - } - else - { - GpuShaderText ss(shaderCreator->getLanguage()); - ss.declareTex2D(name); - shaderCreator->addToDeclareShaderCode(ss.string().c_str()); - } - - // Sampler function - GpuShaderText ss(shaderCreator->getLanguage()); - - ss.newLine() << ss.floatKeyword() << " " << name << "_sample(float h)"; - ss.newLine() << "{"; - ss.indent(); - - ss.newLine() << ss.floatDecl("hwrap") << " = h;"; - ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;"; - ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;"; - - ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << g.upper_hull_gamma_table.base_index << ";"; - ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);"; - ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;"; - - ss.newLine() << ss.floatDecl("base_hue") << " = i_lo - " << g.upper_hull_gamma_table.base_index << ";"; - - if (dimensions == GpuShaderDesc::TEXTURE_1D) - { - ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size)) << ".r;"; - ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size)) << ".r;"; - } - else - { - ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size), "0.5")) << ".r;"; - ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size), "0.5")) << ".r;"; - } - - ss.newLine() << ss.floatDecl("t") << " = hwrap - base_hue;"; + ss.newLine() << ss.floatDecl("t") << " = (h - " << hues_array_name << "[i_hi - 1]) / " + "(" << hues_array_name << "[i_hi]" << " - " << hues_array_name << "[i_hi - 1]);"; ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";"; ss.dedent(); @@ -885,7 +914,7 @@ std::string _Add_Gamma_table( std::string _Add_Focus_Gain_func( GpuShaderCreatorRcPtr & shaderCreator, unsigned resourceIndex, - const ACES2::GamutCompressParams & g) + const ACES2::SharedCompressionParameters & s) { // Reserve name std::ostringstream resName; @@ -904,13 +933,15 @@ std::string _Add_Focus_Gain_func( ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(g.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";"; + ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";"; - ss.newLine() << "if (J > thr)"; + ss.newLine() << "if (J > thr)"; // TODO threshold ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("gain") << " = ( " << g.limit_J_max << " - thr) / max(0.0001, (" << g.limit_J_max << " - min(" << g.limit_J_max << ", J)));"; - ss.newLine() << "return pow(log(gain)/log(10.0), 1.0 / " << ACES2::focus_adjust_gain << ") + 1.0;"; + ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, " << s.limit_J_max << " - J);"; + ss.newLine() << "gain = log(gain)/log(10.0);"; // TODO log10(gain) but not all shading languages have log10() would log2(gain)/log2(10) be better? perhaps delegate to GpuShaderText? + //ss.newLine() << "return pow(gain, " << ACES2::focus_adjust_gain_inv << ") + 1.0;"; // TODO; remove once agreed on change + ss.newLine() << "return gain * gain + 1.0;"; ss.dedent(); ss.newLine() << "}"; ss.newLine() << "else"; @@ -931,7 +962,7 @@ std::string _Add_Focus_Gain_func( std::string _Add_Solve_J_Intersect_func( GpuShaderCreatorRcPtr & shaderCreator, unsigned resourceIndex, - const ACES2::GamutCompressParams & g) + const ACES2::SharedCompressionParameters & s) { // Reserve name std::ostringstream resName; @@ -950,54 +981,30 @@ std::string _Add_Solve_J_Intersect_func( ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("a") << " = " << "M / (focusJ * slope_gain);"; - ss.newLine() << ss.floatDecl("b") << " = 0.0;"; - ss.newLine() << ss.floatDecl("c") << " = 0.0;"; - ss.newLine() << ss.floatDecl("intersectJ") << " = 0.0;"; + ss.newLine() << ss.floatDecl("M_scaled") << " = M / slope_gain;"; + ss.newLine() << ss.floatDecl("a") << " = M_scaled / focusJ;"; ss.newLine() << "if (J < focusJ)"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << "b = 1.0 - M / slope_gain;"; + ss.newLine() << ss.floatDecl("b") << " = 1.0 - M_scaled;"; + ss.newLine() << ss.floatDecl("c") << " = -J;"; + ss.newLine() << ss.floatDecl("det") << " = b * b - 4.f * a * c;"; + ss.newLine() << ss.floatDecl("root") << " = sqrt(det);"; + ss.newLine() << "return -2.0 * c / (b + root);"; ss.dedent(); ss.newLine() << "}"; ss.newLine() << "else"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << "b = - (1.0 + M / slope_gain + " << g.limit_J_max << " * M / (focusJ * slope_gain));"; + ss.newLine() << ss.floatDecl("b") << " = - (1.0 + M_scaled + " << s.limit_J_max << " * a);"; + ss.newLine() << ss.floatDecl("c") << " = " << s.limit_J_max << " * M_scaled + J;"; + ss.newLine() << ss.floatDecl("det") << " = b * b - 4.f * a * c;"; + ss.newLine() << ss.floatDecl("root") << " = sqrt(det);"; + ss.newLine() << "return -2.0 * c / (b - root);"; ss.dedent(); ss.newLine() << "}"; - ss.newLine() << "if (J < focusJ)"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "c = -J;"; - ss.dedent(); - ss.newLine() << "}"; - ss.newLine() << "else"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "c = " << g.limit_J_max << " * M / slope_gain + J;"; - ss.dedent(); - ss.newLine() << "}"; - - ss.newLine() << ss.floatDecl("root") << " = sqrt(b * b - 4.0 * a * c);"; - - ss.newLine() << "if (J < focusJ)"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "intersectJ = 2.0 * c / (-b - root);"; - ss.dedent(); - ss.newLine() << "}"; - ss.newLine() << "else"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "intersectJ = 2.0 * c / (-b + root);"; - ss.dedent(); - ss.newLine() << "}"; - - ss.newLine() << "return intersectJ;"; - ss.dedent(); ss.newLine() << "}"; @@ -1009,8 +1016,7 @@ std::string _Add_Solve_J_Intersect_func( std::string _Add_Find_Gamut_Boundary_Intersection_func( GpuShaderCreatorRcPtr & shaderCreator, unsigned resourceIndex, - const ACES2::GamutCompressParams & g, - const std::string & solveJIntersectName) + const ACES2::SharedCompressionParameters & s) { // Reserve name std::ostringstream resName; @@ -1025,49 +1031,28 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func( GpuShaderText ss(shaderCreator->getLanguage()); - ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp_in, float J_focus, float slope_gain, float gamma_top, float gamma_bottom)"; + ss.newLine() << ss.floatKeyword() << " " << name << "(" << ss.float2Keyword() + << " JM_cusp, float gamma_top_inv, float gamma_bottom_inv, float J_intersect_source, float J_intersect_cusp, float slope)"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.float2Decl("JM_cusp") << " = " << ss.float2Const("JM_cusp_in.r", std::string("JM_cusp_in.g * (1.0 + ") + std::to_string(ACES2::smooth_m) + " * " + std::to_string(ACES2::smooth_cusps) + ")") << ";"; - - ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh_s.r, JMh_s.g, J_focus, slope_gain);"; - ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JM_cusp.r, JM_cusp.g, J_focus, slope_gain);"; - - ss.newLine() << ss.floatDecl("slope") << " = 0.0;"; - ss.newLine() << "if (J_intersect_source < J_focus)"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "slope = J_intersect_source * (J_intersect_source - J_focus) / (J_focus * slope_gain);"; - ss.dedent(); - ss.newLine() << "}"; - ss.newLine() << "else"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "slope = (" << g.limit_J_max << " - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);"; - ss.dedent(); - ss.newLine() << "}"; - - ss.newLine() << ss.floatDecl("M_boundary_lower ") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, 1.0 / gamma_bottom) / (JM_cusp.r / JM_cusp.g - slope);"; - ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << g.limit_J_max << " - J_intersect_cusp) * pow((" << g.limit_J_max << " - J_intersect_source) / (" << g.limit_J_max << " - J_intersect_cusp), 1.0 / gamma_top) / (slope * JM_cusp.g + " << g.limit_J_max << " - JM_cusp.r);"; + ss.newLine() << ss.floatDecl("M_boundary_lower") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, gamma_bottom_inv) / (JM_cusp.r / JM_cusp.g - slope);"; + ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), gamma_top_inv) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);"; ss.newLine() << ss.floatDecl("smin") << " = 0.0;"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("a") << " = M_boundary_lower / JM_cusp.g;"; - ss.newLine() << ss.floatDecl("b") << " = M_boundary_upper / JM_cusp.g;"; - ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << ";"; + ss.newLine() << ss.floatDecl("a") << " = M_boundary_lower;"; + ss.newLine() << ss.floatDecl("b") << " = M_boundary_upper;"; + ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << " * JM_cusp.g;"; ss.newLine() << ss.floatDecl("h") << " = max(s - abs(a - b), 0.0) / s;"; - ss.newLine() << "smin = min(a, b) - h * h * h * s * (1.0 / 6.0);"; + ss.newLine() << "smin = min(a, b) - h * h * h * s * " << (1.0 / 6.0) << ";"; ss.dedent(); ss.newLine() << "}"; - ss.newLine() << ss.floatDecl("M_boundary") << " = JM_cusp.g * smin;"; - ss.newLine() << ss.floatDecl("J_boundary") << "= J_intersect_source + slope * M_boundary;"; - - ss.newLine() << "return " << ss.float3Const("J_boundary", "M_boundary", "J_intersect_source") << ";"; + ss.newLine() << "return smin;"; ss.dedent(); ss.newLine() << "}"; @@ -1077,19 +1062,17 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func( return name; } -std::string _Add_Reach_Boundary_func( +std::string _Add_Compression_func( GpuShaderCreatorRcPtr & shaderCreator, unsigned resourceIndex, - const ACES2::GamutCompressParams & g, - const std::string & reachName, - const std::string & getFocusGainName, - const std::string & solveJIntersectName) + bool invert) { // Reserve name std::ostringstream resName; resName << shaderCreator->getResourcePrefix() << std::string("_") - << std::string("get_reach_boundary") + << std::string("remap_M") + << (invert ? std::string("_inv") : std::string("_fwd")) << resourceIndex; // Note: Remove potentially problematic double underscores from GLSL resource names. @@ -1098,102 +1081,47 @@ std::string _Add_Reach_Boundary_func( GpuShaderText ss(shaderCreator->getLanguage()); - ss.newLine() << ss.float3Keyword() << " " << name << "(float J, float M, float h, " << ss.float2Keyword() << " JMcusp, float focusJ)"; + ss.newLine() << ss.floatKeyword() << " " << name << "(float M, float gamut_boundary_M, float reach_boundary_M)"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(h);"; - ss.newLine() << ss.floatDecl("slope_gain") << " = " << g.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(J, JMcusp.r);"; - ss.newLine() << ss.floatDecl("intersectJ") << " = " << solveJIntersectName << "(J, M, focusJ, slope_gain);"; + ss.newLine() << ss.floatDecl("boundary_ratio") << " = gamut_boundary_M / reach_boundary_M;"; + ss.newLine() << ss.floatDecl("proportion") << " = max(boundary_ratio, " << ACES2::compression_threshold << ");"; + ss.newLine() << ss.floatDecl("threshold") << " = proportion * gamut_boundary_M;"; - ss.newLine() << ss.floatDecl("slope") << " = 0.0;"; - ss.newLine() << "if (intersectJ < focusJ)"; + ss.newLine() << "if (proportion >= 1.0f || M <= threshold)"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << "slope = intersectJ * (intersectJ - focusJ) / (focusJ * slope_gain);"; - ss.dedent(); - ss.newLine() << "}"; - ss.newLine() << "else"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "slope = (" << g.limit_J_max << " - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);"; - ss.dedent(); - ss.newLine() << "}"; - - ss.newLine() << ss.floatDecl("boundary") << " = " << g.limit_J_max << " * pow(intersectJ / " << g.limit_J_max << ", " << g.model_gamma << ") * reachMaxM / (" << g.limit_J_max << " - slope * reachMaxM);"; - - ss.newLine() << "return " << ss.float3Const("J", "boundary", "h") << ";"; - + ss.newLine() << "return M;"; ss.dedent(); ss.newLine() << "}"; + ss.newLine() << ss.floatDecl("m_offset") << " = M - threshold;"; + ss.newLine() << ss.floatDecl("gamut_offset") << " = gamut_boundary_M - threshold;"; + ss.newLine() << ss.floatDecl("reach_offset") << " = reach_boundary_M - threshold;"; - shaderCreator->addToHelperShaderCode(ss.string().c_str()); - - return name; -} - -std::string _Add_Compression_func( - GpuShaderCreatorRcPtr & shaderCreator, - unsigned resourceIndex, - bool invert) -{ - // Reserve name - std::ostringstream resName; - resName << shaderCreator->getResourcePrefix() - << std::string("_") - << std::string("compression") - << (invert ? std::string("_inv") : std::string("_fwd")) - << resourceIndex; - - // Note: Remove potentially problematic double underscores from GLSL resource names. - std::string name(resName.str()); - StringUtils::ReplaceInPlace(name, "__", "_"); - - GpuShaderText ss(shaderCreator->getLanguage()); - - ss.newLine() << ss.floatKeyword() << " " << name << "(float v, float thr, float lim)"; - ss.newLine() << "{"; - ss.indent(); - - ss.newLine() << ss.floatDecl("s") << " = (lim - thr) * (1.0 - thr) / (lim - 1.0);"; - ss.newLine() << ss.floatDecl("nd") << " = (v - thr) / s;"; - - - ss.newLine() << ss.floatDecl("vCompressed") << " = 0.0;"; + ss.newLine() << ss.floatDecl("scale") << " = reach_offset / ((reach_offset / gamut_offset) - 1.0f);"; + ss.newLine() << ss.floatDecl("nd") << " = m_offset / scale;"; if (invert) { - ss.newLine() << "if (v < thr || lim <= 1.0001 || v > thr + s)"; + ss.newLine() << "if (nd >= 1.0f)"; // TODO: could be done branchless? ss.newLine() << "{"; ss.indent(); - ss.newLine() << "vCompressed = v;"; + ss.newLine() << "return threshold + scale;"; ss.dedent(); ss.newLine() << "}"; ss.newLine() << "else"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << "vCompressed = thr + s * (-nd / (nd - 1));"; + ss.newLine() << "return threshold + scale * -(nd / (nd - 1.0f));"; ss.dedent(); ss.newLine() << "}"; } else { - ss.newLine() << "if (v < thr || lim <= 1.0001)"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "vCompressed = v;"; - ss.dedent(); - ss.newLine() << "}"; - ss.newLine() << "else"; - ss.newLine() << "{"; - ss.indent(); - ss.newLine() << "vCompressed = thr + s * nd / (1.0 + nd);"; - ss.dedent(); - ss.newLine() << "}"; + ss.newLine() << "return threshold + scale * nd / (1.0f + nd);"; } - ss.newLine() << "return vCompressed;"; - ss.dedent(); ss.newLine() << "}"; @@ -1205,13 +1133,12 @@ std::string _Add_Compression_func( std::string _Add_Compress_Gamut_func( GpuShaderCreatorRcPtr & shaderCreator, unsigned resourceIndex, + const ACES2::SharedCompressionParameters & s, const ACES2::GamutCompressParams & g, - const std::string & cuspName, const std::string & getFocusGainName, - const std::string & gammaName, const std::string & findGamutBoundaryIntersectionName, - const std::string & getReachBoundaryName, - const std::string & compressionName) + const std::string & compressionName, + const std::string & solveJIntersectName) { // Reserve name std::ostringstream resName; @@ -1226,7 +1153,7 @@ std::string _Add_Compress_Gamut_func( GpuShaderText ss(shaderCreator->getLanguage()); - ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx)"; + ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx, " << ss.float3Keyword() << " JMGcusp, float reachMaxM)"; ss.newLine() << "{"; ss.indent(); @@ -1234,7 +1161,7 @@ std::string _Add_Compress_Gamut_func( ss.newLine() << ss.floatDecl("M") << " = JMh.g;"; ss.newLine() << ss.floatDecl("h") << " = JMh.b;"; - ss.newLine() << "if (M < 0.0001 || J > " << g.limit_J_max << ")"; + ss.newLine() << "if (M <= 0.0 || J > " << s.limit_J_max << ")"; ss.newLine() << "{"; ss.indent(); ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";"; @@ -1244,40 +1171,36 @@ std::string _Add_Compress_Gamut_func( ss.newLine() << "{"; ss.indent(); - ss.newLine() << ss.float2Decl("project_from") << " = " << ss.float2Const("J", "M") << ";"; - ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(h);"; + ss.newLine() << ss.float2Decl("JMcusp") << " = JMGcusp.rg;"; - ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(g.limit_J_max)) << "));"; - ss.newLine() << ss.floatDecl("slope_gain") << " = " << g.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);"; + ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));"; + ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max * g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);"; + ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh.r, JMh.g, focusJ, slope_gain);"; + ss.newLine() << ss.floatDecl("gamut_slope") << " = (J_intersect_source < focusJ) ? J_intersect_source : (" << s.limit_J_max << " - J_intersect_source);"; + ss.newLine() << "gamut_slope = gamut_slope * (J_intersect_source - focusJ) / (focusJ * slope_gain);"; - ss.newLine() << ss.floatDecl("gamma_top") << " = " << gammaName << "_sample(h);"; - ss.newLine() << ss.floatDecl("gamma_bottom") << " = " << g.lower_hull_gamma << ";"; + ss.newLine() << ss.floatDecl("gamma_top_inv") << " = JMGcusp.b;"; + ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";"; // TODO move to where it is used - ss.newLine() << ss.float3Decl("boundaryReturn") << " = " << findGamutBoundaryIntersectionName << "(" << ss.float3Const("J", "M", "h") << ", JMcusp, focusJ, slope_gain, gamma_top, gamma_bottom);"; - ss.newLine() << ss.float2Decl("JMboundary") << " = " << ss.float2Const("boundaryReturn.r", "boundaryReturn.g") << ";"; - ss.newLine() << ss.float2Decl("project_to") << " = " << ss.float2Const("boundaryReturn.b", "0.0") << ";"; + ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JMcusp.r, JMcusp.g, focusJ, slope_gain);"; + ss.newLine() << ss.floatDecl("gamutBoundaryM") << " = " << findGamutBoundaryIntersectionName + << "(JMcusp, gamma_top_inv, gamma_bottom_inv, J_intersect_source, J_intersect_cusp, gamut_slope);"; - ss.newLine() << "if (JMboundary.g <= 0.0)"; + ss.newLine() << "if (gamutBoundaryM <= 0.0)"; ss.newLine() << "{"; ss.indent(); ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";"; ss.dedent(); ss.newLine() << "}"; - ss.newLine() << ss.float3Decl("reachBoundary") << " = " << getReachBoundaryName << "(JMboundary.r, JMboundary.g, h, JMcusp, focusJ);"; - - ss.newLine() << ss.floatDecl("difference") << " = max(1.0001, reachBoundary.g / JMboundary.g);"; - ss.newLine() << ss.floatDecl("threshold") << " = max(" << ACES2::compression_threshold << ", 1.0 / difference);"; + ss.newLine() << ss.floatDecl("reachBoundaryM") << " = " << s.limit_J_max << " * pow(J_intersect_source / " << s.limit_J_max << ", " << s.model_gamma_inv << ");"; + ss.newLine() << "reachBoundaryM = reachBoundaryM / ((" << s.limit_J_max << " / reachMaxM) - gamut_slope);"; - ss.newLine() << ss.floatDecl("v") << " = project_from.g / JMboundary.g;"; - ss.newLine() << "v = " << compressionName << "(v, threshold, difference);"; + ss.newLine() << ss.floatDecl("remapped_M") << " = " << compressionName << "(M, gamutBoundaryM, reachBoundaryM);"; + ss.newLine() << ss.floatDecl("remapped_J") << " = J_intersect_source + remapped_M * gamut_slope;"; - ss.newLine() << ss.float2Decl("JMcompressed") << " = " << ss.float2Const( - "project_to.r + v * (JMboundary.r - project_to.r)", - "project_to.g + v * (JMboundary.g - project_to.g)" - ) << ";"; - ss.newLine() << "return " << ss.float3Const("JMcompressed.r", "JMcompressed.g", "h") << ";"; + ss.newLine() << "return " << ss.float3Const("remapped_J", "remapped_M", "h") << ";"; ss.dedent(); ss.newLine() << "}"; @@ -1294,58 +1217,56 @@ void _Add_Gamut_Compress_Fwd_Shader( GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, unsigned int resourceIndex, - const ACES2::GamutCompressParams & g, - const std::string & reachName) + const ACES2::SharedCompressionParameters & s, + const ACES2::GamutCompressParams & g) { std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g); - std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g); - std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, g); - std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, g); - std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, g, solveJIntersectName); - std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, g, reachName, getFocusGainName, solveJIntersectName); + std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s); + std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s); + std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s); std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false); - std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName); + std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, compressionName, solveJIntersectName); const std::string pxl(shaderCreator->getPixelName()); - ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r);"; + ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);"; + ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r, JMGcusp, reachMaxM);"; } void _Add_Gamut_Compress_Inv_Shader( GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, unsigned int resourceIndex, - const ACES2::GamutCompressParams & g, - const std::string & reachName) + const ACES2::SharedCompressionParameters & s, + const ACES2::GamutCompressParams & g) { std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g); - std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g); - std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, g); - std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, g); - std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, g, solveJIntersectName); - std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, g, reachName, getFocusGainName, solveJIntersectName); + std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s); + std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s); + std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s); std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true); - std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName); + std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, compressionName, solveJIntersectName); const std::string pxl(shaderCreator->getPixelName()); - ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(" << pxl << ".b);"; + ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);"; + ss.newLine() << ss.floatDecl("Jx") << " = " << pxl << ".r;"; ss.newLine() << ss.float3Decl("unCompressedJMh") << ";"; // Analytic inverse below threshold - ss.newLine() << "if (Jx <= " << ss.lerp("JMcusp.r", std::to_string(g.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")"; + ss.newLine() << "if (Jx <= " << ss.lerp("JMGcusp.r", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);"; + ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM);"; ss.dedent(); ss.newLine() << "}"; // Approximation above threshold ss.newLine() << "else"; ss.newLine() << "{"; ss.indent(); - ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx).r;"; - ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);"; + ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM).r;"; + ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM);"; ss.dedent(); ss.newLine() << "}"; @@ -1368,8 +1289,6 @@ void Add_ACES_OutputTransform_Fwd_Shader( const float white_x = (float) params[7]; const float white_y = (float) params[8]; - const Primaries in_primaries = ACES_AP0::primaries; - const Primaries lim_primaries = { {red_x , red_y }, {green_x, green_y}, @@ -1377,31 +1296,41 @@ void Add_ACES_OutputTransform_Fwd_Shader( {white_x, white_y} }; - ACES2::JMhParams pIn = ACES2::init_JMhParams(in_primaries); - ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries); - ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); - ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance); - const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries); + const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries); + const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries); + const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut); + const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t); + const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); unsigned resourceIndex = shaderCreator->getNextResourceIndex(); - std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table); + const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table); + const std::string tonescaleName_Fwd = _Add_Tonescale_func(shaderCreator, resourceIndex, false, pIn, t); + const std::string pxl(shaderCreator->getPixelName()); ss.newLine() << ""; ss.newLine() << "// Add RGB to JMh"; ss.newLine() << ""; - ss.newLine() << "{"; - ss.indent(); - _Add_RGB_to_JMh_Shader(shaderCreator, ss, pIn); - ss.dedent(); - ss.newLine() << "}"; + _Add_RGB_to_JMh_Shader(shaderCreator, ss, pIn); + _Add_SinCos_Shader(shaderCreator, ss); + ss.newLine() << ""; ss.newLine() << "// Add ToneScale and ChromaCompress (fwd)"; ss.newLine() << ""; + + ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);"; + + ss.newLine() << "// Sample tables (fwd)"; + ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);"; + + ss.newLine() << ""; + ss.newLine() << "{"; ss.indent(); - _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, c, reachName); + _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, c); ss.dedent(); ss.newLine() << "}"; @@ -1410,7 +1339,7 @@ void Add_ACES_OutputTransform_Fwd_Shader( ss.newLine() << ""; ss.newLine() << "{"; ss.indent(); - _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, g, reachName); + _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g); ss.dedent(); ss.newLine() << "}"; @@ -1441,8 +1370,6 @@ void Add_ACES_OutputTransform_Inv_Shader( const float white_x = (float) params[7]; const float white_y = (float) params[8]; - const Primaries in_primaries = ACES_AP0::primaries; - const Primaries lim_primaries = { {red_x , red_y }, {green_x, green_y}, @@ -1450,40 +1377,44 @@ void Add_ACES_OutputTransform_Inv_Shader( {white_x, white_y} }; - ACES2::JMhParams pIn = ACES2::init_JMhParams(in_primaries); - ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries); - ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); - ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance); - const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries); + const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries); + const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries); + const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut); + const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t); + const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); unsigned resourceIndex = shaderCreator->getNextResourceIndex(); - - std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table); + const std::string pxl(shaderCreator->getPixelName()); + + std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table); + std::string tonescaleName_Inv = _Add_Tonescale_func(shaderCreator, resourceIndex, true, pIn, t); ss.newLine() << ""; ss.newLine() << "// Add RGB to JMh"; ss.newLine() << ""; - ss.newLine() << "{"; - ss.indent(); - _Add_RGB_to_JMh_Shader(shaderCreator, ss, pLim); - ss.dedent(); - ss.newLine() << "}"; + _Add_RGB_to_JMh_Shader(shaderCreator, ss, pLim); + _Add_SinCos_Shader(shaderCreator, ss); + + ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);"; ss.newLine() << ""; ss.newLine() << "// Add GamutCompress (inv)"; ss.newLine() << ""; ss.newLine() << "{"; ss.indent(); - _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, g, reachName); + _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g); ss.dedent(); ss.newLine() << "}"; ss.newLine() << ""; ss.newLine() << "// Add ToneScale and ChromaCompress (inv)"; ss.newLine() << ""; + ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".r);"; ss.newLine() << "{"; ss.indent(); - _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, c, reachName); + _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, c); ss.dedent(); ss.newLine() << "}"; @@ -1544,7 +1475,8 @@ void Add_JMh_to_RGB_Shader( }; ACES2::JMhParams p = ACES2::init_JMhParams(primaries); - + _Add_WrapHueChannel_Shader(shaderCreator, ss); + _Add_SinCos_Shader(shaderCreator, ss); _Add_JMh_to_RGB_Shader(shaderCreator, ss, p); } @@ -1555,15 +1487,25 @@ void Add_Tonescale_Compress_Fwd_Shader( { const float peak_luminance = (float) params[0]; - ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries); - ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); - ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance); + const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries); + const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p, reachGamut); + const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t); unsigned resourceIndex = shaderCreator->getNextResourceIndex(); + const std::string pxl(shaderCreator->getPixelName()); - std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table); + const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table); + const std::string tonescaleName_Fwd = _Add_Tonescale_func(shaderCreator, resourceIndex, false, p, t); - _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, c, reachName); + _Add_WrapHueChannel_Shader(shaderCreator, ss); + _Add_SinCos_Shader(shaderCreator, ss); + + ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);"; + ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);"; + + _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, c); } void Add_Tonescale_Compress_Inv_Shader( @@ -1573,15 +1515,25 @@ void Add_Tonescale_Compress_Inv_Shader( { const float peak_luminance = (float) params[0]; - ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries); - ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); - ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance); + const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries); + const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p, reachGamut); + const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t); unsigned resourceIndex = shaderCreator->getNextResourceIndex(); + const std::string pxl(shaderCreator->getPixelName()); - std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table); + const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table); + std::string tonescaleName_Inv = _Add_Tonescale_func(shaderCreator, resourceIndex, true, p, t); - _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, c, reachName); + _Add_WrapHueChannel_Shader(shaderCreator, ss); + _Add_SinCos_Shader(shaderCreator, ss); + + ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);"; + ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".r);"; + + _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, c); } void Add_Gamut_Compress_Fwd_Shader( @@ -1607,13 +1559,24 @@ void Add_Gamut_Compress_Fwd_Shader( {white_x, white_y} }; - const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, primaries); + const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries); + const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries); + const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut); + const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); unsigned resourceIndex = shaderCreator->getNextResourceIndex(); + const std::string pxl(shaderCreator->getPixelName()); - std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table); + const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table); - _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, g, reachName); + _Add_WrapHueChannel_Shader(shaderCreator, ss); + _Add_SinCos_Shader(shaderCreator, ss); + + ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);"; + + _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g); } void Add_Gamut_Compress_Inv_Shader( @@ -1639,13 +1602,24 @@ void Add_Gamut_Compress_Inv_Shader( {white_x, white_y} }; - const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, primaries); + const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries); + const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries); + const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance); + const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries); + const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut); + const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); unsigned resourceIndex = shaderCreator->getNextResourceIndex(); + const std::string pxl(shaderCreator->getPixelName()); + + const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table); + + _Add_WrapHueChannel_Shader(shaderCreator, ss); + _Add_SinCos_Shader(shaderCreator, ss); - std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table); + ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);"; - _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, g, reachName); + _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g); } void Add_Surround_10_Fwd_Shader(GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, float gamma) diff --git a/src/apps/ocioconvert/main.cpp b/src/apps/ocioconvert/main.cpp index 60305e06ec..e8071c76fa 100644 --- a/src/apps/ocioconvert/main.cpp +++ b/src/apps/ocioconvert/main.cpp @@ -50,6 +50,7 @@ int main(int argc, const char **argv) std::vector stringAttrs; std::string outputDepth; + std::string inputconfig; bool usegpu = false; bool usegpuLegacy = false; @@ -85,6 +86,7 @@ int main(int argc, const char **argv) "--h", &help, "Display the help and exit", "--help", &help, "Display the help and exit", "-v" , &verbose, "Display general information", + "--iconfig %s", &inputconfig, "Input .ocio configuration file (default: $OCIO)", "", "\nOpenImageIO or OpenEXR options:", "--bitdepth %s", &outputDepth, "Output image bitdepth", "--float-attribute %L", &floatAttrs, "\"name=float\" pair defining OIIO float attribute " @@ -266,34 +268,51 @@ int main(int argc, const char **argv) outputimage = args[2].c_str(); } + // Load the current config. + OCIO::ConstConfigRcPtr config; + + try + { + if (useLut) + { + config = OCIO::Config::CreateRaw(); + } + else if (!inputconfig.empty() ) + { + config = OCIO::Config::CreateFromFile(inputconfig.c_str()); + } + else + { + const char * env = OCIO::GetEnvVariable("OCIO"); + if (env && *env) + inputconfig = env; + config = OCIO::GetCurrentConfig(); + } + } + catch (const OCIO::Exception & e) + { + std::cout << "ERROR loading config file: " << e.what() << std::endl; + exit(1); + } + catch (...) + { + + std::cerr << "ERROR loading config file: '" << inputconfig << "'" << std::endl; + exit(1); + } + if (verbose) { std::cout << std::endl; std::cout << OCIO::ImageIO::GetVersion() << std::endl; std::cout << "OCIO Version: " << OCIO::GetVersion() << std::endl; - const char * env = OCIO::GetEnvVariable("OCIO"); - if (env && *env && !useLut) + if (!useLut) { - try - { - std::cout << std::endl; - std::cout << "OCIO Config. file: '" << env << "'" << std::endl; - OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig(); - std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." - << config->getMinorVersion() << std::endl; - std::cout << "OCIO search_path: " << config->getSearchPath() << std::endl; - } - catch (const OCIO::Exception & e) - { - std::cout << "ERROR loading config file: " << e.what() << std::endl; - exit(1); - } - catch (...) - { - - std::cerr << "ERROR loading config file: '" << env << "'" << std::endl; - exit(1); - } + std::cout << std::endl; + std::cout << "OCIO Config. file: '" << inputconfig << "'" << std::endl; + std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." + << config->getMinorVersion() << std::endl; + std::cout << "OCIO search_path: " << config->getSearchPath() << std::endl; } } @@ -388,10 +407,6 @@ int main(int argc, const char **argv) // Process the image. try { - // Load the current config. - OCIO::ConstConfigRcPtr config - = useLut ? OCIO::Config::CreateRaw() : OCIO::GetCurrentConfig(); - // Get the processor. OCIO::ConstProcessorRcPtr processor; @@ -637,7 +652,6 @@ int main(int argc, const char **argv) { if (useDisplayView) { - OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig(); outputcolorspace = config->getDisplayViewColorSpaceName(display, view); } diff --git a/src/apps/ocioperf/main.cpp b/src/apps/ocioperf/main.cpp index a3af75a587..4c7837494f 100644 --- a/src/apps/ocioperf/main.cpp +++ b/src/apps/ocioperf/main.cpp @@ -24,7 +24,6 @@ class CustomMeasure explicit CustomMeasure(const char * explanation) : m_explanations(explanation) - , m_iterations(1) { resume(); } @@ -183,6 +182,10 @@ int main(int argc, const char **argv) bool useDisplayview = false; bool useInvertview = false; + std::string inputconfig; + OCIO::ConstConfigRcPtr srcConfig; + + ArgParse ap; ap.options("ocioperf -- apply and measure a color transformation processing\n\n" "usage: ocioperf [options] --transform /path/to/file.clf\n\n", @@ -206,6 +209,8 @@ int main(int argc, const char **argv) "(Deprecated) Provide the input and (display, view) pair to apply on the image", "--invertview %s %s %s", &display, &view, &outColorSpace, "Provide the (display, view) pair and output color space to apply on the image", + "--iconfig %s", &inputconfig, + "Input .ocio configuration file (default: $OCIO)", "--iter %d", &iterations, "Provide the number of iterations on the processing. Default is 50", "--bitdepths %s %s", &inBitDepthStr, &outBitDepthStr, "Provide input and output bit-depths (i.e. ui16, f32). Default is f32", @@ -232,25 +237,6 @@ int main(int argc, const char **argv) { std::cout << std::endl; std::cout << "OCIO Version: " << OCIO::GetVersion() << std::endl; - const char * env = OCIO::GetEnvVariable("OCIO"); - if(env && *env) - { - try - { - std::cout << std::endl; - std::cout << "OCIO Config. file: '" << env << "'" << std::endl; - OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig(); - std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." - << config->getMinorVersion() << std::endl; - std::cout << "OCIO search_path: " << config->getSearchPath() << std::endl; - } - catch(...) - { - - std::cerr << "ERROR: Error loading the config file: '" << env << "'"; - return 1; - } - } } if (!transformFile.empty()) @@ -258,7 +244,7 @@ int main(int argc, const char **argv) std::cout << std::endl; std::cout << "Processing using '" << transformFile << "'" << std::endl << std::endl; } - + std::cout << std::endl << std::endl; std::cout << "Processing statistics:" << std::endl << std::endl; @@ -296,25 +282,38 @@ int main(int argc, const char **argv) // Checking for an input colorspace or input (display, view) pair. else if (!inColorSpace.empty() || (!display.empty() && !view.empty())) { + if(!inputconfig.empty()) + { + std::cout << std::endl; + std::cout << "Loading " << inputconfig << std::endl; + srcConfig = OCIO::Config::CreateFromFile(inputconfig.c_str()); + } + else if(OCIO::GetEnvVariable("OCIO")) + { + std::cout << std::endl; + std::cout << "Loading $OCIO " << OCIO::GetEnvVariable("OCIO") << std::endl; + srcConfig = OCIO::Config::CreateFromEnv(); + } + else + { + throw OCIO::Exception("You must specify an input OCIO configuration (either with --iconfig or $OCIO).\n"); + } + if (verbose) { - const char * env = OCIO::GetEnvVariable("OCIO"); - if (env && *env) - { - std::cout << std::endl; - const std::string inputStr = !inColorSpace.empty() ? inColorSpace : "(" + display + ", " + view + ")"; - const std::string outputStr = !outColorSpace.empty() ? outColorSpace : "(" + display + ", " + view + ")"; - std::cout << "Processing from '" - << inputStr << "' to '" - << outputStr << "'" << std::endl; - } - else - { - throw OCIO::Exception("Missing the ${OCIO} env. variable."); - } + std::cout << std::endl; + std::cout << "OCIO Config. version: " << srcConfig->getMajorVersion() << "." + << srcConfig->getMinorVersion() << std::endl; + std::cout << "OCIO search_path: " << srcConfig->getSearchPath() << std::endl; + std::cout << std::endl; + const std::string inputStr = !inColorSpace.empty() ? inColorSpace : "(" + display + ", " + view + ")"; + const std::string outputStr = !outColorSpace.empty() ? outColorSpace : "(" + display + ", " + view + ")"; + std::cout << "Processing from '" + << inputStr << "' to '" + << outputStr << "'" << std::endl; } - OCIO::ConfigRcPtr config = OCIO::Config::CreateFromEnv()->createEditableCopy(); + OCIO::ConfigRcPtr config = srcConfig->createEditableCopy(); config->setProcessorCacheFlags(nocache ? OCIO::PROCESSOR_CACHE_OFF : OCIO::PROCESSOR_CACHE_DEFAULT); diff --git a/tests/cpu/UnitTestUtils.h b/tests/cpu/UnitTestUtils.h index bbdac35549..37b584e88c 100644 --- a/tests/cpu/UnitTestUtils.h +++ b/tests/cpu/UnitTestUtils.h @@ -79,7 +79,8 @@ template inline bool EqualWithSafeRelError(T value, T expected, T eps, - T minExpected) + T minExpected, + T* computedError = nullptr) { // If value and expected are infinity, return true. if (value == expected) return true; @@ -88,9 +89,14 @@ inline bool EqualWithSafeRelError(T value, ((expected < minExpected) ? minExpected : expected) : ((-expected < minExpected) ? minExpected : -expected); - return ( - ((value > expected) ? value - expected : expected - value) - / div) <= eps; + T err = ((value > expected) ? value - expected : expected - value) / div; + + if (computedError) + { + *computedError = err; + } + + return (err <= eps); } // C++20 introduces new strongly typed, UTF-8 based, char8_t and u8string types diff --git a/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp b/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp index 1aa7dbc463..008264582e 100644 --- a/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp +++ b/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp @@ -33,17 +33,22 @@ void ApplyFixedFunction(float * input_32f, // Using rel error with a large minExpected value of 1 will transition // from absolute error for expected values < 1 and // relative error for values > 1. + float computedError = 0.0f; const bool equalRel = OCIO::EqualWithSafeRelError(input_32f[idx], expected_32f[idx], errorThreshold, - 1.0f); + 1.0f, + &computedError); if (!equalRel) { std::ostringstream errorMsg; errorMsg.precision(14); errorMsg << "Index: " << idx; errorMsg << " - Values: " << input_32f[idx] << " expected: " << expected_32f[idx]; - errorMsg << " - Threshold: " << errorThreshold; + errorMsg << " - Error: " << computedError << " (" + << std::setprecision(3) << computedError / errorThreshold; + errorMsg << "x of Threshold: " + << std::setprecision(6) << errorThreshold << ")"; OCIO_CHECK_ASSERT_MESSAGE_FROM(0, errorMsg.str(), lineNo); } } @@ -462,45 +467,45 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_output_transform_20) const float expected_32f[num_samples*4] = { // ACEScg primaries and secondaries scaled by 4 - 4.965774059f, -0.032864563f, 0.041625995f, 1.0f, - 3.969441891f, 3.825784922f, -0.056133576f, 1.0f, - -0.075329021f, 3.688980103f, 0.270296901f, 1.0f, - -0.095423937f, 3.650517225f, 3.459972620f, 1.0f, - -0.028930068f, 0.196428135f, 2.796343565f, 1.0f, - 4.900805950f, -0.064376131f, 3.838256121f, 1.0f, + 4.966013432f, -0.033002287f, 0.041583523f, 1.0f, + 3.969460726f, 3.825797558f, -0.056160748f, 1.0f, + -0.075460039f, 3.689072609f, 0.270235062f, 1.0f, + -0.095436633f, 3.650521517f, 3.459975719f, 1.0f, + -0.028881177f, 0.196473420f, 2.796123743f, 1.0f, + 4.900828362f, -0.064385533f, 3.838270903f, 1.0f, // OCIO test values - 0.096890204f, -0.001135312f, 0.018971510f, 0.5f, - 0.809614301f, 0.479856580f, 0.814239502f, 1.0f, - 0.107420206f, 0.920529068f, 0.726378500f, 0.0f, + 0.096890487f, -0.001135427f, 0.018971475f, 0.5f, + 0.809613585f, 0.479857147f, 0.814239979f, 1.0f, + 0.107417941f, 0.920530438f, 0.726379037f, 0.0f, // ColorChecker24 (SMPTE 2065-1 2021) - 0.115475260f, 0.050812904f, 0.030212952f, 1.0f, - 0.484879673f, 0.301042974f, 0.226768956f, 1.0f, - 0.098463766f, 0.160814658f, 0.277010560f, 1.0f, - 0.071130365f, 0.107334383f, 0.035097566f, 1.0f, - 0.207111493f, 0.198474824f, 0.375326216f, 1.0f, - 0.195447892f, 0.481111974f, 0.393299013f, 1.0f, - 0.571913838f, 0.196872935f, 0.041634772f, 1.0f, - 0.045791931f, 0.069875360f, 0.291233480f, 1.0f, - 0.424848706f, 0.083199009f, 0.102153838f, 1.0f, - 0.059589427f, 0.022219172f, 0.091246888f, 1.0f, - 0.360365510f, 0.478741467f, 0.086726837f, 1.0f, - 0.695662081f, 0.371994525f, 0.068298168f, 1.0f, - 0.011806309f, 0.021665439f, 0.199594811f, 1.0f, - 0.076526314f, 0.256237417f, 0.060564656f, 1.0f, - 0.300064564f, 0.023416257f, 0.030360471f, 1.0f, - 0.805484772f, 0.596903503f, 0.082996152f, 1.0f, - 0.388385952f, 0.079899102f, 0.245819211f, 1.0f, - 0.010952532f, 0.196105912f, 0.307181358f, 1.0f, - 0.921019495f, 0.921707213f, 0.912856042f, 1.0f, - 0.590192318f, 0.588423848f, 0.587825358f, 1.0f, - 0.337743521f, 0.337685764f, 0.338155121f, 1.0f, - 0.169265985f, 0.169178501f, 0.169557109f, 1.0f, - 0.058346048f, 0.059387825f, 0.060296260f, 1.0f, - 0.012581184f, 0.012947139f, 0.013654195f, 1.0f, + 0.115475342f, 0.050812997f, 0.030212998f, 1.0f, + 0.484880149f, 0.301042914f, 0.226769030f, 1.0f, + 0.098463453f, 0.160814837f, 0.277010798f, 1.0f, + 0.071130276f, 0.107334509f, 0.035097614f, 1.0f, + 0.207111374f, 0.198474824f, 0.375326097f, 1.0f, + 0.195447117f, 0.481112540f, 0.393299103f, 1.0f, + 0.571913302f, 0.196873263f, 0.041634843f, 1.0f, + 0.045791976f, 0.069875412f, 0.291233569f, 1.0f, + 0.424848884f, 0.083199054f, 0.102153927f, 1.0f, + 0.059589352f, 0.022219239f, 0.091246955f, 1.0f, + 0.360364884f, 0.478741497f, 0.086726815f, 1.0f, + 0.695661962f, 0.371994466f, 0.068298057f, 1.0f, + 0.011806240f, 0.021665439f, 0.199594870f, 1.0f, + 0.076526135f, 0.256237596f, 0.060564563f, 1.0f, + 0.300064713f, 0.023416281f, 0.030360531f, 1.0f, + 0.805483222f, 0.596904039f, 0.082996234f, 1.0f, + 0.388385385f, 0.079899333f, 0.245818958f, 1.0f, + 0.010951802f, 0.196106046f, 0.307181537f, 1.0f, + 0.921020269f, 0.921707630f, 0.912857533f, 1.0f, + 0.590191603f, 0.588424563f, 0.587825298f, 1.0f, + 0.337743223f, 0.337686002f, 0.338155240f, 1.0f, + 0.169266403f, 0.169178575f, 0.169557154f, 1.0f, + 0.058346011f, 0.059387885f, 0.060296256f, 1.0f, + 0.012581199f, 0.012947144f, 0.013654212f, 1.0f, // Spectrally non-selective 18 % reflecting diffuser - 0.145115465f, 0.145115525f, 0.145115510f, 1.0f, + 0.145115077f, 0.145115703f, 0.145115480f, 1.0f, // Perfect reflecting diffuser - 1.041565657f, 1.041566014f, 1.041565657f, 1.0f, + 1.041565537f, 1.041566610f, 1.041566253f, 1.0f, }; OCIO::FixedFunctionOpData::Params params = { @@ -518,6 +523,17 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_output_transform_20) 1e-5f, __LINE__); +#if DUMP_RESULT + std::cout << "aces_output_transform_20 results: \n" << std::setprecision(9) << std::fixed; + for (unsigned i = 0; i < num_samples; ++i) + { + std::cout << input2_32f[i * 4 + 0] << "f, " + << input2_32f[i * 4 + 1] << "f, " + << input2_32f[i * 4 + 2] << "f, " + << input2_32f[i * 4 + 3] << "f,\n"; + } +#endif + OCIO::ConstFixedFunctionOpDataRcPtr funcData2 = std::make_shared(OCIO::FixedFunctionOpData::ACES_OUTPUT_TRANSFORM_20_INV, params); @@ -528,6 +544,13 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_output_transform_20) __LINE__); } +// NB: The ACES 2 FixedFunction takes linear ACES2065-1 values and produces linear RGB values +// in the encoding gamut. The relatively large tolerance on the following round-trip tests doesn't +// fully test accuracy of saturated values. See additional tests in BuiltinTransform_tests.cpp +// that do a similar round-trip but using gamma-corrected code values and therefore does +// a more thorough test of colors where one or more channels is near zero, which is an area +// that is more challenging for the algorithm to invert. + OCIO_ADD_TEST(FixedFunctionOpCPU, aces_ot_20_rec709_100n_rt) { const int lut_size = 8; @@ -778,35 +801,35 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_tonescale_compress_20) const float expected_32f[num_samples*4] = { // ACEScg primaries and secondaries scaled by 4 - 110.702453613f, 211.251770020f, 25.025110245f, 1.0f, + 110.702453613f, 211.251770020f, 25.025110245f, 1.0f, 168.016815186f, 129.796249390f, 106.183448792f, 1.0f, - 140.814849854f, 193.459213257f, 147.056488037f, 1.0f, - 156.429519653f, 110.938514709f, 192.204727173f, 1.0f, - 80.456542969f, 98.490524292f, 268.442108154f, 1.0f, - 135.172195435f, 175.559280396f, 341.715240479f, 1.0f, + 140.814849854f, 193.459197998f, 147.056488037f, 1.0f, + 156.429504395f, 110.938423157f, 192.204727173f, 1.0f, + 80.456558228f, 98.490531921f, 268.442108154f, 1.0f, + 135.172225952f, 175.559326172f, 341.715240479f, 1.0f, // OCIO test values - 18.187314987f, 33.819175720f, 4.173158169f, 0.5f, - 80.413116455f, 21.309329987f, 332.159759521f, 1.0f, - 83.447891235f, 37.852291107f, 182.925750732f, 0.0f, + 18.187316895f, 33.819190979f, 4.173158169f, 0.5f, + 80.413101196f, 21.309329987f, 332.159759521f, 1.0f, + 83.447883606f, 37.852523804f, 182.925750732f, 0.0f, // ColorChecker24 (SMPTE 2065-1 2021) - 27.411964417f, 13.382769585f, 38.146659851f, 1.0f, - 59.987670898f, 14.391894341f, 39.841842651f, 1.0f, + 27.411968231f, 13.382784843f, 38.146659851f, 1.0f, + 59.987659454f, 14.391894341f, 39.841842651f, 1.0f, 43.298923492f, 12.199877739f, 249.107116699f, 1.0f, - 31.489658356f, 14.075142860f, 128.878036499f, 1.0f, - 50.749198914f, 12.731814384f, 285.658966064f, 1.0f, - 64.728637695f, 18.593795776f, 179.324264526f, 1.0f, - 53.399448395f, 37.394428253f, 50.924011230f, 1.0f, + 31.489654541f, 14.075141907f, 128.878036499f, 1.0f, + 50.749198914f, 12.731806755f, 285.658966064f, 1.0f, + 64.728637695f, 18.593791962f, 179.324264526f, 1.0f, + 53.399444580f, 37.394416809f, 50.924011230f, 1.0f, 34.719596863f, 21.616765976f, 271.008331299f, 1.0f, - 43.910713196f, 36.788166046f, 13.975610733f, 1.0f, - 23.196525574f, 15.118354797f, 317.544281006f, 1.0f, - 63.348674774f, 33.283493042f, 119.145133972f, 1.0f, - 64.908889771f, 35.371044159f, 70.842193604f, 1.0f, - 24.876911163f, 23.143159866f, 273.228973389f, 1.0f, + 43.910709381f, 36.788166046f, 13.975610733f, 1.0f, + 23.196529388f, 15.118354797f, 317.544281006f, 1.0f, + 63.348682404f, 33.283519745f, 119.145133972f, 1.0f, + 64.908874512f, 35.371063232f, 70.842193604f, 1.0f, + 24.876913071f, 23.143159866f, 273.228973389f, 1.0f, 44.203376770f, 28.918329239f, 144.154159546f, 1.0f, - 32.824356079f, 43.447875977f, 17.892261505f, 1.0f, - 75.830871582f, 39.872474670f, 90.752044678f, 1.0f, - 45.823116302f, 34.652069092f, 348.832092285f, 1.0f, - 43.597240448f, 23.079078674f, 218.454376221f, 1.0f, + 32.824359894f, 43.447853088f, 17.892261505f, 1.0f, + 75.830871582f, 39.872489929f, 90.752044678f, 1.0f, + 45.823120117f, 34.652057648f, 348.832092285f, 1.0f, + 43.597236633f, 23.079071045f, 218.454376221f, 1.0f, }; OCIO::FixedFunctionOpData::Params params = {1000.f}; @@ -871,35 +894,35 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_gamut_map_20) const float expected_32f[num_samples*4] = { // ACEScg primaries and secondaries scaled by 4 - 107.831291199f, 174.252944946f, 25.025119781f, 1.0f, - 168.028198242f, 118.224960327f, 106.183464050f, 1.0f, - 140.030105591f, 127.177192688f, 147.056488037f, 1.0f, - 156.512435913f, 73.218856812f, 192.204727173f, 1.0f, - 79.378631592f, 72.613555908f, 268.442108154f, 1.0f, - 133.827835083f, 149.929809570f, 341.715240479f, 1.0f, + 107.829742432f, 174.270156860f, 25.025110245f, 1.0f, + 168.028274536f, 118.227561951f, 106.183448792f, 1.0f, + 140.030166626f, 127.184478760f, 147.056488037f, 1.0f, + 156.512435913f, 73.219184875f, 192.204727173f, 1.0f, + 79.378555298f, 72.608604431f, 268.442108154f, 1.0f, + 133.827941895f, 149.930618286f, 341.715240479f, 1.0f, // OCIO test values - 18.194000244f, 33.312938690f, 4.173166752f, 0.5f, - 80.413116455f, 21.309329987f, 332.159759521f, 1.0f, - 83.467437744f, 37.305160522f, 182.925750732f, 0.0f, + 18.193992615f, 33.313068390f, 4.173158169f, 0.5f, + 80.413116455f, 21.309329987f, 332.159759521f, 1.0f, + 83.467445374f, 37.305030823f, 182.925750732f, 0.0f, // ColorChecker24 (SMPTE 2065-1 2021) - 27.411962509f, 13.382793427f, 38.146591187f, 1.0f, - 59.987670898f, 14.391893387f, 39.841842651f, 1.0f, - 43.298923492f, 12.199877739f, 249.107116699f, 1.0f, - 31.489658356f, 14.075142860f, 128.878036499f, 1.0f, - 50.749198914f, 12.731814384f, 285.658966064f, 1.0f, - 64.728637695f, 18.593795776f, 179.324264526f, 1.0f, - 53.399448395f, 37.394428253f, 50.924011230f, 1.0f, - 34.719596863f, 21.616765976f, 271.008331299f, 1.0f, - 43.910709381f, 36.788166046f, 13.975610733f, 1.0f, - 23.196525574f, 15.118361473f, 317.544250488f, 1.0f, - 63.348674774f, 33.283493042f, 119.145133972f, 1.0f, - 64.908889771f, 35.371044159f, 70.842193604f, 1.0f, - 24.876916885f, 23.143167496f, 273.229034424f, 1.0f, - 44.203376770f, 28.918329239f, 144.154159546f, 1.0f, - 32.824352264f, 43.447864532f, 17.892255783f, 1.0f, - 75.830871582f, 39.872474670f, 90.752044678f, 1.0f, - 45.823104858f, 34.652038574f, 348.832092285f, 1.0f, - 43.635551453f, 21.629474640f, 218.454376221f, 1.0f, + 27.411962509f, 13.382769585f, 38.146659851f, 1.0f, + 59.987674713f, 14.391894341f, 39.841842651f, 1.0f, + 43.298919678f, 12.199877739f, 249.107116699f, 1.0f, + 31.489658356f, 14.075142860f, 128.878036499f, 1.0f, + 50.749198914f, 12.731814384f, 285.658966064f, 1.0f, + 64.728637695f, 18.593795776f, 179.324264526f, 1.0f, + 53.399448395f, 37.394428253f, 50.924011230f, 1.0f, + 34.719596863f, 21.616765976f, 271.008331299f, 1.0f, + 43.910713196f, 36.788166046f, 13.975610733f, 1.0f, + 23.196525574f, 15.118354797f, 317.544281006f, 1.0f, + 63.348674774f, 33.283493042f, 119.145133972f, 1.0f, + 64.908882141f, 35.371044159f, 70.842193604f, 1.0f, + 24.876911163f, 23.143159866f, 273.228973389f, 1.0f, + 44.203376770f, 28.918329239f, 144.154159546f, 1.0f, + 32.824356079f, 43.447875977f, 17.892261505f, 1.0f, + 75.830871582f, 39.872474670f, 90.752044678f, 1.0f, + 45.823112488f, 34.652069092f, 348.832092285f, 1.0f, + 43.635547638f, 21.629518509f, 218.454376221f, 1.0f, }; OCIO::FixedFunctionOpData::Params params = { diff --git a/tests/cpu/transforms/BuiltinTransform_tests.cpp b/tests/cpu/transforms/BuiltinTransform_tests.cpp index e46bd74fe6..29d252cfb9 100644 --- a/tests/cpu/transforms/BuiltinTransform_tests.cpp +++ b/tests/cpu/transforms/BuiltinTransform_tests.cpp @@ -8,6 +8,7 @@ #include "transforms/BuiltinTransform.cpp" +#include "ops/lut3d/Lut3DOp.h" #include "ops/matrix/MatrixOp.h" #include "Platform.h" #include "transforms/builtins/ACES.h" @@ -18,7 +19,6 @@ namespace OCIO = OCIO_NAMESPACE; - OCIO_ADD_TEST(BuiltinTransform, creation) { // Tests around the creation of a built-in transform instance. @@ -124,7 +124,8 @@ void ValidateValues(const char * prefixMsg, T in, T out, T errorThreshold, int l // Using rel error with a large minExpected value of 1 will transition // from absolute error for expected values < 1 and // relative error for values > 1. - if (!OCIO::EqualWithSafeRelError(in, out, errorThreshold, T(1.))) + T computedError{}; + if (!OCIO::EqualWithSafeRelError(in, out, errorThreshold, T(1.), &computedError)) { std::ostringstream errorMsg; errorMsg.precision(std::numeric_limits::max_digits10); @@ -132,7 +133,11 @@ void ValidateValues(const char * prefixMsg, T in, T out, T errorThreshold, int l { errorMsg << prefixMsg << ": "; } - errorMsg << "value = " << in << " but expected = " << out; + errorMsg << " - Values: " << in << " expected: " << out; + errorMsg << " - Error: " << computedError << " (" + << std::setprecision(3) << computedError / errorThreshold; + errorMsg << "x of Threshold: " << std::setprecision(6) << errorThreshold + << ")"; OCIO_CHECK_ASSERT_MESSAGE_FROM(0, errorMsg.str(), lineNo); } } @@ -328,7 +333,7 @@ namespace using Values = std::vector; using AllValues = std::map>; -void ValidateBuiltinTransform(const char * style, const Values & in, const Values & out, float errorThreshold, int lineNo) +void ValidateBuiltinTransform(const char * style, const Values & in, const Values & out, float errorThreshold, int lineNo, Values &results) { OCIO::BuiltinTransformRcPtr builtin = OCIO::BuiltinTransform::Create(); OCIO_CHECK_NO_THROW_FROM(builtin->setStyle(style), lineNo); @@ -348,16 +353,16 @@ void ValidateBuiltinTransform(const char * style, const Values & in, const Value OCIO::PackedImageDesc inDesc((void *)&in[0], long(in.size() / 3), 1, 3); - Values vals(in.size(), -1.0f); - OCIO::PackedImageDesc outDesc((void *)&vals[0], long(vals.size() / 3), 1, 3); + results = Values(in.size(), -1.0f); + OCIO::PackedImageDesc outDesc((void *)&results[0], long(results.size() / 3), 1, 3); OCIO_CHECK_NO_THROW_FROM(cpu->apply(inDesc, outDesc), lineNo); for (size_t idx = 0; idx < out.size(); ++idx) { std::ostringstream oss; - oss << style << ": for index = " << idx << " with threshold = " << errorThreshold; - ValidateValues(oss.str().c_str(), vals[idx], out[idx], errorThreshold, lineNo); + oss << style << ": for index = " << idx; + ValidateValues(oss.str().c_str(), results[idx], out[idx], errorThreshold, lineNo); } } @@ -458,100 +463,130 @@ AllValues UnitTestValues { 1.0e-6f, { 0.5f, 0.4f, 0.3f }, { 0.22214814f, 0.21179835f, 0.15639816f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.26260212f, 0.25207470f, 0.20617338f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.26260212f, 0.25207472f, 0.20617332f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.16253406f, 0.15513624f, 0.12449740f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.20592399f, 0.19440515f, 0.15028581f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.41039306f, 0.38813826f, 0.30191866f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.46536570f, 0.43852836f, 0.33688113f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.51225936f, 0.48264506f, 0.37060050f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.55653524f, 0.51967940f, 0.38678724f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.41039258f, 0.38813800f, 0.30191845f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.46536540f, 0.43852820f, 0.33688095f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.51225930f, 0.48264477f, 0.37060022f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.55653550f, 0.51967950f, 0.38678730f } } }, - - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC709-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.25147703f, 0.24029444f, 0.18221131f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.25373828f, 0.24245512f, 0.18384966f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.25712878f, 0.24569483f, 0.18630630f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.25373834f, 0.24245517f, 0.18384990f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-XYZ-E_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.26332240f, 0.25161302f, 0.19079340f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.15705064f, 0.14920068f, 0.11100890f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D60-in-XYZ-E_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.20469205f, 0.19229384f, 0.13782679f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.39655724f, 0.37322620f, 0.26917280f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.44968104f, 0.42165324f, 0.30032730f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.49499428f, 0.46407104f, 0.33038715f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-P3-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.53778990f, 0.49960223f, 0.34477120f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.40185606f, 0.37821326f, 0.27276948f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.45568960f, 0.42728746f, 0.30434040f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.50160843f, 0.47027197f, 0.33480182f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.54497580f, 0.50627790f, 0.34937808f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.40185624f, 0.37821335f, 0.27276933f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.45568994f, 0.42728750f, 0.30434027f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.50160870f, 0.47027220f, 0.33480210f } } }, - { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020-D60-in-REC2020-D65_2.0", - { 1.0e-6f, - { 0.5f, 0.4f, 0.3f }, { 0.54497580f, 0.50627780f, 0.34937814f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.26260215f, 0.25207460f, 0.20617345f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.26260215f, 0.25207475f, 0.20617352f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.16253395f, 0.15513620f, 0.12449738f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.20592400f, 0.19440512f, 0.15028587f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.41039270f, 0.38813815f, 0.30191854f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.46536559f, 0.43852845f, 0.33688101f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.51225948f, 0.48264498f, 0.37060043f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.55653530f, 0.51967967f, 0.38678783f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.41039288f, 0.38813818f, 0.30191860f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.46536580f, 0.43852842f, 0.33688098f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.51225960f, 0.48264492f, 0.37060046f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.55653548f, 0.51967967f, 0.38678783f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC709-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.25147712f, 0.24029461f, 0.18221153f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.25373834f, 0.24245527f, 0.18384993f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.25712875f, 0.24569492f, 0.18630651f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.25373828f, 0.24245520f, 0.18384989f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-XYZ-E_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.26332238f, 0.25161314f, 0.19079420f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.15705051f, 0.14920059f, 0.11100878f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D60-in-XYZ-E_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.20469207f, 0.19229385f, 0.13782671f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.39655733f, 0.37322620f, 0.26917258f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.44968122f, 0.42165339f, 0.30032712f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.49499470f, 0.46407115f, 0.33038712f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-P3-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.53778988f, 0.49960214f, 0.34477147f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.40185603f, 0.37821317f, 0.27276924f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.45568976f, 0.42728746f, 0.30434006f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.50160873f, 0.47027206f, 0.33480173f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.54497570f, 0.50627774f, 0.34937829f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.40185642f, 0.37821338f, 0.27276939f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.45569009f, 0.42728764f, 0.30434042f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.50160891f, 0.47027206f, 0.33480188f } } }, + { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020-D60-in-REC2020-D65_2.0", + { 1.0e-4f, + { 0.5f, 0.4f, 0.3f }, + { 0.54497600f, 0.50627792f, 0.34937853f } } }, { "APPLE_LOG_to_ACES2065-1", { 1.0e-6f, @@ -696,7 +731,8 @@ OCIO_ADD_TEST(Builtins, validate) } else { - ValidateBuiltinTransform(name, std::get<1>(values), std::get<2>(values), std::get<0>(values), __LINE__); + Values results; + ValidateBuiltinTransform(name, std::get<1>(values), std::get<2>(values), std::get<0>(values), __LINE__, results); } } @@ -704,3 +740,153 @@ OCIO_ADD_TEST(Builtins, validate) // that don't have an associated built-in. OCIO_CHECK_EQUAL(UnitTestValues.size(), reg->getNumBuiltins()); } + +namespace +{ + +void ValidateDisplayViewRoundTrip(const char * display_style, const char * view_style, + float scale, float errorThreshold, + std::vector difficultItems, float difficultThreshold, + int lineNo) +{ + // Built-in transform for the display. + OCIO::BuiltinTransformRcPtr display_builtin = OCIO::BuiltinTransform::Create(); + OCIO_CHECK_NO_THROW_FROM(display_builtin->setStyle(display_style), lineNo); + OCIO_CHECK_NO_THROW_FROM(display_builtin->validate(), lineNo); + auto display_builtin_inv = display_builtin->createEditableCopy(); + display_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE); + + // Built-in transform for the view. + OCIO::BuiltinTransformRcPtr view_builtin = OCIO::BuiltinTransform::Create(); + OCIO_CHECK_NO_THROW_FROM(view_builtin->setStyle(view_style), lineNo); + OCIO_CHECK_NO_THROW_FROM(view_builtin->validate(), lineNo); + auto view_builtin_inv = view_builtin->createEditableCopy(); + view_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE); + + // Assemble inverse and forward transform into a group transform that goes from + // display code values to ACES and back to code values. + OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create(); + group->appendTransform(display_builtin_inv); + group->appendTransform(view_builtin_inv); + group->appendTransform(view_builtin); + group->appendTransform(display_builtin); + + // Create a Processor. + OCIO::ConstConfigRcPtr config = OCIO::Config::CreateRaw(); + OCIO::ConstProcessorRcPtr proc; + OCIO_CHECK_NO_THROW_FROM(proc = config->getProcessor(group), lineNo); + OCIO_REQUIRE_ASSERT(proc); + + // Create a CPUProcessor. + // Use optimization none to avoid replacing inv/fwd pairs and avoid fast pow for the display. + OCIO::ConstCPUProcessorRcPtr cpu; + OCIO_CHECK_NO_THROW_FROM(cpu = proc->getOptimizedCPUProcessor(OCIO::OPTIMIZATION_NONE), lineNo); + OCIO_REQUIRE_ASSERT(cpu); + + // Create a 7 x 7 x 7 grid of RGBA values. + const unsigned lut_size = 7; + const unsigned num_channels = 4; + unsigned num_samples = lut_size * lut_size * lut_size; + std::vector input_32f(num_samples * num_channels, 0.f); + std::vector output_32f(num_samples * num_channels, 0.f); + + GenerateIdentityLut3D(input_32f.data(), lut_size, num_channels, OCIO::LUT3DORDER_FAST_RED); + + // Scale the grid of points, which is necessary when testing the ST-2084/PQ displays + // since the transforms are only designed to process up to a maximum luminance level. + for(unsigned idx=0; idx<(num_samples*4); ++idx) + { + input_32f[idx] *= scale; + } + + // Process the values. + OCIO::PackedImageDesc inDesc((void *)&input_32f[0], num_samples, 1, 4); + OCIO::PackedImageDesc outDesc((void *)&output_32f[0], num_samples, 1, 4); + OCIO_CHECK_NO_THROW_FROM(cpu->apply(inDesc, outDesc), lineNo); + + // Check if values are within tolerance. + for(unsigned idx=0; idx<(num_samples*4); idx+=4) + { + float computedErrorR, computedErrorG, computedErrorB = 0.0f; + + const bool isDifficult = std::find(difficultItems.begin(), difficultItems.end(), idx) + != difficultItems.end(); + const float tol = isDifficult ? difficultThreshold: errorThreshold; + + const bool equalRelR = OCIO::EqualWithSafeRelError(output_32f[idx], input_32f[idx], + tol, 1.0f, + &computedErrorR); + const bool equalRelG = OCIO::EqualWithSafeRelError(output_32f[idx+1], input_32f[idx+1], + tol, 1.0f, + &computedErrorG); + const bool equalRelB = OCIO::EqualWithSafeRelError(output_32f[idx+2], input_32f[idx+2], + tol, 1.0f, + &computedErrorB); + if (!equalRelR || !equalRelG || !equalRelB) + { + std::ostringstream errorMsg; + errorMsg.precision(10); + errorMsg << "Index: " << idx << " - Tol.: " << tol << "\n - Expected: "; + errorMsg << std::fixed; + errorMsg << input_32f[idx] << ", " << input_32f[idx+1] << ", " << input_32f[idx+2]; + errorMsg << "\n - Actual: "; + errorMsg << output_32f[idx] << ", " << output_32f[idx+1] << ", " << output_32f[idx+2]; + errorMsg << "\n - Error: "; + errorMsg << computedErrorR << ", " << computedErrorG << ", " << computedErrorB; + OCIO_CHECK_ASSERT_MESSAGE_FROM(0, errorMsg.str(), lineNo); + } + } +} + +} // anon. + +OCIO_ADD_TEST(Builtins, aces2_displayview_roundtrip) +{ + // Perform a round-trip test from display code-values to ACES and back to code values. + // This uses a 7 x 7 x 7 grid of RGB values. + + ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_REC.1886-REC.709", + "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0", + 1.0f, // scale factor + 0.004f, // tolerance + {}, 0.f, + __LINE__); + + ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_DisplayP3", + "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0", + 1.0f, // scale factor + 0.001f, // tolerance + {}, 0.f, + __LINE__); + + ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65", + "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0", + // Need to lower the max value from 1000 to 990 nits. + 0.7507f, // scale factor = 990 nits + 0.005f, // main tolerance + {168, 196, 364, 392, 1344}, // difficult values + 0.03f, // tolerance for difficult values + __LINE__); + + ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65", + "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0", + // Need to lower the max value from 4000 to 3860 nits. + 0.8987f, // scale factor = 3860 nits + 0.007f, // main tolerance + {168, 196, 392, 396, 588, 592, 952, 1148, 1196, 1200, 1260, 1288}, + 0.2f, // tolerance for difficult values + __LINE__); + + // TODO: The Rec.2100 transforms have too many values that don't invert to easily validate. +// ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_REC.2100-PQ", +// "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020_2.0", +// 0.7507f, // scale factor = 990 nits +// 5e-3f, // tolerance +// __LINE__); +// +// ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_REC.2100-PQ", +// "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0", +// 0.8987f, // scale factor = 3860 nits +// 5e-3f, // tolerance +// __LINE__); +} diff --git a/tests/gpu/FixedFunctionOp_test.cpp b/tests/gpu/FixedFunctionOp_test.cpp index a7e77bc2d2..523ff66ef1 100644 --- a/tests/gpu/FixedFunctionOp_test.cpp +++ b/tests/gpu/FixedFunctionOp_test.cpp @@ -434,7 +434,11 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_inv) OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_invfwd) { - const double data_inv[9] = { + // Test that there are no shader resource (textures, functions, etc) conflicts between + // the 2 different inverse and forward transforms. This is not a round-trip test, + // it tests that the forward and inverse may exist in the same shader with no issue. + + const double data_inv[9] = { // Peak luminance 100.f, // REC709 gamut @@ -458,11 +462,161 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_invfwd) grp->appendTransform(func_inv); grp->appendTransform(func_fwd); + test.setTestNaN(false); // TODO: Partially running the output transform without the clamp so do not test Nan or Inf values + test.setTestInfinity(false); + test.setProcessor(grp); + // This is not expected to be an identity, but it should be the same between CPU & GPU. test.setErrorThreshold(7e-4f); } +namespace +{ + +OCIO::GroupTransformRcPtr BuildRoundTripTransform(const char * display_style, const char * view_style) +{ + // Built-in transform for the display. + OCIO::BuiltinTransformRcPtr display_builtin = OCIO::BuiltinTransform::Create(); + display_builtin->setStyle(display_style); + display_builtin->validate(); + auto display_builtin_inv = display_builtin->createEditableCopy(); + display_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE); + + // Built-in transform for the view. + OCIO::BuiltinTransformRcPtr view_builtin = OCIO::BuiltinTransform::Create(); + view_builtin->setStyle(view_style); + view_builtin->validate(); + auto view_builtin_inv = view_builtin->createEditableCopy(); + view_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE); + + // Assemble inverse and forward transform into a group transform that goes from + // display code values to ACES and back to code values. + OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create(); + group->appendTransform(display_builtin_inv); + group->appendTransform(view_builtin_inv); + group->appendTransform(view_builtin); + group->appendTransform(display_builtin); + + return group; +} + +void GenerateIdentityLut3D(OCIOGPUTest::CustomValues & values, int edgeLen, int numChannels, float scale) +{ + int num_samples = edgeLen * edgeLen * edgeLen; + std::vector img(num_samples * numChannels, 0.f); + + float c = 1.0f / ((float)edgeLen - 1.0f); + for (int i = 0; i < edgeLen*edgeLen*edgeLen; i++) + { + img[numChannels*i + 0] = scale * (float)(i%edgeLen) * c; + img[numChannels*i + 1] = scale * (float)((i / edgeLen) % edgeLen) * c; + img[numChannels*i + 2] = scale * (float)((i / edgeLen / edgeLen) % edgeLen) * c; + } + values.m_inputValues = img; +} + +} // anon. + +// The following group of tests compares the display code value to ACES and back to code value +// round-trip. The round-trip is not perfect (see BuiltinTransform_tests.cpp) but the tests +// here simply check if the CPU and GPU are giving the same result. + +OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_rec709_rndtrip) +{ + const char * display_style = "DISPLAY - CIE-XYZ-D65_to_REC.1886-REC.709"; + const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0"; + auto group = BuildRoundTripTransform(display_style, view_style); + + // The test harness gets a processor from the transform with the default optimization + // level. However, the forward/inverse does not optimize out due to the clamp to AP1 + // in-between the FixedFunctions. + test.setProcessor(group); + + // Set up a grid of RGBA custom values. + const int lut_size = 17; + const int num_channels = 4; + OCIOGPUTest::CustomValues values; + GenerateIdentityLut3D(values, lut_size, num_channels, 1.0f); + + test.setCustomValues(values); + + test.setErrorThreshold(0.004f); +} + +OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_displayp3_rndtrip) +{ + const char * display_style = "DISPLAY - CIE-XYZ-D65_to_DisplayP3"; + const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0"; + auto group = BuildRoundTripTransform(display_style, view_style); + test.setProcessor(group); + + const int lut_size = 17; + const int num_channels = 4; + OCIOGPUTest::CustomValues values; + GenerateIdentityLut3D(values, lut_size, num_channels, 1.0f); + + test.setCustomValues(values); + + test.setErrorThreshold(0.001f); +} + +OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_1000nit_p3_rndtrip) +{ + const char * display_style = "DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65"; + const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0"; + auto group = BuildRoundTripTransform(display_style, view_style); + test.setProcessor(group); + + const int lut_size = 17; + const int num_channels = 4; + OCIOGPUTest::CustomValues values; + GenerateIdentityLut3D(values, lut_size, num_channels, 0.75183f); // scale to 1000 nits + + test.setCustomValues(values); + + // TODO: Investigate why this is not closer. + // Setting the CPUProcessor to OPTIMIZATION_NONE helps slightly, but is not the main + // cause of the error. + test.setErrorThreshold(0.012f); +} + +OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_p3_rndtrip) +{ + const char * display_style = "DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65"; + const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0"; + auto group = BuildRoundTripTransform(display_style, view_style); + test.setProcessor(group); + + const int lut_size = 17; + const int num_channels = 4; + OCIOGPUTest::CustomValues values; + GenerateIdentityLut3D(values, lut_size, num_channels, 0.90257f); // scale to 4000 nits + + test.setCustomValues(values); + + // TODO: Investigate why this is not closer. + test.setErrorThreshold(0.018f); +} + +OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_rec2020_rndtrip) +{ + const char * display_style = "DISPLAY - CIE-XYZ-D65_to_REC.2100-PQ"; + const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0"; + auto group = BuildRoundTripTransform(display_style, view_style); + test.setProcessor(group); + + const int lut_size = 17; + const int num_channels = 4; + OCIOGPUTest::CustomValues values; + GenerateIdentityLut3D(values, lut_size, num_channels, 0.90257f); // scale to 4000 nits + + test.setCustomValues(values); + + // TODO: Investigate why this is not closer. + test.setErrorThreshold(0.03f); +} + OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_rgb_to_jmh_fwd) { // ACES AP0